In [64]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [65]:
# Categories of prohibited references that the prompt asks the model to look for.
options = [
    'sexuality',
    'neighborhood',
    'disability',
    'marital status',
    'age',
    'race',
    'ethnicity',
    'religion',
    'gender',
]

# Comma-separated form (no space after the commas).
options_str = ','.join(options)

# Same list with spaces turned into underscores, followed by two extra field names.
options_str_key = ', '.join(
    [options_str.replace(" ", "_"), 'total_count', 'highlighted_text']
)

In [66]:
# Chat model client that the prompt is sent to further down; temperature is pinned to 0.0.
chat = ChatOpenAI(temperature=0.0)

In [67]:
# First sample text. The following cell assigns `user_input` again, so this
# value only takes effect if that cell is skipped.
user_input = (
    "The lender has completed all HUD checks and OFAC checks for all borrowers, "
    "and principals, none of which has resulted in need for further investigation. "
    "Based on the appraiser's analysis, this development project will add an "
    "additional 66 units and associated retail space which will be completed in "
    "early 2023."
)

In [68]:
# Second sample text: the same lender paragraph wrapped with a leading sentence
# about the neighborhood and a trailing sentence about a white fence.
user_input = (
    "The property is in a highly sought-after neighborhood with good schools "
    "next to a Hindu temple in a predominantly black neighborhood. "
    "The lender has completed all HUD checks and OFAC checks for all borrowers, "
    "and principals, none of which has resulted in need for further investigation. "
    "Based on the appraiser's analysis, this development project will add an "
    "additional 66 units and associated retail space which will be completed in "
    "early 2023. "
    "The property is surrounded by a white colored fence that runs on the "
    "perimeter of the property"
)

In [69]:
# --- Per-category counters -------------------------------------------------
# One ResponseSchema per category; each is declared with type "number" and a
# description that lists example phrases for the model.

sexuality_schema = ResponseSchema(
    name="sexuality",
    description="Total number of sexual bias found in the given text. Examples of sexual bias is: male-dominated, women-owned.",
    type="number",
)

neighborhood_schema = ResponseSchema(
    name="neighborhood",
    description="Total number of neighborhood bias found in the given text. Examples of neighborhood bias is: highly sought-after neighborhood, good schools, rich community, poor neighborhood.",
    type="number",
)

disability_schema = ResponseSchema(
    name="disability",
    description="Total number of disability bias found in the given text. Examples of disability bias is: mental disorder, blind, deaf, bipolar disorder, ADHD, hearing loss, epilepsy.",
    type="number",
)

marital_status_schema = ResponseSchema(
    name="marital_status",
    description="Total number of marital status bias found in the given text. Examples of marital status bias is: single, married, separated, divorced, widowed",
    type="number",
)

age_schema = ResponseSchema(
    name="age",
    description="Total number of age bias found in the given text. Examples of age bias is: elderly, old person, old people, very young.",
    type="number",
)

race_schema = ResponseSchema(
    name="race",
    description="Total number of race bias found in the given text. Examples of race bias is: white, black, hispanic, asian, native american.",
    type="number",
)

ethnicity_schema = ResponseSchema(
    name="ethnicity",
    description="Total number of ethnicity bias found in the given text. Examples of ethnicity bias is: mexican, african-american, chinese, japanese, indian, french-canadian, indigenous-people, pacific-islander, filipino, cuban.",
    type="number",
)

religion_schema = ResponseSchema(
    name="religion",
    description="Total number of religious bias found in the given text. Examples of religious bias is: hindu temple, church, synagogue, mosque, christian, muslim, hindu, jew.",
    type="number",
)

gender_schema = ResponseSchema(
    name="gender",
    description="Total number of gender bias found in the given text. Examples of gender bias is: male, female, transgender, gender neutral, non-binary, agender, pangender, genderqueer, two-spirit, third gender, and all combination of these.",
    type="number",
)

# --- Summary fields --------------------------------------------------------

# Overall count across every category (also type "number").
bias_type_count_schema = ResponseSchema(
    name="bias_type_count",
    description="Total count of all type of biases found.",
    type="number",
)

# No `type` is passed for the two schemas below, so ResponseSchema's default applies.
highlighted_text_schema = ResponseSchema(
    name="highlighted_text",
    description="Add the highlighted section where the biased phrases are found in the given text. Use Markdown for highlighting the biased phrases that were detected. If no biases are found, leave this empty",
)

explanation_schema = ResponseSchema(
    name="explanation",
    description="Explain the response and your reasoning here",
)

In [70]:
# Lookup table from a label to its ResponseSchema. The nine category labels are
# spelled exactly like the entries of `options` (note the space in
# 'marital status'), because they are looked up by those strings below.
schemas = {
    'sexuality': sexuality_schema,
    'neighborhood': neighborhood_schema,
    'disability': disability_schema,
    'marital status': marital_status_schema,
    'age': age_schema,
    'race': race_schema,
    'ethnicity': ethnicity_schema,
    'religion': religion_schema,
    'gender': gender_schema,
    'total_count': bias_type_count_schema,
    'highlighted_text': highlighted_text_schema,
    'explanation_text': explanation_schema,
}

# The three summary fields come first, then one schema per entry of `options`,
# in the order `options` lists them.
response_schemas = [
    explanation_schema,
    bias_type_count_schema,
    highlighted_text_schema,
    *(schemas[category] for category in options),
]

In [71]:
# Build a parser from the assembled schemas; the same parser is used later to
# decode the model's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# Text describing the expected JSON layout; it is embedded in the prompt further down.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"explanation": string  // Explain the response and your reasoning here
	"bias_type_count": number  // Total count of all type of biases found.
	"highlighted_text": string  // Add the highlighted section where the biased phrases are found in the given text. Use Markdown for highlighting the biased phrases that were detected. If no biases are found, leave this empty
	"sexuality": number  // Total number of sexual bias found in the given text. Examples of sexual bias is: male-dominated, women-owned.
	"neighborhood": number  // Total number of neighborhood bias found in the given text. Examples of neighborhood bias is: highly sought-after neighborhood, good schools, rich community, poor neighborhood.
	"disability": number  // Total number of disability bias found in the given text. Examples of disability bias is: mental disorder, blind, deaf, b

In [72]:
# Instruction preamble of the prompt. Only `options_str` is interpolated here;
# the 12-space indents and the trailing indent-only line are part of the string.
template_string = (
    "\n"
    f"            For the following text find the total number of biases that indicates a reference to prohibited terms such as: {options_str}.\n"
    '            Example of what is not a bias: "white fence", "black door", "brown pen".\n'
    "            "
)

In [73]:
# Assemble the complete template under its own name rather than appending to
# `template_string` in place: the in-place form grows the string every time
# this cell is re-run, duplicating the `{text}` / `{format_instructions}` part.
#
# The suffix is deliberately a plain (non-f) string so its braces survive as
# placeholders for the prompt template to fill in later. Note the single space
# that follows `{text}`.
prompt_template_string = template_string + "\ntext: {text} \n\n{format_instructions}\n"

prompt_template = ChatPromptTemplate.from_template(prompt_template_string)

In [74]:
## Fill the template placeholders with the sample text and the parser's format instructions
prompt = prompt_template.format_messages(
    text=user_input,
    format_instructions=format_instructions,
)

# Show the rendered text of the first message.
print(prompt[0].content)


            For the following text find the total number of biases that indicates a reference to prohibited terms such as: sexuality,neighborhood,disability,marital status,age,race,ethnicity,religion,gender.
            Example of what is not a bias: "white fence", "black door", "brown pen".
            
text: The property is in a highly sought-after neighborhood with good schools next to a Hindu temple in a predominantly black neighborhood. The lender has completed all HUD checks and OFAC checks for all borrowers, and principals, none of which has resulted in need for further investigation. Based on the appraiser's analysis, this development project will add an additional 66 units and associated retail space which will be completed in early 2023. The property is surrounded by a white colored fence that runs on the perimeter of the property 

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json

In [75]:
## Send the formatted messages to the chat model and keep its reply
response = chat(prompt)

In [76]:
## Show the raw text of the reply, before any parsing
print(response.content)

```json
{
	"explanation": "The text contains several biases related to neighborhood, race, and religion. The phrase 'predominantly black neighborhood' indicates a race bias. The mention of a 'Hindu temple' indicates a religious bias. The phrase 'highly sought-after neighborhood' indicates a neighborhood bias. There are no biases related to sexuality, disability, marital status, age, ethnicity, or gender.",
	"bias_type_count": 3,
	"highlighted_text": "**The property is in a highly sought-after neighborhood with good schools next to a Hindu temple in a predominantly black neighborhood**. The lender has completed all HUD checks and OFAC checks for all borrowers, and principals, none of which has resulted in need for further investigation. Based on the appraiser's analysis, this development project will add an additional 66 units and associated retail space which will be completed in early 2023. The property is surrounded by a white colored fence that runs on the perimeter of the property"

In [77]:
## Decode the reply with the structured parser, then show the result's type and value
response_content = output_parser.parse(response.content)
print(type(response_content), response_content, sep="\n")

<class 'dict'>
{'explanation': "The text contains several biases related to neighborhood, race, and religion. The phrase 'predominantly black neighborhood' indicates a race bias. The mention of a 'Hindu temple' indicates a religious bias. The phrase 'highly sought-after neighborhood' indicates a neighborhood bias. There are no biases related to sexuality, disability, marital status, age, ethnicity, or gender.", 'bias_type_count': 3, 'highlighted_text': "**The property is in a highly sought-after neighborhood with good schools next to a Hindu temple in a predominantly black neighborhood**. The lender has completed all HUD checks and OFAC checks for all borrowers, and principals, none of which has resulted in need for further investigation. Based on the appraiser's analysis, this development project will add an additional 66 units and associated retail space which will be completed in early 2023. The property is surrounded by a white colored fence that runs on the perimeter of the proper