# Workplace Harassment Dataset

In [1]:
import requests
def get_cases():
    response = requests.get('https://raw.githubusercontent.com/amir-karami/Workplace_Sexual_Harassment/master/EverySexsism-data-Workspace-Final.txt').content.decode('iso8859-1')
    questions = []
    cases = response.split('\r\n')
    for case in cases:
        questions.append(case)
    return questions
cases_list = get_cases()
len(cases_list), cases_list[0]

(2362,
 '"When I was 17, I worked in a clothing store that sold both mens and womens clothing, as well as some unisex items. I was working overtime in the evening, around 8.30pm during the winter so it was already dark. I was by myself on the shop floor as my manager was cashing up in the stock room, when a man in his late 50s/early 60s came in and started asking about the winter boots we had on special offer. We kept all of the stock below the display table, so I knelt on the floor to find his size when he said \x93Whilst you\x92re down there, why don\x92t you take one for the team?\x94. As I was quite a naive 17 year old, I didn\x92t quite understand, so I just awkwardly smiled and ignored it. After he bought the boots and left, I went to speak with my manager. She confirmed that he was actually insinuating I gave him a blowjob, I was shocked and quite upset! She told me to just \x91ignore it and don\x92t get upset by it\x92! Even when I think about it now, it makes me extremely unco

# Data Ingestion

In [2]:
from llama_index.legacy import SimpleDirectoryReader
state = 'california'
documents = SimpleDirectoryReader(input_files=["./output_domain/{}.txt".format(state)]).load_data()
print(len(documents), documents[0].doc_id, documents[0].metadata)

1 c0be720c-f2fe-4faa-9a0b-5bf0b0af421d {'file_path': 'output_domain/california.txt', 'file_name': 'california.txt', 'file_type': 'text/plain', 'file_size': 728688, 'creation_date': '2024-01-27', 'last_modified_date': '2024-01-27', 'last_accessed_date': '2024-03-14'}


# Vector DB

In [3]:
import chromadb
from llama_index.legacy.vector_stores import ChromaVectorStore
db = chromadb.PersistentClient(path="./models/{}".format(state))
chroma_collection = db.get_or_create_collection("{}_title".format(state))
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Node Splitting

In [4]:
from llama_index.legacy.ingestion import IngestionPipeline
from llama_index.legacy.node_parser import SentenceSplitter
transformations = [
    SentenceSplitter(chunk_size=512, chunk_overlap=64)
]
pipeline = IngestionPipeline(transformations=transformations, vector_store=vector_store)
nodes = pipeline.run(documents=documents)
len(nodes), type(nodes[0])

(344, llama_index.legacy.schema.TextNode)

In [5]:
nodes[0].text

'Topic - "CALIFORNIA TITLE IX DOCUMENTATION"\n\nDemocratic State (Blue Region) (This is crawled data)\n\nGender Equity/Title IX  - Equal Opportunity & Access (CA Dept of Education)\nCA Assessment of Student Performance and Progress (CAASPP)\nEnglish Language Proficiency Assessments for CA (ELPAC)\nCalifornia Longitudinal Pupil Achievement Data System (CALPADS)\nIt is the policy of the State of California that all persons, regardless  of their gender, should enjoy freedom from discrimination of any kind in the  educational institution of the state. The laws found in the\nare  collectively known as the Sex Equity in Education Act. These laws expand upon gender  equity and Title IX laws which provide guidance to California’s education system.  Each Local Educational Agency (LEA) will be responsible for following the laws  in addition to Title IX requirements.\nSenate Bill 1375 (California Education  Code, 221.61), creates new requirements for the Title IX coordinators’ informational  post

In [6]:
nodes[1].text

'A description of  how to file a complaint under Title IX, which shall include all of the  following:\nAn explanation of the statute of  limitations within which a complaint must be filed after an alleged incident of  discrimination has occurred, and how a complaint may be filed beyond the  statute of limitations.\nAn explanation of how the complaint will be  investigated and how the complainant may further pursue the complaint,  including, but not limited to, Internet Web links to this information on the  United States Department of Education Office for Civil Rights’ Internet Web  site.\nAn Internet Web link to the United States  Department of Education Office for Civil Rights complaints form, and the  contact information for the office, which shall include the phone number and  email address for the office.\nOn or before April 1, 2017, and annually  thereafter, the Superintendent shall send a letter through electronic means to  all public schools, private schools that receive federal

# Service and Storage Context

In [7]:
from llama_index.legacy.llms import Ollama
from llama_index.legacy.embeddings import HuggingFaceEmbedding
from llama_index.legacy import ServiceContext

llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434", request_timeout=1000, temperature=0.0, context_window=4096)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

In [8]:
from llama_index.legacy.storage.storage_context import StorageContext
from llama_index.legacy import VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, service_context=service_context, storage_context=storage_context)
query_engine = index.as_query_engine(similarity_top_k=5,verbose=True)

# Querying

In [9]:
cases_list[0]

'"When I was 17, I worked in a clothing store that sold both mens and womens clothing, as well as some unisex items. I was working overtime in the evening, around 8.30pm during the winter so it was already dark. I was by myself on the shop floor as my manager was cashing up in the stock room, when a man in his late 50s/early 60s came in and started asking about the winter boots we had on special offer. We kept all of the stock below the display table, so I knelt on the floor to find his size when he said \x93Whilst you\x92re down there, why don\x92t you take one for the team?\x94. As I was quite a naive 17 year old, I didn\x92t quite understand, so I just awkwardly smiled and ignored it. After he bought the boots and left, I went to speak with my manager. She confirmed that he was actually insinuating I gave him a blowjob, I was shocked and quite upset! She told me to just \x91ignore it and don\x92t get upset by it\x92! Even when I think about it now, it makes me extremely uncomfortabl

In [10]:
question = """Here is the example of people's experience of getting harassed: - 
{}
Can you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in {}, 
considering that the same case happened in some univerity or in some workspace. Please provide the {} State's Title IX specific resolution.""".format(cases_list[0], state, state)
question

'Here is the example of people\'s experience of getting harassed: - \n"When I was 17, I worked in a clothing store that sold both mens and womens clothing, as well as some unisex items. I was working overtime in the evening, around 8.30pm during the winter so it was already dark. I was by myself on the shop floor as my manager was cashing up in the stock room, when a man in his late 50s/early 60s came in and started asking about the winter boots we had on special offer. We kept all of the stock below the display table, so I knelt on the floor to find his size when he said \x93Whilst you\x92re down there, why don\x92t you take one for the team?\x94. As I was quite a naive 17 year old, I didn\x92t quite understand, so I just awkwardly smiled and ignored it. After he bought the boots and left, I went to speak with my manager. She confirmed that he was actually insinuating I gave him a blowjob, I was shocked and quite upset! She told me to just \x91ignore it and don\x92t get upset by it\x9

In [11]:
response = query_engine.query(question)
print(response)

 Title IX is a federal law that prohibits sex discrimination in educational institutions and workplaces receiving federal funding. In the scenario provided, a 17-year-old employee was harassed by a customer in a clothing store. The harasser made an insinuation that the employee should perform a sexual act.

In California, Title IX requires schools and workplaces to respond promptly and effectively to reports of sexual harassment or gender-based harassment that denies or limits a student's ability to participate in or benefit from the educational program. The resolution for such incidents would involve the following steps:

1. Reporting: The employee should report the incident to their supervisor, manager, or human resources department as soon as possible. In this case, the employee reported the incident to her manager.
2. Investigation: The employer or educational institution must conduct a thorough investigation into the allegations. This may include interviewing witnesses, reviewing 

# Source

In [12]:
response

Response(response=" Title IX is a federal law that prohibits sex discrimination in educational institutions and workplaces receiving federal funding. In the scenario provided, a 17-year-old employee was harassed by a customer in a clothing store. The harasser made an insinuation that the employee should perform a sexual act.\n\nIn California, Title IX requires schools and workplaces to respond promptly and effectively to reports of sexual harassment or gender-based harassment that denies or limits a student's ability to participate in or benefit from the educational program. The resolution for such incidents would involve the following steps:\n\n1. Reporting: The employee should report the incident to their supervisor, manager, or human resources department as soon as possible. In this case, the employee reported the incident to her manager.\n2. Investigation: The employer or educational institution must conduct a thorough investigation into the allegations. This may include interviewi

In [13]:
for data in response.source_nodes:
    print(data.node_id, data.score)
    print("------------------------------------------------------")
    print(data.text)
    print("------------------------------------------------------")

84eee7df-a4ad-486e-9ca9-602252f8312d 0.608579232654816
------------------------------------------------------
This is consistent with the Court's underlying concern
Most commenters acknowledged that OCR has provided useful factors to determine
whether harassing conduct took place "in the context of providing aid,
benefits, or services." However, some commenters stated that additional
clarity and examples regarding the issue were needed. Commenters also suggested
clarifying references to quid pro quo and hostile environment harassment as
these two concepts, though useful, do not determine the issue of whether the
school itself is considered responsible for the harassment. We agree with
these concerns and have made significant revisions to the sections "Harassment
that Denies or Limits a Student's Ability to Participate in or Benefit from
the Education Program" and "Harassment by Teachers and Other Employees" to
Gender-based Harassment, Including Harassment Predicated on Sex-stereotyping

In [14]:
cases_list[1]

'"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "'

In [15]:
question = """Here is the example of people's experience of getting harassed: - 
{}
Can you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in {}, 
considering that the same case happened in some univerity or in some workspace. Please provide the {} State's Title IX specific resolution.""".format(cases_list[1], state, state)
question

'Here is the example of people\'s experience of getting harassed: - \n"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "\nCan you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in california, \nconsidering that the same case happened in some univerity or in some workspace. Please p

In [16]:
response = query_engine.query(question)
print(response)

 Title IX is a federal law that prohibits sex discrimination in educational institutions and workplaces receiving federal funding. In the scenario provided, an individual experienced gender-based harassment through public engagement on social media. While the context does not specify whether this occurred in an educational institution or workplace in California, I will provide a general resolution based on Title IX implementation in California.

1. Reporting: The individual should report the incident to the responsible school employee or employer, such as a supervisor, human resources representative, or Title IX coordinator. In the case of social media harassment, reporting it to the platform's administrators may also be necessary.

2. Investigation: Upon receiving the report, the educational institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This includes interviewing witnesses, gathering evidence, and documenting the findings.

3

In [17]:
response

Response(response=" Title IX is a federal law that prohibits sex discrimination in educational institutions and workplaces receiving federal funding. In the scenario provided, an individual experienced gender-based harassment through public engagement on social media. While the context does not specify whether this occurred in an educational institution or workplace in California, I will provide a general resolution based on Title IX implementation in California.\n\n1. Reporting: The individual should report the incident to the responsible school employee or employer, such as a supervisor, human resources representative, or Title IX coordinator. In the case of social media harassment, reporting it to the platform's administrators may also be necessary.\n\n2. Investigation: Upon receiving the report, the educational institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This includes interviewing witnesses, gathering evidence, and docum

In [18]:
for data in response.source_nodes:
    print(data.node_id, data.score)
    print("------------------------------------------------------")
    print(data.text)
    print("------------------------------------------------------")

84eee7df-a4ad-486e-9ca9-602252f8312d 0.6388722875437757
------------------------------------------------------
This is consistent with the Court's underlying concern
Most commenters acknowledged that OCR has provided useful factors to determine
whether harassing conduct took place "in the context of providing aid,
benefits, or services." However, some commenters stated that additional
clarity and examples regarding the issue were needed. Commenters also suggested
clarifying references to quid pro quo and hostile environment harassment as
these two concepts, though useful, do not determine the issue of whether the
school itself is considered responsible for the harassment. We agree with
these concerns and have made significant revisions to the sections "Harassment
that Denies or Limits a Student's Ability to Participate in or Benefit from
the Education Program" and "Harassment by Teachers and Other Employees" to
Gender-based Harassment, Including Harassment Predicated on Sex-stereotypin

# Response

In [19]:
import time
response_list = []
time_list = []
question_list = []

for i in range(2, 7):
    question = """Here is the example of people's experience of getting harassed: - 
    {}
    Can you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in {}, 
    considering that the same case happened in some univerity or in some workspace. Please provide the {} State's Title IX specific resolution.""".format(cases_list[i], state, state)
    start = time.time()
    response = query_engine.query(question)
    end = time.time()
    response_list.append(response)
    question_list.append(question)
    time_list.append(end - start)

In [20]:
time_list

[342.55276441574097,
 195.33065581321716,
 198.4912269115448,
 175.1895546913147,
 158.24173855781555]

In [21]:
for i in range(0, 5):
    print(response_list[i].response)
    print("--------------------------------------------------")

 Based on the context provided, if the harassment or discrimination being experienced is taking place in a California-based educational institution or workplace receiving federal funding, then the following steps should be taken under Title IX implementation in California:

1. Reporting: The individual who has experienced the harassment or discrimination should report it to the appropriate Title IX coordinator or other designated personnel at their school or workplace as soon as possible.
2. Investigation: Once a report is made, the institution is required to conduct a thorough investigation into the allegations.
3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.
4. Retaliation: The institution is prohibited from retaliating against the individual who reported the harassment or discrimination.
5. Training: The institution should provide regular training to students

In [22]:
question_list[0]

'Here is the example of people\'s experience of getting harassed: - \n    "A leaflet about \x93supporting teenagers\x94 in my country published by so called \x93experienced\x94 psychotherapists contains a lot of pernicious gender stereotypes. The reason why I despair about living in my country is that girls get stereotyped as behaving one way and boys get stereotyped as behaving the opposite way, even when in reality there is a lot of overlap of behaviours between the sexes! In the leaflet, girls are automatically assumed to be \x93vulnerable\x94 and in need of \x93empowering\x94. This is kind of offensive and degrading to many young women who are capable and self sufficient. Some women do not view themselves as \x93helpless victims\x94, but instead as highly capable survivors! Calling girls \x93vulnerable\x94 or fragile doesn\x92t help them deal with stuff in an adaptive or a resilient way. While it is true that some young women struggle with issues around eating (please don\x92t use 

# Faithful Metrics

In [23]:
"""
    "Please tell if a given piece of information "
    "is supported by the context.\n"
    "You need to answer with either YES or NO.\n"
    "Answer YES if any of the context supports the information, even "
    "if most of the context is unrelated. "
    "Some examples are provided below. \n\n"
    "Information: Apple pie is generally double-crusted.\n"
    "Context: An apple pie is a fruit pie in which the principal filling "
    "ingredient is apples. \n"
    "Apple pie is often served with whipped cream, ice cream "
    "('apple pie à la mode'), custard or cheddar cheese.\n"
    "It is generally double-crusted, with pastry both above "
    "and below the filling; the upper crust may be solid or "
    "latticed (woven of crosswise strips).\n"
    "Answer: YES\n"
    "Information: Apple pies tastes bad.\n"
    "Context: An apple pie is a fruit pie in which the principal filling "
    "ingredient is apples. \n"
    "Apple pie is often served with whipped cream, ice cream "
    "('apple pie à la mode'), custard or cheddar cheese.\n"
    "It is generally double-crusted, with pastry both above "
    "and below the filling; the upper crust may be solid or "
    "latticed (woven of crosswise strips).\n"
    "Answer: NO\n"
    "Information: {query_str}\n"
    "Context: {context_str}\n"
    "Answer: 
"""

'\n    "Please tell if a given piece of information "\n    "is supported by the context.\n"\n    "You need to answer with either YES or NO.\n"\n    "Answer YES if any of the context supports the information, even "\n    "if most of the context is unrelated. "\n    "Some examples are provided below. \n\n"\n    "Information: Apple pie is generally double-crusted.\n"\n    "Context: An apple pie is a fruit pie in which the principal filling "\n    "ingredient is apples. \n"\n    "Apple pie is often served with whipped cream, ice cream "\n    "(\'apple pie à la mode\'), custard or cheddar cheese.\n"\n    "It is generally double-crusted, with pastry both above "\n    "and below the filling; the upper crust may be solid or "\n    "latticed (woven of crosswise strips).\n"\n    "Answer: YES\n"\n    "Information: Apple pies tastes bad.\n"\n    "Context: An apple pie is a fruit pie in which the principal filling "\n    "ingredient is apples. \n"\n    "Apple pie is often served with whipped cream,

In [24]:
llama2_llm = Ollama(model="llama2", base_url="http://127.0.0.1:11434", request_timeout=1000)
llama2_service_context = ServiceContext.from_defaults(llm=llama2_llm, embed_model=embed_model)

In [27]:
from llama_index.legacy.evaluation import FaithfulnessEvaluator
import nest_asyncio
nest_asyncio.apply()
faithfulness_llama2 = FaithfulnessEvaluator(service_context=llama2_service_context)
faithfulness_result = faithfulness_llama2.evaluate_response(query=question_list[0], response=response_list[0])

In [28]:
faithfulness_result

EvaluationResult(query=None, contexts=['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discuss

## Detail around Evaluation

In [29]:
print(faithfulness_result.query)

None


In [30]:
len(faithfulness_result.contexts), faithfulness_result.contexts

(5,
 ['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gend

In [31]:
[nodes.text for nodes in response_list[0].source_nodes]

['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gender-ba

In [33]:
faithfulness_result.passing, faithfulness_result.score

(True, 1.0)

In [35]:
faithfulness_result.response, faithfulness_result.feedback

(" Based on the context provided, if the harassment or discrimination being experienced is taking place in a California-based educational institution or workplace receiving federal funding, then the following steps should be taken under Title IX implementation in California:\n\n1. Reporting: The individual who has experienced the harassment or discrimination should report it to the appropriate Title IX coordinator or other designated personnel at their school or workplace as soon as possible.\n2. Investigation: Once a report is made, the institution is required to conduct a thorough investigation into the allegations.\n3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.\n4. Retaliation: The institution is prohibited from retaliating against the individual who reported the harassment or discrimination.\n5. Training: The institution should provide regular training to 

In [37]:
print(faithfulness_result.feedback)

YES


## Result for response

In [38]:
total_faith = 0
faith_result = []
for i in range(0, len(response_list)):
    faithfulness_result = faithfulness_llama2.evaluate_response(query=question_list[i], response=response_list[i])
    faith_result.append(faithfulness_result)
    total_faith = total_faith + faithfulness_result.score
total_faith, total_faith/len(response_list)

(5.0, 1.0)

In [42]:
for faith in faith_result:
    print(faith.feedback, faith.score)
    # print(faith.contexts)
    print("-----------------------------")

Yes, the information is supported by the context provided in the passage. The passage provides information about the types of conduct that can constitute harassment and how it is evaluated under Title IX implementation in California institutions receiving federal funding. It also provides examples of gender-based harassment predicated on sex stereotyping and clarifies that such harassment is covered by Title IX if it is sufficiently serious to deny or limit a student's ability to participate in or benefit from the program. Additionally, the passage notes that both the proposed revised guidance and the final revised guidance indicate that incidents of sexual harassment and non-sexual, gender-based harassment can be combined to determine whether a hostile environment has been created. Overall, the information provided in the passage is consistent with the underlying concern of the Court's guidance on harassment that denies or limits a student's ability to participate in or benefit from t

# Factuality using FactKB

In [43]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [44]:
summary = response_list[0].response
article = ''
for nodes in response_list[0].source_nodes:
    article = article + nodes.text + '\n'
# response_list[0].source_nodes[0].text + response_list[0].source_nodes[1].text
input = [[summary, article]]

In [45]:
input

[[" Based on the context provided, if the harassment or discrimination being experienced is taking place in a California-based educational institution or workplace receiving federal funding, then the following steps should be taken under Title IX implementation in California:\n\n1. Reporting: The individual who has experienced the harassment or discrimination should report it to the appropriate Title IX coordinator or other designated personnel at their school or workplace as soon as possible.\n2. Investigation: Once a report is made, the institution is required to conduct a thorough investigation into the allegations.\n3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.\n4. Retaliation: The institution is prohibited from retaliating against the individual who reported the harassment or discrimination.\n5. Training: The institution should provide regular training to

In [46]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base", padding="max_length", truncation=True)
factkb = AutoModelForSequenceClassification.from_pretrained("bunsenfeng/FactKB", num_labels = 2)
tokens = tokenizer(input, return_tensors="pt", padding="max_length", truncation=True)
result = torch.softmax(factkb(**tokens).logits, dim = 1)
print('The factuality score (0-1, 1 as factual) is: ', float(result[0][1]))

The factuality score (0-1, 1 as factual) is:  0.8803286552429199


In [47]:
factuality_list = []
for i in range(0, len(response_list)):
    contexts = ''
    for nodes in response_list[i].source_nodes:
        contexts = contexts + nodes.text + '\n'
    # contexts = response_list[i].source_nodes[0].text + response_list[i].source_nodes[1].text
    response = response_list[i].response
    input = [[response, contexts]]
    tokens = tokenizer(input, return_tensors="pt", padding="max_length", truncation=True)
    result = torch.softmax(factkb(**tokens).logits, dim = 1)
    factuality_list.append(float(result[0][1]))

In [48]:
sum(factuality_list)/len(factuality_list), factuality_list

(0.9526949524879456,
 [0.8803286552429199,
  0.9896696209907532,
  0.9910082221031189,
  0.9045525789260864,
  0.9979156851768494])

# Relevancy

In [52]:
"""
    "Your task is to evaluate if the response for the query \
    is in line with the context information provided.\n"
    "You have two options to answer. Either YES/ NO.\n"
    "Answer - YES, if the response for the query \
    is in line with context information otherwise NO.\n"
    "Query and Response: \n {query_str}\n"
    "Context: \n {context_str}\n"
    "Answer: "
"""

'\n    "Your task is to evaluate if the response for the query     is in line with the context information provided.\n"\n    "You have two options to answer. Either YES/ NO.\n"\n    "Answer - YES, if the response for the query     is in line with context information otherwise NO.\n"\n    "Query and Response: \n {query_str}\n"\n    "Context: \n {context_str}\n"\n    "Answer: "\n'

In [51]:
from llama_index.legacy.evaluation import RelevancyEvaluator
relevancy_llama2 = RelevancyEvaluator(service_context=llama2_service_context)
relevancy_result = relevancy_llama2.evaluate_response(query=question_list[0], response=response_list[0])

## Details about the Relevancy

In [53]:
relevancy_result

EvaluationResult(query='Here is the example of people\'s experience of getting harassed: - \n    "A leaflet about \x93supporting teenagers\x94 in my country published by so called \x93experienced\x94 psychotherapists contains a lot of pernicious gender stereotypes. The reason why I despair about living in my country is that girls get stereotyped as behaving one way and boys get stereotyped as behaving the opposite way, even when in reality there is a lot of overlap of behaviours between the sexes! In the leaflet, girls are automatically assumed to be \x93vulnerable\x94 and in need of \x93empowering\x94. This is kind of offensive and degrading to many young women who are capable and self sufficient. Some women do not view themselves as \x93helpless victims\x94, but instead as highly capable survivors! Calling girls \x93vulnerable\x94 or fragile doesn\x92t help them deal with stuff in an adaptive or a resilient way. While it is true that some young women struggle with issues around eatin

In [54]:
relevancy_result.query

'Here is the example of people\'s experience of getting harassed: - \n    "A leaflet about \x93supporting teenagers\x94 in my country published by so called \x93experienced\x94 psychotherapists contains a lot of pernicious gender stereotypes. The reason why I despair about living in my country is that girls get stereotyped as behaving one way and boys get stereotyped as behaving the opposite way, even when in reality there is a lot of overlap of behaviours between the sexes! In the leaflet, girls are automatically assumed to be \x93vulnerable\x94 and in need of \x93empowering\x94. This is kind of offensive and degrading to many young women who are capable and self sufficient. Some women do not view themselves as \x93helpless victims\x94, but instead as highly capable survivors! Calling girls \x93vulnerable\x94 or fragile doesn\x92t help them deal with stuff in an adaptive or a resilient way. While it is true that some young women struggle with issues around eating (please don\x92t use 

In [55]:
relevancy_result.contexts

['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gender-ba

In [56]:
relevancy_result.response

" Based on the context provided, if the harassment or discrimination being experienced is taking place in a California-based educational institution or workplace receiving federal funding, then the following steps should be taken under Title IX implementation in California:\n\n1. Reporting: The individual who has experienced the harassment or discrimination should report it to the appropriate Title IX coordinator or other designated personnel at their school or workplace as soon as possible.\n2. Investigation: Once a report is made, the institution is required to conduct a thorough investigation into the allegations.\n3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.\n4. Retaliation: The institution is prohibited from retaliating against the individual who reported the harassment or discrimination.\n5. Training: The institution should provide regular training to s

In [57]:
relevancy_result.passing

True

In [58]:
relevancy_result.feedback

'Based on the information provided, the answer is YES. The question and response align as the context of the scenario provided matches the criteria for harassment/discrimination based on Title IX implementation in California. The individual experiencing the harassment has the right to report it to the appropriate Title IX coordinator or other designated personnel at their school or workplace, and the institution must take prompt and effective action to remedy the situation.'

In [59]:
relevancy_result.score

1.0

## Result for response

In [60]:
total_relevance = 0
rel_result = []
for i in range(0, len(response_list)):
    relevance_result = relevancy_llama2.evaluate_response(query=question_list[i], response=response_list[i])
    rel_result.append(relevance_result)
    total_relevance = total_relevance + relevance_result.score
total_relevance, total_relevance/len(response_list)

(3.0, 0.6)

In [61]:
for rel in rel_result:
    print(rel.feedback, rel.score)
    print("-----------------------------")

Based on the provided context, the answer is YES. The question and response provide evidence that the individual has experienced harassment or discrimination based on gender, which falls under Title IX protections in California institutions receiving federal funding. The response outlines the steps that should be taken under Title IX implementation in California to address the situation, including reporting, investigation, remedial action, and training. The guidance provided by the Office for Civil Rights (OCR) within the US Department of Education also supports the conclusion that harassment or discrimination based on gender in California institutions receiving federal funding is covered under Title IX and must be addressed through a comprehensive framework outlined in the guidance. 1.0
-----------------------------
Title IX of the Education Amendments of 1972 is a federal law that prohibits discrimination on the basis of sex in any education program or activity receiving federal fina

# Semantic Similarity

In [33]:
from llama_index.legacy.evaluation import SemanticSimilarityEvaluator
semanticsim_llama2 = SemanticSimilarityEvaluator(service_context=llama2_service_context)
ref = response_list[0].source_nodes[0].text + response_list[0].source_nodes[1].text
semanticsim_result = semanticsim_llama2.evaluate_response(response=response_list[0], reference=ref)
semanticsim_result

EvaluationResult(query=None, contexts=None, response=None, passing=True, feedback='Similarity score: 0.8228875075176627', score=0.8228875075176627, pairwise_source=None, invalid_result=False, invalid_reason=None)

In [35]:
ref = question_list[0]
semanticsim_result = semanticsim_llama2.evaluate_response(response=response_list[0], reference=ref)
semanticsim_result

EvaluationResult(query=None, contexts=None, response=None, passing=False, feedback='Similarity score: 0.690394844750004', score=0.690394844750004, pairwise_source=None, invalid_result=False, invalid_reason=None)

## Result for response

In [36]:
total_sem = 0
sem_result = []
for i in range(0, len(response_list)):
    ref = response_list[i].source_nodes[0].text + '\n' + response_list[i].source_nodes[1].text
    semsim_result = semanticsim_llama2.evaluate_response(response=response_list[i], reference=ref)
    sem_result.append(semsim_result)
    total_sem = total_sem + semsim_result.score
total_sem, total_sem/len(response_list)

(3.8602933441620637, 0.7720586688324127)

In [37]:
total_sem = 0
sem_result = []
for i in range(0, len(response_list)):
    ref = question_list[i]
    semsim_result = semanticsim_llama2.evaluate_response(response=response_list[i], reference=ref)
    sem_result.append(semsim_result)
    total_sem = total_sem + semsim_result.score
total_sem, total_sem/len(response_list)

(3.6512103876446673, 0.7302420775289334)

# Correctness Evaluation

In [62]:
def new_parser(eval_response: str):
    """
    Default parser function for evaluation response.

    Args:
        eval_response (str): The response string from the evaluation.

    Returns:
        Tuple[float, str]: A tuple containing the score as a float and the reasoning as a string.
    """
    score_str, reasoning_str = eval_response.split("\n", 1)
    if len(score_str.split(' ')) > 1:
        score = float(score_str.split(' ')[1])
    else:
        score = float(score_str)
    reasoning = reasoning_str.lstrip("\n")
    return score, reasoning

In [71]:
from llama_index.legacy.evaluation import CorrectnessEvaluator
correctness_llama2 = CorrectnessEvaluator(service_context=llama2_service_context)
ref = ''
for nodes in response_list[0].source_nodes:
    ref = ref + nodes.text + '\n'
correctness_result = correctness_llama2.evaluate_response(query=question_list[0], response=response_list[0], reference=ref)
correctness_result.feedback, correctness_result.score

ValueError: could not convert string to float: 'Based on the context provided in the document, if an individual experiences harassment or discrimination at a California-based educational institution or workplace receiving federal funding, they should take the following steps under Title IX implementation in California:'

In [64]:
from llama_index.legacy.prompts import (
    ChatMessage,
    ChatPromptTemplate,
    MessageRole,
)
DEFAULT_SYSTEM_TEMPLATE = """
You are an expert evaluation system for a question answering chatbot.

You are given the following information:
- a user query, and
- a generated answer

You may also be given a reference answer to use for reference in your evaluation.

Your job is to judge the relevance and correctness of the generated answer.
Output a single score that represents a holistic evaluation.
You must return your response in a line with only the score.
Do not return answers in any other format.
On a separate line provide your reasoning for the score as well.

Follow these guidelines for scoring:
- Your score has to be between 1 and 5, where 1 is the worst and 5 is the best.
- If the generated answer is not relevant to the user query, \
you should give a score of 1.
- If the generated answer is relevant but contains mistakes, \
you should give a score between 2 and 3.
- If the generated answer is relevant and fully correct, \
you should give a score between 4 and 5.

Example Response:
4.0
The generated answer has the exact same metrics as the reference answer, \
    but it is not as concise.

"""

DEFAULT_USER_TEMPLATE = """
## User Query
{query}

## Reference Answer
{reference_answer}

## Generated Answer
{generated_answer}
"""

DEFAULT_EVAL_TEMPLATE = ChatPromptTemplate(
    message_templates=[
        ChatMessage(role=MessageRole.SYSTEM, content=DEFAULT_SYSTEM_TEMPLATE),
        ChatMessage(role=MessageRole.USER, content=DEFAULT_USER_TEMPLATE),
    ]
)

In [65]:
ref = ''
for nodes in response_list[0].source_nodes:
    ref = ref + nodes.text

eval_response = await llama2_service_context.llm.apredict(
            prompt=DEFAULT_EVAL_TEMPLATE,
            query=question_list[0],
            generated_answer=response_list[0],
            reference_answer=ref,
)
eval_response

"Based on the provided context, if someone experiences harassment or discrimination in an educational establishment or workplace in California that receives federal funding, they should take the following steps under Title IX implementation in California:\n\n1. Reporting: The person who has experienced harassment or discrimination must report it to their institution's Title IX coordinator or other designated personnel as soon as possible.\n2. Investigation: Once a report is made, the institution must conduct a thorough investigation into the allegations.\n3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.\n4. Retaliation: The institution is prohibited from retaliating against the person who reported the harassment or discrimination.\n5. Training: The institution should provide regular training to students, faculty, and staff on Title IX policies and procedures and 

"Based on the provided context, if someone experiences harassment or discrimination in an educational establishment or workplace in California that receives federal funding, they should take the following steps under Title IX implementation in California:\n\n1. Reporting: The person who has experienced harassment or discrimination must report it to their institution's Title IX coordinator or other designated personnel as soon as possible.\n2. Investigation: Once a report is made, the institution must conduct a thorough investigation into the allegations.\n3. Remedial Action: If the investigation finds that harassment or discrimination has occurred, the institution must take prompt and effective action to remedy the situation.\n4. Retaliation: The institution is prohibited from retaliating against the person who reported the harassment or discrimination.\n5. Training: The institution should provide regular training to students, faculty, and staff on Title IX policies and procedures and 

In [66]:
score, reasoning = new_parser(eval_response)
score, reasoning

ValueError: could not convert string to float: 'on'

## Result for response

In [65]:
total_correct = 0
cor_result = []
for i in range(0, len(response_list)):
    ref = ''
    for nodes in response_list[i].sources_nodes:
        ref = ref + nodes.text
    correct_result = await llama2_service_context.llm.apredict(
            prompt=DEFAULT_EVAL_TEMPLATE,
            query=question_list[i],
            generated_answer=response_list[i],
            reference_answer=ref,
    )
    score, reasoning = new_parser(correct_result)
    cor_result.append(correct_result)
    total_correct = total_correct + score
total_correct, total_correct/len(response_list)

(21.0, 4.2)

# Context Relevancy

In [132]:
from llama_index.legacy.evaluation import ContextRelevancyEvaluator
conrel_llama2 = ContextRelevancyEvaluator(service_context=llama2_service_context)
conrel_result = conrel_llama2.evaluate_response(query=question_list[1], response=response_list[1])

In [133]:
conrel_result.feedback

'Based on the provided query, I evaluate that the context retrieved is relevant to the user\'s query. The query discusses people\'s experiences of getting harassed and the need for resolution in California based on Title IX implementation. The context provides information on Title IX protection against sex discrimination, including gender-based harassment, and how it applies to both male and female students.\n\nIn relation to the user\'s query, the context provides valuable information on the legal framework for addressing harassment in California educational institutions. It explains that Title IX protects any "person" from sex discrimination, including gender-based harassment, and that both male and female students are protected from sexual harassment engaged in by a school\'s employees, other students, or third parties. The context also highlights the importance of assessing all related circumstances to determine whether a hostile environment exists, which is relevant to the user\'s

In [134]:
conrel_result

EvaluationResult(query='Here is the example of people\'s experience of getting harassed: - \n    "As a 16 year old girl, I know very well that sexism exists. On pretty much a daily basis, I am shouted out from car windows whilst walking down the street, or approached by strangers whilst out in public, usually by older men. This is something that as a female, you get used to, enough though you shouldn\x92t have to. One thing I will not get over though, is being told that sexism is not an issue. I take a sociology class at my college, and one of the subjects we cover if feminism. This is something that me and my friends are passionate about, we believe it needs to be talked about in order for things to change. There are however, a number of boys in that class who disagree with feminists and try to tell us everything we say is wrong. I can accept this; everyone is entitled to an opinion and I am willing to listen to anyone\x92s opinion. Today it reached a point where I can no longer just 

# Answer Relevancy

In [135]:
from llama_index.legacy.evaluation import AnswerRelevancyEvaluator
ansrel_llama2 = AnswerRelevancyEvaluator(service_context=llama2_service_context)
ansrel_result = ansrel_llama2.evaluate_response(query=question_list[0], response=response_list[0])

In [136]:
ansrel_result

EvaluationResult(query='Here is the example of people\'s experience of getting harassed: - \n    "A leaflet about \x93supporting teenagers\x94 in my country published by so called \x93experienced\x94 psychotherapists contains a lot of pernicious gender stereotypes. The reason why I despair about living in my country is that girls get stereotyped as behaving one way and boys get stereotyped as behaving the opposite way, even when in reality there is a lot of overlap of behaviours between the sexes! In the leaflet, girls are automatically assumed to be \x93vulnerable\x94 and in need of \x93empowering\x94. This is kind of offensive and degrading to many young women who are capable and self sufficient. Some women do not view themselves as \x93helpless victims\x94, but instead as highly capable survivors! Calling girls \x93vulnerable\x94 or fragile doesn\x92t help them deal with stuff in an adaptive or a resilient way. While it is true that some young women struggle with issues around eatin

In [139]:
ansrel_result.feedback

"1. Does the provided response match the subject matter of the user's query?\nYes, the response addresses the issue of gender stereotyping and its impact on students in an educational setting, which is relevant to the user's query.\n2. Does the provided response attempt to address the focus or perspective on the subject matter taken on by the user's query?\nYes, the response acknowledges the problem of gender stereotyping and provides practical steps for addressing it from a Title IX implementation perspective in California. It also considers the broader context of gender-based harassment and its impact on students' ability to participate in or benefit from educational programs.\nTotal score: 8/10"

In [142]:
import re
def parser_function(output_str: str):
    pattern = r"([\s\S]+)(?:\[Total score:\]\s*)(\d)"

    # Using regex to find all matches
    result = re.search(pattern, output_str)

    # Check if any match is found
    if result:
        # Assuming there's only one match in the text, extract feedback and response
        feedback, score = result.groups()
        score = float(score) if score is not None else score
        return score, feedback.strip()
    else:
        return None, None
a, b = parser_function(ansrel_result.feedback)

In [143]:
a, b

(None, None)