# Workplace Harassment Dataset

In [1]:
import requests
def get_cases():
    response = requests.get('https://raw.githubusercontent.com/amir-karami/Workplace_Sexual_Harassment/master/EverySexsism-data-Workspace-Final.txt').content.decode('iso8859-1')
    questions = []
    cases = response.split('\r\n')
    for case in cases:
        questions.append(case)
    return questions
cases_list = get_cases()
len(cases_list), cases_list[1]

(2362,
 '"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "')

# Data Ingestion

In [2]:
from llama_index.legacy import SimpleDirectoryReader
state = 'california'
documents = SimpleDirectoryReader(input_files=["./output_domain/{}.txt".format(state)]).load_data()
print(len(documents), documents[0].doc_id, documents[0].metadata)

1 0d1da30a-a3c7-4623-8826-b1ab4f9f6a69 {'file_path': 'output_domain/california.txt', 'file_name': 'california.txt', 'file_type': 'text/plain', 'file_size': 728688, 'creation_date': '2024-01-27', 'last_modified_date': '2024-01-27', 'last_accessed_date': '2024-03-14'}


# Vector DB

In [3]:
import chromadb
from llama_index.legacy.vector_stores import ChromaVectorStore
db = chromadb.PersistentClient(path="./models/{}_demo_10".format(state))
chroma_collection = db.get_or_create_collection("{}_title".format(state))
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Node Splitting

In [4]:
from llama_index.legacy.ingestion import IngestionPipeline
from llama_index.legacy.node_parser import SentenceSplitter
transformations = [
    SentenceSplitter(chunk_size=512, chunk_overlap=64)
]
pipeline = IngestionPipeline(transformations=transformations, vector_store=vector_store)
nodes = pipeline.run(documents=documents)
len(nodes), type(nodes[0])

(344, llama_index.legacy.schema.TextNode)

In [5]:
nodes[0].text

'Topic - "CALIFORNIA TITLE IX DOCUMENTATION"\n\nDemocratic State (Blue Region) (This is crawled data)\n\nGender Equity/Title IX  - Equal Opportunity & Access (CA Dept of Education)\nCA Assessment of Student Performance and Progress (CAASPP)\nEnglish Language Proficiency Assessments for CA (ELPAC)\nCalifornia Longitudinal Pupil Achievement Data System (CALPADS)\nIt is the policy of the State of California that all persons, regardless  of their gender, should enjoy freedom from discrimination of any kind in the  educational institution of the state. The laws found in the\nare  collectively known as the Sex Equity in Education Act. These laws expand upon gender  equity and Title IX laws which provide guidance to California’s education system.  Each Local Educational Agency (LEA) will be responsible for following the laws  in addition to Title IX requirements.\nSenate Bill 1375 (California Education  Code, 221.61), creates new requirements for the Title IX coordinators’ informational  post

In [6]:
nodes[1].text

'A description of  how to file a complaint under Title IX, which shall include all of the  following:\nAn explanation of the statute of  limitations within which a complaint must be filed after an alleged incident of  discrimination has occurred, and how a complaint may be filed beyond the  statute of limitations.\nAn explanation of how the complaint will be  investigated and how the complainant may further pursue the complaint,  including, but not limited to, Internet Web links to this information on the  United States Department of Education Office for Civil Rights’ Internet Web  site.\nAn Internet Web link to the United States  Department of Education Office for Civil Rights complaints form, and the  contact information for the office, which shall include the phone number and  email address for the office.\nOn or before April 1, 2017, and annually  thereafter, the Superintendent shall send a letter through electronic means to  all public schools, private schools that receive federal

# Service and Storage Context

In [7]:
from llama_index.legacy.llms import Ollama
from llama_index.legacy.embeddings import HuggingFaceEmbedding
from llama_index.legacy import ServiceContext

llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434", request_timeout=1000, temperature=0.0, context_window=4096)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

In [8]:
from llama_index.legacy.storage.storage_context import StorageContext
from llama_index.legacy import VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, service_context=service_context, storage_context=storage_context)
query_engine = index.as_query_engine(similarity_top_k=10,verbose=True)

# Querying

In [9]:
cases_list[1]

'"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "'

In [10]:
question = """Here is the example of people's experience of getting harassed: - 
{}
Can you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in {}, 
considering that the same case happened in some univerity or in some workspace. Please provide the {} State's Title IX specific resolution.""".format(cases_list[1], state, state)
question

'Here is the example of people\'s experience of getting harassed: - \n"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "\nCan you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in california, \nconsidering that the same case happened in some univerity or in some workspace. Please p

In [11]:
response = query_engine.query(question)
print(response)

 In light of the context provided, a student or employee in California experiencing repeated sex-based harassment through social media, such as on Twitter, may follow these steps to seek resolution under Title IX:

1. Reporting: The individual should report the incident to their educational institution's or workplace's designated Title IX coordinator as soon as possible. This can typically be done via an online reporting form, email, or by contacting the Title IX office directly.
2. Investigation: Upon receiving a report, the institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This may involve interviewing witnesses, gathering evidence, and providing both parties with opportunities to share their perspectives.
3. Remedial Action: If the investigation determines that sex-based harassment has occurred, the institution or workplace must take appropriate remedial action to address the situation. This could include disciplinary measures 

# Source

In [12]:
response

Response(response=" In light of the context provided, a student or employee in California experiencing repeated sex-based harassment through social media, such as on Twitter, may follow these steps to seek resolution under Title IX:\n\n1. Reporting: The individual should report the incident to their educational institution's or workplace's designated Title IX coordinator as soon as possible. This can typically be done via an online reporting form, email, or by contacting the Title IX office directly.\n2. Investigation: Upon receiving a report, the institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This may involve interviewing witnesses, gathering evidence, and providing both parties with opportunities to share their perspectives.\n3. Remedial Action: If the investigation determines that sex-based harassment has occurred, the institution or workplace must take appropriate remedial action to address the situation. This could include

In [13]:
for data in response.source_nodes:
    print(data.node_id, data.score)
    print("------------------------------------------------------")
    print(data.text)
    print("------------------------------------------------------")

26925a33-f58a-40d1-a635-df3b7ed547c0 0.6388722875437757
------------------------------------------------------
This is consistent with the Court's underlying concern
Most commenters acknowledged that OCR has provided useful factors to determine
whether harassing conduct took place "in the context of providing aid,
benefits, or services." However, some commenters stated that additional
clarity and examples regarding the issue were needed. Commenters also suggested
clarifying references to quid pro quo and hostile environment harassment as
these two concepts, though useful, do not determine the issue of whether the
school itself is considered responsible for the harassment. We agree with
these concerns and have made significant revisions to the sections "Harassment
that Denies or Limits a Student's Ability to Participate in or Benefit from
the Education Program" and "Harassment by Teachers and Other Employees" to
Gender-based Harassment, Including Harassment Predicated on Sex-stereotypin

In [22]:
import time
response_list = []
time_list = []
question_list = []

for i in range(2, 7):
    question = """Here is the example of people's experience of getting harassed: - 
    {}
    Can you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in {}, 
    considering that the same case happened in some univerity or in some workspace. Please provide the {} State's Title IX specific resolution.""".format(cases_list[i], state, state)
    start = time.time()
    response = query_engine.query(question)
    end = time.time()
    response_list.append(response)
    question_list.append(question)
    time_list.append(end - start)

In [23]:
time_list

[601.9465453624725,
 345.8997447490692,
 326.33608293533325,
 317.9749345779419,
 308.5421390533447]

In [24]:
for i in range(0, 5):
    print(response_list[i].response)
    print("--------------------------------------------------")

 In California-based educational institutions or workplaces receiving federal funding, where harassment or discrimination based on sex is reported, the following actions should be taken in accordance with Title IX implementation:

1. Reporting: The affected individual should report the incident to the designated Title IX coordinator or other responsible personnel as soon as possible.
2. Investigation: Upon receiving a report, the institution must initiate a thorough investigation into the allegations.
3. Remedial Action: If the investigation reveals that harassment or discrimination has occurred, prompt and effective action should be taken to address the situation.
4. Confidentiality: All reports and investigations must be conducted in a confidential manner, ensuring privacy for all involved parties.
5. Support Services: The institution should provide necessary resources and support services to help the affected individual cope with the impact of the harassment or discrimination.
6. Re

In [25]:
question_list[0]

'Here is the example of people\'s experience of getting harassed: - \n    "A leaflet about \x93supporting teenagers\x94 in my country published by so called \x93experienced\x94 psychotherapists contains a lot of pernicious gender stereotypes. The reason why I despair about living in my country is that girls get stereotyped as behaving one way and boys get stereotyped as behaving the opposite way, even when in reality there is a lot of overlap of behaviours between the sexes! In the leaflet, girls are automatically assumed to be \x93vulnerable\x94 and in need of \x93empowering\x94. This is kind of offensive and degrading to many young women who are capable and self sufficient. Some women do not view themselves as \x93helpless victims\x94, but instead as highly capable survivors! Calling girls \x93vulnerable\x94 or fragile doesn\x92t help them deal with stuff in an adaptive or a resilient way. While it is true that some young women struggle with issues around eating (please don\x92t use 

# Faithful Metrics

In [14]:
"""
    "Please tell if a given piece of information "
    "is supported by the context.\n"
    "You need to answer with either YES or NO.\n"
    "Answer YES if any of the context supports the information, even "
    "if most of the context is unrelated. "
    "Some examples are provided below. \n\n"
    "Information: Apple pie is generally double-crusted.\n"
    "Context: An apple pie is a fruit pie in which the principal filling "
    "ingredient is apples. \n"
    "Apple pie is often served with whipped cream, ice cream "
    "('apple pie à la mode'), custard or cheddar cheese.\n"
    "It is generally double-crusted, with pastry both above "
    "and below the filling; the upper crust may be solid or "
    "latticed (woven of crosswise strips).\n"
    "Answer: YES\n"
    "Information: Apple pies tastes bad.\n"
    "Context: An apple pie is a fruit pie in which the principal filling "
    "ingredient is apples. \n"
    "Apple pie is often served with whipped cream, ice cream "
    "('apple pie à la mode'), custard or cheddar cheese.\n"
    "It is generally double-crusted, with pastry both above "
    "and below the filling; the upper crust may be solid or "
    "latticed (woven of crosswise strips).\n"
    "Answer: NO\n"
    "Information: {query_str}\n"
    "Context: {context_str}\n"
    "Answer: 
"""

'\n    "Please tell if a given piece of information "\n    "is supported by the context.\n"\n    "You need to answer with either YES or NO.\n"\n    "Answer YES if any of the context supports the information, even "\n    "if most of the context is unrelated. "\n    "Some examples are provided below. \n\n"\n    "Information: Apple pie is generally double-crusted.\n"\n    "Context: An apple pie is a fruit pie in which the principal filling "\n    "ingredient is apples. \n"\n    "Apple pie is often served with whipped cream, ice cream "\n    "(\'apple pie à la mode\'), custard or cheddar cheese.\n"\n    "It is generally double-crusted, with pastry both above "\n    "and below the filling; the upper crust may be solid or "\n    "latticed (woven of crosswise strips).\n"\n    "Answer: YES\n"\n    "Information: Apple pies tastes bad.\n"\n    "Context: An apple pie is a fruit pie in which the principal filling "\n    "ingredient is apples. \n"\n    "Apple pie is often served with whipped cream,

In [15]:
llama2_llm = Ollama(model="llama2", base_url="http://127.0.0.1:11434", request_timeout=1000)
llama2_service_context = ServiceContext.from_defaults(llm=llama2_llm, embed_model=embed_model)

In [16]:
from llama_index.legacy.evaluation import FaithfulnessEvaluator
import nest_asyncio
nest_asyncio.apply()
faithfulness_llama2 = FaithfulnessEvaluator(service_context=llama2_service_context)
faithfulness_result = faithfulness_llama2.evaluate_response(response=response)

In [18]:
faithfulness_result

EvaluationResult(query=None, contexts=['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discuss

# Detail around Faithful

In [19]:
print(faithfulness_result.query)

None


In [20]:
len(faithfulness_result.contexts), faithfulness_result.contexts

(10,
 ['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gen

In [21]:
[nodes.text for nodes in response.source_nodes]

['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gender-ba

In [22]:
faithfulness_result.passing, faithfulness_result.score

(True, 1.0)

In [23]:
faithfulness_result.response

" In light of the context provided, a student or employee in California experiencing repeated sex-based harassment through social media, such as on Twitter, may follow these steps to seek resolution under Title IX:\n\n1. Reporting: The individual should report the incident to their educational institution's or workplace's designated Title IX coordinator as soon as possible. This can typically be done via an online reporting form, email, or by contacting the Title IX office directly.\n2. Investigation: Upon receiving a report, the institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This may involve interviewing witnesses, gathering evidence, and providing both parties with opportunities to share their perspectives.\n3. Remedial Action: If the investigation determines that sex-based harassment has occurred, the institution or workplace must take appropriate remedial action to address the situation. This could include disciplinary meas

In [24]:
print(faithfulness_result.feedback)

The information provided in the context is present in the existing answer to the question. Therefore, the answer remains YES. The existing answer already indicates that Title IX prohibits any form of sexual harassment that creates a hostile environment for students to learn and participate in school programs, regardless of the sex of the harasser or the student being harassed. The new context provides additional information on the scope of Title IX's protection, including that it applies to both male and female students and that the conduct must be sufficiently serious to limit or deny a student's ability to participate in or benefit from the school's program.


# Factuality using FactKB

In [25]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [26]:
summary = response.response
article = ''
for nodes in response.source_nodes:
    article = article + nodes.text + '\n'
# response_list[0].source_nodes[0].text + response_list[0].source_nodes[1].text
input = [[summary, article]]

In [27]:
input

[[" In light of the context provided, a student or employee in California experiencing repeated sex-based harassment through social media, such as on Twitter, may follow these steps to seek resolution under Title IX:\n\n1. Reporting: The individual should report the incident to their educational institution's or workplace's designated Title IX coordinator as soon as possible. This can typically be done via an online reporting form, email, or by contacting the Title IX office directly.\n2. Investigation: Upon receiving a report, the institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This may involve interviewing witnesses, gathering evidence, and providing both parties with opportunities to share their perspectives.\n3. Remedial Action: If the investigation determines that sex-based harassment has occurred, the institution or workplace must take appropriate remedial action to address the situation. This could include disciplinary me

In [28]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base", padding="max_length", truncation=True)
factkb = AutoModelForSequenceClassification.from_pretrained("bunsenfeng/FactKB", num_labels = 2)
tokens = tokenizer(input, return_tensors="pt", padding="max_length", truncation=True)
result = torch.softmax(factkb(**tokens).logits, dim = 1)
print('The factuality score (0-1, 1 as factual) is: ', float(result[0][1]))

The factuality score (0-1, 1 as factual) is:  0.8038945198059082


# Relevancy

In [29]:
"""
    "Your task is to evaluate if the response for the query \
    is in line with the context information provided.\n"
    "You have two options to answer. Either YES/ NO.\n"
    "Answer - YES, if the response for the query \
    is in line with context information otherwise NO.\n"
    "Query and Response: \n {query_str}\n"
    "Context: \n {context_str}\n"
    "Answer: "
"""

'\n    "Your task is to evaluate if the response for the query     is in line with the context information provided.\n"\n    "You have two options to answer. Either YES/ NO.\n"\n    "Answer - YES, if the response for the query     is in line with context information otherwise NO.\n"\n    "Query and Response: \n {query_str}\n"\n    "Context: \n {context_str}\n"\n    "Answer: "\n'

In [30]:
from llama_index.legacy.evaluation import RelevancyEvaluator
relevancy_llama2 = RelevancyEvaluator(service_context=llama2_service_context)
relevancy_result = relevancy_llama2.evaluate_response(query=question, response=response)

# Details about the Relevancy

In [31]:
relevancy_result

EvaluationResult(query='Here is the example of people\'s experience of getting harassed: - \n"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "\nCan you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in california, \nconsidering that the same case happened in some univerity or in s

In [32]:
relevancy_result.query

'Here is the example of people\'s experience of getting harassed: - \n"I left my job to return for a PhD with an intention of developing expertise in my field and preventing encounters with men who label me \x93ignorant\x94 or hint to my own incompetence. Part of my PhD requires public engagement via social media. In one such recent Twitter encounter, I stated my opinion and backed it up with facts. A man that disagreed then proceeded to lash out, calling my remarks \x93effete\x94 and \x93ignorant.\x94 I called out the blatant sexism, for which he continued to retaliate, calling my tone condescending. This is such a nasty cycle and I find it so extremely frustrating\x85if we sit back, it happens more. And if we speak up, sexism intensifies. We have our work cut out for us. "\nCan you plan and provide the resolution of above harassment/discrimination based on Title IX Implementation in california, \nconsidering that the same case happened in some univerity or in some workspace. Please p

In [33]:
relevancy_result.contexts

['This is consistent with the Court\'s underlying concern\nMost commenters acknowledged that OCR has provided useful factors to determine\nwhether harassing conduct took place "in the context of providing aid,\nbenefits, or services." However, some commenters stated that additional\nclarity and examples regarding the issue were needed. Commenters also suggested\nclarifying references to quid pro quo and hostile environment harassment as\nthese two concepts, though useful, do not determine the issue of whether the\nschool itself is considered responsible for the harassment. We agree with\nthese concerns and have made significant revisions to the sections "Harassment\nthat Denies or Limits a Student\'s Ability to Participate in or Benefit from\nthe Education Program" and "Harassment by Teachers and Other Employees" to\nGender-based Harassment, Including Harassment Predicated on Sex-stereotyping\nSeveral commenters requested that we expand the discussion and include examples\nof gender-ba

In [34]:
relevancy_result.response

" In light of the context provided, a student or employee in California experiencing repeated sex-based harassment through social media, such as on Twitter, may follow these steps to seek resolution under Title IX:\n\n1. Reporting: The individual should report the incident to their educational institution's or workplace's designated Title IX coordinator as soon as possible. This can typically be done via an online reporting form, email, or by contacting the Title IX office directly.\n2. Investigation: Upon receiving a report, the institution or workplace is required to conduct a prompt and thorough investigation into the allegations. This may involve interviewing witnesses, gathering evidence, and providing both parties with opportunities to share their perspectives.\n3. Remedial Action: If the investigation determines that sex-based harassment has occurred, the institution or workplace must take appropriate remedial action to address the situation. This could include disciplinary meas

In [35]:
relevancy_result.passing

True

In [36]:
relevancy_result.feedback

"We agree that the example given matches the california state's title ix implementation in addressing sex-based harassment on campuses. The professor's behavior would be considered sexual harassment and violate Title IX because it is offensive and creates a hostile learning environment for students.\n\nWe concur that the existing answer already YES, so we answer YES to confirm this. If you provide additional context, we can refine our response accordingly."

In [37]:
relevancy_result.score

1.0