In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import re
from dotenv import load_dotenv
import pandas as pd

from IPython.display import Markdown, display, HTML
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
load_dotenv()
os.chdir(os.path.dirname(os.getcwd()))

df = pd.read_parquet("./data/splade_embeds.parquet")
# Convert column names to snake_case for compatibility with LanceDB
original_columns = df.columns
snake_case_columns = {col: re.sub(r'(?<!^)(?=[A-Z])', '_', col).lower() for col in original_columns}
df.rename(columns=snake_case_columns, inplace=True)
df.head(2)

Unnamed: 0,index,created_utc,full_link,id,body,title,text_label,flair_label,embeddings,token_count,llm_title,state,kmeans_label,topic_title,splade_embeddings
0,1078,1575952538,https://www.reddit.com/r/legaladvice/comments/...,e8lsen,I applied for a job and after two interviews I...,"Failed a drug test due to amphetamines, I have...",employment,5,"[9.475638042064453e-05, 0.0005111666301983955,...",493,"""Validity of Schedule II Drug Prescription in ...",PR,8,Employment Legal Concerns and Issues,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2098,1577442453,https://www.reddit.com/r/legaladvice/comments/...,eg9ll2,"Hi everyone, thanks in advance for any guidanc...","Speeding ticket in Tennessee, Georgia Driver's...",driving,4,"[-0.006706413111028856, 0.020911016696181495, ...",252,"""Speeding ticket consequences for out-of-state...",KY,10,Legal Topics in Traffic Violations,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [2]:
df.sort_values(by='token_count', ascending=False, inplace=True)

In [3]:
test_query = """
A person working for a vacation rental company was called back to work after being temporarily laid off and collecting unemployment for 5 weeks. Upon returning, they observed a lack of adherence to COVID-19 safety protocols among colleagues, including the absence of mask-wearing and social distancing. They were moved to a different workspace due to close proximity issues but still felt unsafe due to the overall disregard for safety measures. The person is aware of a new unemployment provision in Texas allowing individuals not to return to work under certain conditions related to illness and caregiving but does not believe they qualify for these exceptions. They are concerned about safety at work and are seeking advice on their rights or possible actions.
"""

Markdown(test_query)


A person working for a vacation rental company was called back to work after being temporarily laid off and collecting unemployment for 5 weeks. Upon returning, they observed a lack of adherence to COVID-19 safety protocols among colleagues, including the absence of mask-wearing and social distancing. They were moved to a different workspace due to close proximity issues but still felt unsafe due to the overall disregard for safety measures. The person is aware of a new unemployment provision in Texas allowing individuals not to return to work under certain conditions related to illness and caregiving but does not believe they qualify for these exceptions. They are concerned about safety at work and are seeking advice on their rights or possible actions.


In [4]:
from src.parsing.search import find_fuzzy_matches_in_df, find_closest_matches_with_bm25_df

In [5]:
bm25_test = find_closest_matches_with_bm25_df(
    query=test_query,
    df=df,
    top_k=20,
    text_column='body',
)

In [6]:
bm25_test['body'].tolist()[:5]

['I work for a vacation rental company. I got called up the week before last after 5 weeks of being temporarily laid off and collecting unemployment for that long (3 total payments). I came back to work Thursday (5/7) and worked Thursday and Friday.\n\nI was the only one wearing a mask and did not notice any social distancing measures between employees. They *did* have me re-locate to a room upstairs as my normal workspace is in very close proximity to two other employees, but in general, I saw people all day talking face to face and not making any effort to distance themselves. \n\nI know Texas recently added an unemployment provision saying you can opt to not return to work and continue to collect unemployment if you’re sick, caring for someone who is sick, or caring for a child. I do have a 5 year old but my Wife only works 1 day a week and can take him to work, so she’s available for childcare. She will go back to a regular schedule in 2 weeks and my son will have to go to daycare.

In [7]:
from src.search.llm_filter import UsefulLLMFilter

In [8]:
filterer = UsefulLLMFilter()

In [9]:
test = filterer.filter_df_with_llm(
    query=test_query,
    df=bm25_test,
    text_column='body',
)

2024-04-17 23:11:33 - INFO - Input DataFrame has 20 rows
2024-04-17 23:11:33 - INFO - Running LLM usefulness eval in parallel
2024-04-17 23:11:34 - INFO - LLM filtered out 6 rows - output df shape is (14, 17)


In [10]:
from src.search.rank_gpt import RankGPTRerank
from llama_index.llms.openai import OpenAI


llm = OpenAI(model="gpt-3.5-turbo-16k")

reranker = RankGPTRerank(
    top_n=10,
    llm=llm
)

rank_gpt_res = reranker.rerank_dataframe(
    df=bm25_test,
    query=test_query,
    text_column='body',
)

In [11]:
from src.agent.tools.utils import aget_fact_patterns_df
import nest_asyncio
nest_asyncio.apply()

prep_df = await aget_fact_patterns_df(rank_gpt_res, 'body')

Processing summaries:   0%|          | 0/10 [00:00<?, ?it/s]

Processing summaries: 100%|██████████| 10/10 [00:08<00:00,  1.19it/s]


In [12]:
from src.agent.tools.utils import create_formatted_input, get_final_answer

formatted_input = create_formatted_input(
    df=prep_df, 
    query=test_query, 
    text_column='summary',
    context_token_limit=8000
    )


In [13]:
Markdown(formatted_input)

[1] An individual returned to work at a vacation rental company after being laid off and collecting unemployment benefits for 5 weeks. Upon returning, they noticed a lack of COVID-19 safety measures among employees, such as social distancing and mask-wearing, despite being moved to a different workspace for safety. They are aware of a recent unemployment provision in Texas that allows individuals to opt out of returning to work for specific reasons related to COVID-19 but do not believe they qualify under those conditions. The individual is concerned about their safety at work but feels unsure about their options regarding unemployment benefits due to their situation, which is complicated by childcare responsibilities.
URL: https://www.reddit.com/r/legaladvice/comments/ghny7f/what_are_my_unemployment_options_texas/

[2] Workers at a restaurant, adapted for pick-up/delivery with a 2nd make line 8 feet wide, are required to work closely (3 people in 8ft space) due to high demand, contrary to social distancing preferences. Despite air filters and sanitation efforts, discomfort persists over safety. Inquiry regards eligibility for unemployment or Pandemic Unemployment Assistance (PUA) if leaving job due to these safety concerns and the inability to maintain social distancing.
URL: https://www.reddit.com/r/legaladvice/comments/gkgbbc/workplace_refuses_to_follow_social_distancingmi/

[3] An immunocompromised individual, due to a chronic illness, is employed in an IT role that is typically remote but occasionally requires on-site customer visits. The employer, initially accommodating by allowing remote work during the COVID-19 shutdown, is now asking the employee to resume on-site duties. The employee was informed that refusal would be considered resignation. Additionally, during a company shutdown, the employee was unable to access unemployment benefits due to the employer's lack of communication. The employer had previously assured that on-site work would not involve direct customer interaction, a condition not documented in writing. The inquiry concerns potential options available to this employee facing the requirement to return to on-site work despite health concerns.
URL: https://www.reddit.com/r/legaladvice/comments/go79i4/tx_employer_let_me_work_from_the_office_for_the/

[4] An individual's salary was discussed without their permission among coworkers, shared by the accounts payable and office admins. This occurred after their shift was changed due to Covid-19 concerns and facing a hostile work environment from coworkers. The individual is concerned about the legality of the salary discussion without their permission and its impact, considering the ensuing negative perception of their compensation among coworkers.
URL: https://www.reddit.com/r/legaladvice/comments/gm7pw7/ethics_vs_legality/

[5] An individual at risk for COVID-19 expressed concerns over their living situation in a building where pandemic guidelines are not being strictly followed. This includes no enforcement of mask-wearing in common areas with open-air hallways, refusal to close a communal pool, lack of social distancing, and maintenance personnel not wearing masks. The tenant, who is dealing with a chronic illness, feels unsafe and is seeking advice on legal avenues to potentially terminate their lease agreement due to these concerns.
URL: https://www.reddit.com/r/legaladvice/comments/geppms/california_no_one_wearing_masks_pool_is_open_im/

[6] An individual, categorized as a 1099 contractor for a rental housing company for 5 years, raises concerns over the company's failure to deliver promised COVID-19 work incentives, lack of personal protective equipment (PPE), and the necessity of their work being classified as essential during the pandemic. Despite announcements of incentives for contractors, none were provided, specifically to those in roles preparing houses for rental, unlike new contractors on the rehabilitation side who were offered a $250 signup bonus. The individual queries the viability of taking action against the company for exploiting the COVID-19 pandemic to encourage ongoing work without adequate safety measures or promised compensation.
URL: https://www.reddit.com/r/legaladvice/comments/gj3ltq/1099_and_inscentives_not_given/

[7] A person facing financial hardship due to unemployment since early March is preparing for an unemployment appeal hearing. The person, after voluntarily resigning just before the COVID-19 pandemic escalated, was deemed ineligible for unemployment benefits under the criteria of quitting in anticipation of discharge. Seeking to qualify under the rationale of good cause to quit, they recount a situation of being forced to quit due to a he-said-she-said scenario with an ex, leading to an investigation and an ultimatum to quit or face possible termination and police involvement. The reasons cited for quitting under the good cause provision include avoiding potential damage to their criminal and school record, safety concerns relating to the commute, and fears of an unsafe work environment exacerbated by the pandemic. Despite attempting to address these concerns with supervisors and higher management and seeking a transfer, the person was told no alternatives were available, leading them to resign. These actions and decisions are framed within the context of needing to prove that their quitting was motivated by good workplace-related reasons for which the employer is responsible, adverse to the worker, and compelling enough to make a reasonable worker resign.
URL: https://www.reddit.com/r/legaladvice/comments/ge2si7/need_help_with_proving_good_cause_for_quitting_in/

[8] A person began working from home in March due to the pandemic. Shortly afterward, their company implemented a temporary 30-day furlough, during which all employees, regardless of their position, received only 60% of their salary and were asked to work 24 hours a week. After this period, employees were given the option to return to the office to receive full pay, but the individual in question opted to continue working remotely due to ongoing regional stay-at-home orders. Despite performing the same amount of work remotely, the company only offered full salaries to those who returned to the office. The person is concerned about the lack of COVID-19 safety measures in the office, such as wearing masks or conducting health screenings. They are questioning the legality of the company's policy to reduce pay for remote workers who are performing their duties equivalently to their in-office counterparts, with the context of the situation occurring in the U.S.
URL: https://www.reddit.com/r/legaladvice/comments/gsf2gq/company_refusing_to_pay_full_salary_unless_i_come/

[9] An individual was promised a $5,000 bonus upon completing their first year of employment. However, after a 2-month layoff due to the COVID-19 pandemic, the company is asking the individual to forfeit this bonus as a condition for reemployment, threatening termination if the conditions are not accepted. The question arises whether refusing these new terms would still allow the individual to qualify for unemployment benefits in Ohio, especially considering the initial employment agreement included a signed letter promising the bonus.
URL: https://www.reddit.com/r/legaladvice/comments/gjpbku/my_employer_is_requesting_i_forfeit_my_bonus_as_a/

[10] The query addresses a situation involving an individual's significant other (SO) who works in an essential business and is at risk due to cardiovascular conditions amidst the COVID-19 pandemic. Despite a relatively stable situation initially, the area is experiencing a worsening outbreak. The person is inquiring about the possibility of their SO obtaining a doctor's note to seek unemployment benefits if quitting the job becomes necessary due to health risks. Additionally, the individual is questioning their eligibility for the Family and Medical Leave Act (FMLA) to potentially take leave from a customer-facing job that could expose them to the virus, given their concern for the SO's health.
URL: https://www.reddit.com/r/legaladvice/comments/gc3oby/can_an_atrisk_individual_get_unemployment_if_they/

Instructions: Using only the provided search results that are relevant, and starting with the most relevant, write a detailed comparative analysis for a new query. If there are no relevant cases say so, and use one example from the search results to illustrate the uniquness of the new query. ALWAYS cite search results using [[number](URL)] notation after the reference.

New Query:

A person working for a vacation rental company was called back to work after being temporarily laid off and collecting unemployment for 5 weeks. Upon returning, they observed a lack of adherence to COVID-19 safety protocols among colleagues, including the absence of mask-wearing and social distancing. They were moved to a different workspace due to close proximity issues but still felt unsafe due to the overall disregard for safety measures. The person is aware of a new unemployment provision in Texas allowing individuals not to return to work under certain conditions related to illness and caregiving but does not believe they qualify for these exceptions. They are concerned about safety at work and are seeking advice on their rights or possible actions.


Analysis:

In [14]:
response_model = get_final_answer(formatted_input, model_name="gpt-4-turbo-preview")

In [15]:
response_model.model_dump()

{'research_report': 'The current query involves an individual who has returned to their workplace, a vacation rental company, after a temporary layoff and is concerned about the lack of COVID-19 safety protocols among employees. This situation raises questions about employee rights, unemployment benefits, and workplace safety amid the pandemic, particularly under Texas law. \n\nComparatively, the most directly relevant case is [1], where an individual in a similar situation of returning to work at a vacation rental company observed a lack of COVID-19 safety measures and questioned their options regarding unemployment benefits. Both scenarios involve concerns about workplace safety and the implications of returning to work under potentially unsafe conditions. \n\nThe query also shares similarities with [2], where employees at a restaurant faced a cramped working environment that contradicted social distancing guidelines, raising questions about their eligibility for unemployment or Pand

In [16]:
display(Markdown(response_model.research_report))

The current query involves an individual who has returned to their workplace, a vacation rental company, after a temporary layoff and is concerned about the lack of COVID-19 safety protocols among employees. This situation raises questions about employee rights, unemployment benefits, and workplace safety amid the pandemic, particularly under Texas law. 

Comparatively, the most directly relevant case is [1], where an individual in a similar situation of returning to work at a vacation rental company observed a lack of COVID-19 safety measures and questioned their options regarding unemployment benefits. Both scenarios involve concerns about workplace safety and the implications of returning to work under potentially unsafe conditions. 

The query also shares similarities with [2], where employees at a restaurant faced a cramped working environment that contradicted social distancing guidelines, raising questions about their eligibility for unemployment or Pandemic Unemployment Assistance (PUA) if they chose to leave due to safety concerns. 

In [3], an immunocompromised individual faced a dilemma about returning to on-site work amidst health concerns, showcasing another angle of how employees might deal with being asked to work in conditions they perceive as unsafe due to the pandemic. This parallels the current query by highlighting the tension between employer expectations and employee safety concerns. 

While cases such as [4] and [5] discuss related issues of workplace and living environment concerns during COVID-19, they are more peripheral, focusing on interpersonal and lease-related issues rather than the direct concern of returning to work under unsafe conditions. 

Overall, the present situation aligns closely with concerns raised in [1], [2], and [3], which explore the intersections of workplace safety, unemployment benefits, and employee rights during the pandemic. These cases offer insights into the complexity of employment decisions motivated by health and safety concerns, underlining the importance of understanding specific legal provisions and rights available to workers.

In [17]:
display(Markdown(str(response_model.get_formatted_citations)))

* [[1](https://www.reddit.com/r/legaladvice/comments/ghny7f/what_are_my_unemployment_options_texas/)] - Individual's concerns about COVID-19 safety at a vacation rental company (Texas).
* [[2](https://www.reddit.com/r/legaladvice/comments/gkgbbc/workplace_refuses_to_follow_social_distancingmi/)] - Workers' safety concerns and unemployment eligibility in a restaurant environment.
* [[3](https://www.reddit.com/r/legaladvice/comments/go79i4/tx_employer_let_me_work_from_the_office_for_the/)] - Immunocompromised employee's concerns about returning to on-site work (Texas).

In [29]:
Markdown(response_model.context_citations[1].get_markdown_citation)

[3](https://www.reddit.com/r/legaladvice/comments/gnrqby/if_i_have_a_medical_marijuana_card_can_i_be_fired/) - Medical Marijuana and Workplace Drug Testing Policies in Pennsylvania, Reddit Legal Advice.

In [49]:

def clean_string(string):
    # Remove spaces and special characters using regex
    string = re.sub("[^A-Za-z0-9]+", "", string)
    # Convert the string to lowercase
    string = string.lower()
    return string

def generate_citation_strings(
    citation_numbers: List[str],
    df: pd.DataFrame,
    location_column: str,
    date_column: str,
) -> List[str]:
    result = []
    for citation in citation_numbers:
        i = int(citation) - 1  # convert string to int and adjust for 0-indexing
        title = df.iloc[i]["llm_title"]
        claim_number = df.iloc[i]["id"]

        claim_number_clean = clean_string(str(claim_number))
        link = f"={claim_number_clean}"
        claim_number_formatted = f"[{claim_number}]({link})"

        venue = str(df.iloc[i][location_column])
        date = "2022 Jan"
        result.append(
            f"**{[i+1]}** *{title}* - {venue}, {date}, Claim Number: {claim_number_formatted}\n\n"
        )
    return result

In [50]:
from src.agent.tools.utils import extract_citation_numbers_in_brackets


cites = extract_citation_numbers_in_brackets(
    response_model.research_report
)

cite_strings = generate_citation_strings(
    citation_numbers=cites,
    df=prep_df,
    location_column='state',
    date_column='date'
)

In [51]:
cites

['14', '15', '1']

In [52]:
Markdown(cite_strings[2])

**[1]** *"Legal implications of company drug policies and medical marijuana cards in Arizona"* - PA, 2022 Jan, Claim Number: [e8y7gr](=e8y7gr)



In [12]:
fuzzy_test = find_fuzzy_matches_in_df(
    query='marijuana',
    df=df,
    k=10,
    text_column='body',
    words_after=5,
    words_before=5,
)

In [13]:
fuzzy_test['context'].tolist()

['father conceal loads of drugs, marijuana and some black looking drug,',
 'years for a possession of marijuana charge. I’ve violated a couple',
 'damaged property, and constantly had marijuana in her possession on the',
 'tolerance for the use of marijuana regardless of local laws. I',
 'the roommates getting arrested for marijuana charges on the property and',
 "state recreational use of Marijuana is legal and I'm unsure if",
 'I know it’s is only marijuana, but in very large amounts.',
 'small amount of dabs or marijuana concentrate with me. Me and',
 'to deal with. He smokes marijuana and either he smokes so',
 'was volunteering at a medicinal marijuana collective to help out a']

In [14]:
from src.search.query_expansion import segment_search_query

queries = segment_search_query(
    test_query,
)

In [15]:
queries.searches

[SubQuestion(chain_of_thought="To understand if an employer's actions constitute illegal discrimination, we need to explore the legal protections for medical marijuana patients in the workplace. This involves examining federal and state laws regarding medical marijuana use and employment discrimination.", sub_question_topic='Legal Protections for Medical Marijuana Patients', sub_question_query='What legal protections exist for medical marijuana patients in the workplace?', sub_question_keywords=['medical marijuana', 'workplace', 'legal protections']),
 SubQuestion(chain_of_thought="Since federal law, including the Controlled Substances Act, may conflict with state laws that legalize medical marijuana, it's important to understand how these conflicting laws impact employment rights.", sub_question_topic='Federal vs. State Law on Medical Marijuana', sub_question_query='How do federal and state laws regarding medical marijuana use conflict, and what impact does this have on employment rig

In [17]:
from src.agent.tools.semantic_search import SemanticSearch
from src.agent.tools.splade_search import SPLADESparseSearch
from src.search.query_expansion import segment_search_query

semantic_search = SemanticSearch(
    df=df,
    embedding_column='embeddings'
)

splade_search = SPLADESparseSearch(
    df=df,
    text_column='title',
    embedding_column='splade_embeddings'
)

2024-04-17 19:15:23 - INFO - Using pre-computed 'title' embeddings from existing column: splade_embeddings


In [18]:
vector_results, keyword_results = queries.execute(semantic_search, splade_search)

2024-04-17 19:15:30 - INFO - 

Thought: To understand if an employer's actions constitute illegal discrimination, we need to explore the
legal protections for medical marijuana patients in the workplace. This involves examining federal
and state laws regarding medical marijuana use and employment discrimination.
Search topic: Legal Protections for Medical Marijuana Patients
2024-04-17 19:15:30 - INFO - Running vector (OpenAI) search: What legal protections exist for medical marijuana patients in the workplace?


TypeError: SemanticSearch.query_similar_documents() got an unexpected keyword argument 'top_n'

In [33]:
from src.agent.tools.utils import aget_fact_patterns_df
import nest_asyncio
nest_asyncio.apply()

prep_df = await aget_fact_patterns_df(hybrid_results, 'body', "id")

Processing summaries:   0%|          | 0/27 [00:00<?, ?it/s]

Processing summaries: 100%|██████████| 27/27 [00:07<00:00,  3.79it/s]


In [34]:
prep_df.head(2)

Unnamed: 0,index,created_utc,full_link,id,body,title,text_label,flair_label,token_count,llm_title,state,kmeans_label,topic_title,clean_text,search_type,_relevance_score,summary
0,8075,1590524169,https://www.reddit.com/r/legaladvice/comments/...,36598d2ca979f02397b530a5c36d7a2a,I'm a New York State medical marijuana patient...,Is it legal in New York to discriminate agains...,employment,5,114,"""Can a hospital refuse to hire a medical marij...",MI,8,Employment Legal Concerns and Issues,new york state medical marijuana patient also ...,splade,0.821348,"A New York State medical marijuana patient, wh..."
1,7331,1576018396,https://www.reddit.com/r/legaladvice/comments/...,b20c68f342dceb310b1c8190753aa3af,"Hello,\n\nI was considering getting a medical ...",Considering getting a medical marijuana card- ...,employment,5,276,"""Legal implications of company drug policies a...",PA,8,Employment Legal Concerns and Issues,hello considering getting medical marijuana ca...,vector,0.669316,An individual considering obtaining a medical ...


In [35]:
from src.agent.tools.utils import create_formatted_input, get_final_answer

formatted_input = create_formatted_input(
    prep_df, test_query, context_token_limit=8000
    )

response_model = get_final_answer(formatted_input, model_name="gpt-4-turbo-preview")

In [36]:
Markdown(response_model.research_report)

The query raises the issue of employment-related discrimination based on an individual's status as a medical marijuana patient. This concern intersects with several laws and regulations, including but not limited to, employment discrimination laws, medical marijuana laws at the state level, and policies regarding drug testing in the workplace. 

The most directly relevant case is the first search result. This individual was offered a job in New York, which was later rescinded upon disclosure of their medical marijuana patient status during a pre-employment physical. They reached out to the division of human rights at the labor department for guidance, indicating a potential legal issue surrounding employment discrimination based on medical marijuana use [[1](https://www.reddit.com/r/legaladvice/comments/gr4q8s/is_it_legal_in_new_york_to_discriminate_against/)].

Another pertinent case comes from Arizona, where an individual debated the ramifications of obtaining a medical marijuana card on their employment due to their company's zero-tolerance drug policy [[2](https://www.reddit.com/r/legaladvice/comments/e8y7gr/considering_getting_a_medical_marijuana_card/)]. This scenario mirrors the query by highlighting the tension between state-legalized medical marijuana and employer drug policies. 

The situation described in Pennsylvania also sheds light on this issue. The individual, a medical marijuana cardholder, expressed concerns about drug testing during the hiring process and potential repercussions upon failing the test due to their legitimate medical use of marijuana [[4](https://www.reddit.com/r/legaladvice/comments/gnrqby/if_i_have_a_medical_marijuana_card_can_i_be_fired/)].

Finally, the scenario in New Jersey presents a scenario where prescribed medication (though not specifically marijuana) constituted grounds for a job offer being rescinded, which parallels concerns about discrimination based on medical treatments or prescriptions [[6](https://www.reddit.com/r/legaladvice/comments/gnpjof/nj_i_was_denied_a_job_due_to_being_prescribed/)].

These cases collectively illustrate the complexity of navigating employment and medical marijuana use. They suggest a delicate balance between complying with state laws that permit medical marijuana and adhering to employer drug policies that may not differentiate between recreational and medical use. The legal landscape around this issue appears to be evolving, and the specifics may vary significantly depending on jurisdiction, making it imperative to consider local laws and regulations alongside federal standards when assessing legality.

In [None]:
assert 1 + 1 == 3

AssertionError: 

In [None]:
from src.search.query_expansion import generate_subquestions

questions = generate_subquestions(test_query, n='any number of')
questions.questions

['Should I just pay the rent and wait for my refund?',
 'If the post office shows that my original money order was cashed, am I out that money?',
 'What can I do about a landlord who is slow to make repairs?',
 'Is it legal for my landlord to enter my apartment without permission?',
 'Can a landlord raise rent in response to making repairs?',
 'How do I handle disruptive neighbors?',
 'What should I do if I suspect my landlord of stealing from me?',
 'What are my rights as a tenant in Missouri?',
 'Is it legal to use pliers to turn on water in lieu of a broken knob?',
 "What actions can I take if I've been treated unfairly by my landlord?"]

In [None]:
questions._raw_response.usage

CompletionUsage(completion_tokens=147, prompt_tokens=1130, total_tokens=1277)

In [None]:
from src.search.query_filter import generate_query_plan, auto_filter_fts_search

In [None]:
query_plan = generate_query_plan(
    input_df=df,
    query=test_query,
    filter_fields=[
        'state',
    ]
)
filtered_df = query_plan.filter_df(df=df)

[32m2024-03-19 21:42:45 - INFO - Schema shown to LLM: 
Name of each field, its type and unique values (up to 20):
* state (string);  Values - ['NM' 'IN' 'WY' 'NH' 'MP' 'PA' 'MH' 'ID' 'AR' 'MA' 'KS' 'AS' 'ND' 'PR'
 'DE' 'FL' 'LA' 'OR' 'VT' 'PW'], ... 39 more
        [0m


[32m2024-03-19 21:42:55 - INFO - Input DataFrame has 5,000 rows[0m
[32m2024-03-19 21:42:55 - INFO - Applying filter(s): state LIKE '%OR%'[0m
[32m2024-03-19 21:43:03 - INFO - Filtered DataFrame has 86 rows[0m


In [None]:
filtered_df.head(2)

Unnamed: 0,index,created_utc,full_link,id,body,title,text_label,flair_label,embeddings,token_count,llm_title,state,kmeans_label,topic_title,splade_embeddings
0,2029,1578267399,https://www.reddit.com/r/legaladvice/comments/...,ekl2ef,For context I live in the Philippines. I wont ...,My professor refuses to show us ALL of our tes...,school,9,"[-0.00954271624451715, 0.007157037183387862, 0...",953,"""Unrevealed Grades and Lack of Transparency: S...",OR,9,Legal Consequences of False Accusations,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,3320,1591126549,https://www.reddit.com/r/legaladvice/comments/...,gve3nq,Edit: I live in Washington state.\n\nSo I live...,My landlord has been harassing me about my pet...,housing,7,"[-0.0034782202413053045, 0.00616729225832095, ...",759,"""Legal dispute over pet snake: Landlord threat...",OR,3,Rental Property and Landlord Matters,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [None]:
print(query_plan.original_query)
print(query_plan.rephrased_query)

Do I have any legal recourse here? I know Oregon is an 'at will' state, but it sounds like there are at LEAST two instances that offer grounds for wrongful termination (just based on my limited knowledge of the ADA, dept of labor, BOLI, etc.). 
legal recourse for wrongful termination in 'at will' employment including issues related to mistreatment, health code violations, improper handling of company money, and potential discrimination due to medical conditions


In [None]:
test_res = auto_filter_fts_search(
    df=df,
    query='marijuana',
    top_k=20,
    text_column="body",
    embeddings_column="embeddings",
    filter_fields=[
        'state',
    ])

[32m2024-03-19 21:44:19 - INFO - Schema shown to LLM: 
Name of each field, its type and unique values (up to 20):
* state (string);  Values - ['NM' 'IN' 'WY' 'NH' 'MP' 'PA' 'MH' 'ID' 'AR' 'MA' 'KS' 'AS' 'ND' 'PR'
 'DE' 'FL' 'LA' 'OR' 'VT' 'PW'], ... 39 more
        [0m


[32m2024-03-19 21:44:22 - INFO - No filters were identified for query: marijuana[0m
[32m2024-03-19 21:44:22 - INFO - Revised query: marijuana[0m
[32m2024-03-19 21:44:23 - INFO - Full Text Search (FTS) search yielded a DataFrame with 20 rows[0m


In [None]:
Markdown(test_res['body'].tolist()[0])

I'm a New York State medical marijuana patient. I also work in healthcare. I applied to a new job at a new hospital, and they are discriminating against me for being a medical marijuana patient. I was offered the job and accepted, but when I went to get my pre-employment physical conducted, I gave them my medical marijuana card and informed them that I am a patient. They are now refusing to hire me. Is this legal? I already contacted the division of human rights at the labor department and they said I may or may not have a case.