In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import re
from dotenv import load_dotenv
import pandas as pd

from IPython.display import Markdown, display, HTML
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
load_dotenv()
# Move the working directory to the parent of the directory the kernel started
# in (presumably the project root, so `src.*` imports and relative data paths
# resolve). The start directory is remembered on first execution so that
# re-running this cell does not climb one more level each time.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

In [2]:
# Load the two pre-computed retrieval tables. The paths are relative, so they
# resolve against the working directory set by the `os.chdir` call in the first cell.
summary_df = pd.read_parquet('citation_summary_keywords.parquet')
question_df = pd.read_parquet('citation_questions.parquet')
# Display both shapes as the cell output.
summary_df.shape, question_df.shape

((117, 6), (659, 4))

In [3]:
# Research question used throughout the notebook.
# NOTE(review): the quoted phrase opens with a double quote and closes with a
# single quote ("sudden and accidental'); the text is sent as-is to the search
# and LLM calls below, so fix it here if that is unintended.
test_query = """
Regarding the pollution exclusion clause under the terms of comprehensive general liability (CGL) insurance, \
how is phrase "sudden and accidental' defined and applied given a claim involving gradual but unintentional polluting events.
"""

# Render the query as Markdown for the cell output.
Markdown(test_query)


Regarding the pollution exclusion clause under the terms of comprehensive general liability (CGL) insurance, how is phrase "sudden and accidental' defined and applied given a claim involving gradual but unintentional polluting events.


In [4]:
summary_df.head(1)

Unnamed: 0,citation,summary,keywords,recency,openai_embeddings_summary,splade_embeddings
0,154 Ill. 2d 90,The case Outboard Marine Corp. v. Liberty Mutu...,"insurance policy interpretation, duty to defen...",,"[-0.01926794834434986, 0.0018745827255770564, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [5]:
question_df.head(1)

Unnamed: 0,citation,question,openai_embeddings_question,splade_embeddings
0,154 Ill. 2d 90,What are the key principles established by the...,"[0.0014819653006270528, -0.01120856124907732, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [6]:
from src.agent.tools.semantic_search import SemanticSearch
from src.agent.tools.splade_search import SparseEmbeddingsSplade
from src.search.query_expansion import segment

# Embedding-based search over the case summaries, using the pre-computed
# `openai_embeddings_summary` column.
semantic_search = SemanticSearch(
    df=summary_df,
    embedding_col_name='openai_embeddings_summary'
)

# SPLADE search over the generated questions, using the pre-computed
# `splade_embeddings` column (the log line under this cell confirms the
# existing column is reused).
splade_search = SparseEmbeddingsSplade(
    df=question_df,
    text_column='question',
    splade_column='splade_embeddings'
)

# Break the research question into sub-questions; `n` is passed as free text
# (presumably interpolated into the LLM prompt — confirm in `segment`).
queries = segment(
    test_query,
    n='3 to 5'
)

[32m2024-04-04 23:54:54 - INFO - Using pre-computed 'question' embeddings from existing column: splade_embeddings[0m


In [7]:
queries.searches

[SubQuestion(chain_of_thought="To understand how the phrase 'sudden and accidental' is defined, it's essential to explore the legal interpretations and definitions provided by courts or in the policy language itself.", sub_question_topic="Definition of 'Sudden and Accidental'", sub_question_query="How is the phrase 'sudden and accidental' defined in comprehensive general liability insurance policies?", sub_question_keywords=['sudden and accidental', 'definition', 'CGL insurance', 'pollution exclusion']),
 SubQuestion(chain_of_thought="Given the complexity of pollution events, it's important to understand how courts have historically applied the 'sudden and accidental' clause in cases involving gradual pollution.", sub_question_topic='Application in Gradual Pollution Cases', sub_question_query="How have courts applied the 'sudden and accidental' clause in CGL insurance to cases of gradual pollution?", sub_question_keywords=['sudden and accidental', 'gradual pollution', 'court cases', 'C

In [33]:
# Run the sub-questions through both searchers; per the log output below, each
# sub-question triggers one vector (OpenAI) search and one keyword (SPLADE) search.
vector_results, keyword_results = queries.execute(semantic_search, splade_search)
# Number of result frames returned per search type.
len(vector_results), len(keyword_results)

[32m2024-04-05 00:11:38 - INFO - 

Thought: To understand how the phrase 'sudden and accidental' is defined, it's essential to explore the legal
interpretations and definitions provided by courts or in the policy language itself.
Search topic: Definition of 'Sudden and Accidental'[0m
[32m2024-04-05 00:11:38 - INFO - Running vector (OpenAI) search on: How is the phrase 'sudden and accidental' defined in comprehensive general liability insurance policies?[0m
[32m2024-04-05 00:11:38 - INFO - Running keyword (SPLADE) search on: sudden and accidental, definition, CGL insurance, pollution exclusion[0m
[32m2024-04-05 00:11:38 - INFO - Returning 10 records from vector search and 10 from keywords[0m
[32m2024-04-05 00:11:38 - INFO - ---------------------------------------------------------------------------[0m
[32m2024-04-05 00:11:38 - INFO - 

Thought: Given the complexity of pollution events, it's important to understand how courts have historically
applied the 'sudden and accidenta

(4, 4)

In [None]:
# Expose each result frame's original row label as an `index` column.
# The lists are rebound to the same names, so the cell must be safe to re-run:
# a second `reset_index(drop=False)` would add a `level_0` column and a third
# would raise because `level_0` already exists. Frames that already carry an
# `index` column are therefore left untouched.
vector_results = [
    frame if 'index' in frame.columns else frame.reset_index(drop=False)
    for frame in vector_results
]
keyword_results = [
    frame if 'index' in frame.columns else frame.reset_index(drop=False)
    for frame in keyword_results
]

In [82]:
pd.concat(vector_results).shape, pd.concat(keyword_results).shape

((40, 9), (40, 7))

In [83]:
from src.search.doc_joiner import DocJoinerDF

# Joiner configured for reciprocal rank fusion; `top_k=20` presumably caps the
# number of rows returned by each `run` call (confirm in `DocJoinerDF`).
df_joiner = DocJoinerDF(join_mode="reciprocal_rank_fusion", top_k=20)

In [84]:
# Merge the per-sub-question result frames into a single frame per search type.
vector_res = df_joiner.run(vector_results)
keyword_res = df_joiner.run(keyword_results)
# Display the shapes of the merged frames.
vector_res.shape, keyword_res.shape

((19, 9), (20, 7))

In [85]:
vector_res.head()

Unnamed: 0,index,citation,summary,keywords,recency,openai_embeddings_summary,splade_embeddings,search_type,score
33,33,268 Ill. App. 3d 598,The case law from United States Gypsum Co. v. ...,"insurance, claim, coverage, policy, occurrence...",,"[-0.008939363993704319, -0.011452711187303066,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vector,0.984127
71,71,199 Ill. 2d 281,"General Casualty Insurance Co. v. Lacey, 199 I...","insurance policy, summary judgment, duty to de...",This case has continually influenced the inter...,"[-0.0014027409488335252, 0.010859929956495762,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vector,0.957829
53,53,643 N.E.2d 1226,"643 N.E.2d 1226, United States Gypsum Co. v. A...","insurance law, continuous trigger approach, pr...",References to 643 N.E.2d 1226 have evolved to ...,"[-0.02118617109954357, -0.003369903890416026, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vector,0.951231
91,91,363 Ill. App. 3d 335,Liberty Mutual Fire Insurance Co. v. St. Paul ...,"insurance policy, interpretation, obligations,...",,"[-0.006612452678382397, -0.00979774072766304, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vector,0.928014
40,40,89 Ill. App. 3d 617,The case Aetna Casualty & Surety Co. v. Freyer...,"insurance, accident, definition, unforeseen oc...",The definition of 'accident' established in 89...,"[-0.003400814952328801, -0.015401927754282951,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vector,0.717857


In [64]:
from src.embedding_models.models import ColbertReranker
from src.search.threadpool import run_functions_tuples_in_parallel

In [86]:
# Exact reply strings the LLM is asked to produce for the two verdicts.
USEFUL_PAT = "Yes useful"
NONUSEFUL_PAT = "Not useful"
# Built as an f-string so the two verdict strings are interpolated immediately,
# while the doubled braces leave literal `{chunk_text}` / `{user_query}`
# placeholders in the template to be filled later with `str.format`.
CHUNK_FILTER_PROMPT = f"""
Determine if the reference section is USEFUL for answering the user query.
It is good enough for the section to be related or similar to the query, \
it should be relevant information that is USEFUL for comparing to the query.
If the section contains ANY useful information, that is good enough, \
it does not need to fully answer the user query, but it \
should at least address a component to be USEFUL.

Reference Section:
```
{{chunk_text}}
```

User Query:
```
{{user_query}}
```

Respond with EXACTLY AND ONLY: "{USEFUL_PAT}" or "{NONUSEFUL_PAT}"
""".strip()

In [66]:
from langchain.schema.messages import AIMessage
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage

def dict_based_prompt_to_langchain_prompt(
    messages: list[dict[str, str]]
) -> list[BaseMessage]:
    """Convert role/content dictionaries into LangChain message objects.

    Args:
        messages: Dictionaries carrying a ``role`` key (``"user"``,
            ``"system"`` or ``"assistant"``) and a ``content`` key.

    Returns:
        One LangChain message per input dictionary, in input order.

    Raises:
        ValueError: If ``role`` or ``content`` is missing or empty, or if the
            role is not one of the three supported values.
    """
    # Supported roles and the message class each one maps to.
    message_cls_by_role = {
        "user": HumanMessage,
        "system": SystemMessage,
        "assistant": AIMessage,
    }

    converted: list[BaseMessage] = []
    for message in messages:
        role = message.get("role")
        content = message.get("content")

        # Validation order matters for which error is reported:
        # role present -> content present -> role recognised.
        if not role:
            raise ValueError(f"Message missing `role`: {message}")
        if not content:
            raise ValueError(f"Message missing `content`: {message}")

        message_cls = message_cls_by_role.get(role)
        if message_cls is None:
            raise ValueError(f"Unknown role: {role}")
        converted.append(message_cls(content=content))
    return converted

In [67]:
from typing import Callable
from langchain_openai import ChatOpenAI

def llm_eval_chunk(query: str, chunk_content: str) -> bool:
    """Ask the chat model whether one chunk is useful for answering the query.

    Args:
        query: The user query the chunk is judged against.
        chunk_content: Text of the chunk to evaluate.

    Returns:
        ``False`` only when the model's reply, after trimming whitespace and
        surrounding double quotes and ignoring case, equals ``NONUSEFUL_PAT``.
        Any other reply counts as useful, so that an off-pattern answer falls
        back to trusting the (re)ranking instead of discarding the chunk.
    """
    chat_model = ChatOpenAI(model='gpt-3.5-turbo')

    # Single user turn holding the filled-in filter prompt.
    user_turn = {
        "role": "user",
        "content": CHUNK_FILTER_PROMPT.format(
            chunk_text=chunk_content, user_query=query
        ),
    }
    reply = chat_model.invoke(dict_based_prompt_to_langchain_prompt([user_turn]))

    verdict = reply.content.strip().strip('"').lower()
    return verdict != NONUSEFUL_PAT.lower()


def llm_batch_eval_chunks(
    query: str, chunk_contents: list[str], use_threads: bool = True
) -> list[bool]:
    """Evaluate the usefulness of several chunks against one query.

    Args:
        query: The user query every chunk is judged against.
        chunk_contents: Chunk texts to evaluate.
        use_threads: When true, hand the evaluations to
            ``run_functions_tuples_in_parallel``; otherwise evaluate them one
            after another.

    Returns:
        One boolean per chunk, in input order. On the threaded path a ``None``
        result (failure/timeout) is reported as ``True`` so the chunk is kept.
    """
    if not use_threads:
        verdicts = []
        for chunk_content in chunk_contents:
            verdicts.append(llm_eval_chunk(query, chunk_content))
        return verdicts

    # One (callable, args) pair per chunk for the parallel runner.
    jobs: list[tuple[Callable, tuple]] = []
    for chunk_content in chunk_contents:
        jobs.append((llm_eval_chunk, (query, chunk_content)))

    print(
        "Running LLM usefulness eval in parallel (following logging may be out of order)"
    )
    outcomes = run_functions_tuples_in_parallel(jobs, allow_failures=True)

    # In case of failure/timeout, don't throw out the chunk
    return [outcome if outcome is not None else True for outcome in outcomes]

In [68]:
from llama_index_client import TextNode
from src.search.models import dataframe_to_text_nodes

def filter_chunks(
    query: str,
    chunks_to_filter: list[TextNode],
    max_llm_filter_chunks: int = 20,
) -> list[TextNode]:
    """Keep only the chunks the LLM judged relevant to the query.

    Only the first ``max_llm_filter_chunks`` chunks are evaluated; anything
    beyond that is dropped from the result.

    Args:
        query (str): The query to filter chunks against.
        chunks_to_filter (list[TextNode]): Candidate nodes, in ranked order.
        max_llm_filter_chunks (int, optional): Maximum number of leading
            chunks sent to the LLM. Defaults to 20.

    Returns:
        list[TextNode]: The evaluated nodes marked as relevant, in input order.
    """
    candidates = chunks_to_filter[:max_llm_filter_chunks]

    candidate_texts = []
    for candidate in candidates:
        candidate_texts.append(candidate.text)

    keep_flags = llm_batch_eval_chunks(
        query=query,
        chunk_contents=candidate_texts,
    )

    # Pair each candidate with its verdict by position.
    kept = []
    for position in range(len(candidates)):
        if keep_flags[position]:
            kept.append(candidates[position])
    return kept

In [69]:
queries.searches

[SubQuestion(chain_of_thought="To understand how the phrase 'sudden and accidental' is defined, it's essential to explore the legal interpretations and definitions provided by courts or in the policy language itself.", sub_question_topic="Definition of 'Sudden and Accidental'", sub_question_query="How is the phrase 'sudden and accidental' defined in comprehensive general liability insurance policies?", sub_question_keywords=['sudden and accidental', 'definition', 'CGL insurance', 'pollution exclusion']),
 SubQuestion(chain_of_thought="Given the complexity of pollution events, it's important to understand how courts have historically applied the 'sudden and accidental' clause in cases involving gradual pollution.", sub_question_topic='Application in Gradual Pollution Cases', sub_question_query="How have courts applied the 'sudden and accidental' clause in CGL insurance to cases of gradual pollution?", sub_question_keywords=['sudden and accidental', 'gradual pollution', 'court cases', 'C

In [89]:
# Wrap the fused vector-search rows as text nodes: the case summary is the node
# text and the citation is used both as the node id and as metadata.
nodes = dataframe_to_text_nodes(
    vector_res,
    id_column='citation', 
    text_col='summary',
    metadata_fields=['citation']
)

# Let the LLM drop summaries it judges not useful for the research question,
# then print the survivors separated by a dashed rule.
filtered_chunks = filter_chunks(test_query, nodes)
print(f"\nReturned {len(filtered_chunks)} nodes")
print(f"{test_query}\n")
for obj in filtered_chunks:
    print(obj)
    print("-" * 50)

Running LLM usefulness eval in parallel (following logging may be out of order)

Returned 12 nodes

Regarding the pollution exclusion clause under the terms of comprehensive general liability (CGL) insurance, how is phrase "sudden and accidental' defined and applied given a claim involving gradual but unintentional polluting events.


Node ID: 268 Ill. App. 3d 598
Text: The case law from United States Gypsum Co. v. Admiral Insurance
Co., 268 Ill. App. 3d 598, clarifies significant concepts in insurance
law across multiple topics, such as the determination of claim
coverage based on third party's claim contents, the interpretation of
'occurrences' using the 'cause' theory, guidance on definitional
aspects of 'oc...
--------------------------------------------------
Node ID: 199 Ill. 2d 281
Text: General Casualty Insurance Co. v. Lacey, 199 Ill. 2d 281 (2002)
is a pivotal case that has been cited extensively to address legal
standards related to insurance policy interpretations, the duty

In [90]:
# Same wrapping as for the vector results, but for the fused keyword-search rows:
# here the generated question is the node text.
nodes_kw = dataframe_to_text_nodes(
    keyword_res,
    id_column='citation', 
    text_col='question',
    metadata_fields=['citation']
)

# LLM usefulness filter over the keyword hits, followed by a printout of the
# nodes that were kept.
filtered_chunks_kw = filter_chunks(test_query, nodes_kw)
print(f"\nReturned {len(filtered_chunks_kw)} nodes")
print(f"{test_query}\n")
for obj in filtered_chunks_kw:
    print(obj)
    print("-" * 50)

Running LLM usefulness eval in parallel (following logging may be out of order)

Returned 8 nodes

Regarding the pollution exclusion clause under the terms of comprehensive general liability (CGL) insurance, how is phrase "sudden and accidental' defined and applied given a claim involving gradual but unintentional polluting events.


Node ID: 687 N.E.2d 72
Text: What precedent establishes the scope of environmental pollution
exclusions in insurance policies?
--------------------------------------------------
Node ID: 578 N.E.2d 926
Text: What conditions must be met for the standard pollution exclusion
to apply in insurance policies according to this citation?
--------------------------------------------------
Node ID: 757 N.E.2d 481
Text: What constitutes an 'occurrence' under a CGL policy in cases
related to property damage?
--------------------------------------------------
Node ID: 144 Ill. 2d 64
Text: What principles apply to the interpretation of exclusionary
clauses in insurance 

In [96]:
# Citations kept by either filter, de-duplicated while preserving first-seen
# order (vector hits first, then keyword hits). `dict.fromkeys` keeps insertion
# order, whereas iterating a `set` of strings can yield a different order on
# every kernel start because of hash randomization.
distinct_citations = list(
    dict.fromkeys(
        node.metadata['citation'] for node in filtered_chunks + filtered_chunks_kw
    )
)

In [97]:
len(distinct_citations)

17

In [120]:
from src.search.models import text_nodes_to_dataframe

# NOTE(review): this converts `nodes` — the vector-search nodes as they were
# before LLM filtering — rather than `filtered_chunks` / `filtered_chunks_kw`,
# so the filter verdicts computed above do not affect `search_res_df`.
# Confirm this is intended.
search_res_df = text_nodes_to_dataframe(nodes)
# Render the first row's text as Markdown for a quick look.
Markdown(search_res_df.head(1)['text'].tolist()[0])

The case law from United States Gypsum Co. v. Admiral Insurance Co., 268 Ill. App. 3d 598, clarifies significant concepts in insurance law across multiple topics, such as the determination of claim coverage based on third party's claim contents, the interpretation of 'occurrences' using the 'cause' theory, guidance on definitional aspects of 'occurrence' in policies for progressive property damage, issues around settlements following an insurer's breach of duty to defend, principles of indemnification, applying a continuing trigger analysis for pollution damage, assessing the 'continuing process' of manufacturing and selling products as one occurrence, and the importance of cause in determining the number of occurrences. It has been cited to address the need to determine the interpretation of insurance policy terms, the standard for reasonableness of settlements, and ethical issues in insurer-insured relationships.

In [125]:
search_res_df.head()

Unnamed: 0,text,citation
0,The case law from United States Gypsum Co. v. ...,268 Ill. App. 3d 598
1,"General Casualty Insurance Co. v. Lacey, 199 I...",199 Ill. 2d 281
2,"643 N.E.2d 1226, United States Gypsum Co. v. A...",643 N.E.2d 1226
3,Liberty Mutual Fire Insurance Co. v. St. Paul ...,363 Ill. App. 3d 335
4,The case Aetna Casualty & Surety Co. v. Freyer...,89 Ill. App. 3d 617


In [133]:
from src.utils.gen_utils import count_tokens


def create_context(
    df: pd.DataFrame,
    context_token_limit: int = 25000
) -> str:
    """
    Build a numbered context string from search results within a token budget.

    Rows are read in order. Each one becomes an entry made of a
    ``[n] <citation>`` line, a ``Summary:`` line holding the row text wrapped
    at 80 columns, and a dashed separator line. Entries are added until the
    next one would push the running token total past ``context_token_limit``;
    that entry and all later rows are left out.

    The input DataFrame is only read, never modified.

    Args:
        df (pd.DataFrame): Search results with ``text`` and ``citation`` columns.
        context_token_limit (int): The maximum number of tokens for the context.

    Returns:
        str: The accepted entries joined by blank lines, or an empty string
             when no entry fits (or ``df`` has no rows).
    """
    import textwrap

    entries = []
    total_tokens = 100  # Starting token count to account for initial text.
    # Row labels are never used, so the caller's index is left untouched
    # instead of being reset in place. Because the loop stops at the first
    # entry that does not fit, the enumerate counter always equals the
    # 1-based position of the entry in the output.
    for count, (_, row) in enumerate(df.iterrows(), start=1):
        wrapped_summary = textwrap.fill(row['text'], width=80)
        entry = (
            f"[{count}] {row['citation']}\n"
            f"Summary: {wrapped_summary}\n"
            "-----------------------------------------\n"
        )
        entry_tokens = count_tokens(entry)
        if total_tokens + entry_tokens > context_token_limit:
            break
        entries.append(entry)
        total_tokens += entry_tokens
    return "\n\n".join(entries)

In [134]:
context = create_context(search_res_df)

In [141]:
from src.agent.tools.utils import extract_citation_numbers_in_brackets

def create_formatted_input(
    df: pd.DataFrame,
    query: str,
    context_token_limit: int = 25000,
    instructions: str = """Instructions: Working step-by-step using only the provided search results that are relevant, write a detailed and structured analysis to guide the users research.\n\nNew Query:""",
) -> str:
    """Assemble the user prompt: search-result context, instructions and query.

    Args:
        df (pd.DataFrame): Search results handed to ``create_context``.
        query (str): The research question appended after the instructions.
        context_token_limit (int): Token budget forwarded to ``create_context``.
        instructions (str): Instruction text placed between context and query.

    Returns:
        str: The prompt ending in ``Analysis:``, or an empty string when the
             citation-extraction step raises (the error is printed).
    """
    # Forward the caller's budget so a non-default limit actually takes effect.
    context = create_context(df, context_token_limit=context_token_limit)

    try:
        prompt = f"""{context}\n\n{instructions}\n{query}\n\nAnalysis:"""
        # The extracted citation numbers are not used here; the call is kept so
        # that a prompt the helper cannot parse still produces "" for callers.
        extract_citation_numbers_in_brackets(prompt)
        return prompt
    except Exception as e:
        print(e)
        return ""

In [145]:
from src.agent.tools.utils import ResearchReport
import openai
import instructor
from tenacity import Retrying, stop_after_attempt, wait_fixed


def get_final_answer(formatted_input: str, model_name: str) -> ResearchReport:
    """Request a structured research report from an OpenAI chat model.

    The OpenAI client is patched with ``instructor`` and the call is made with
    ``response_model=ResearchReport``. A tenacity ``Retrying`` policy
    (``stop_after_attempt(5)``, ``wait_fixed(1)``) is passed as ``max_retries``.

    Args:
        formatted_input: Prompt text; sent as the user turn after a
            ``Search Results:`` header.
        model_name: Name of the chat model to call.

    Returns:
        ResearchReport: Whatever the patched ``create`` call returns for
        ``response_model=ResearchReport``.
    """
    client = instructor.patch(openai.OpenAI())

    retry_policy = Retrying(
        stop=stop_after_attempt(5),
        wait=wait_fixed(1),
    )
    system_turn = {
        "role": "system",
        "content": "You are helpful legal research assistant. Working step-by-step, analyze the current legal question, and compare it to the search results of case law references. Using only the provided context, offer insights on applicability of the past case(s) and how the legal researcher can reference them to address the new question. Make sure to use highly structured markdown formatting, and end with a markdown table summarizing the takeaways for each sub-question topic.",
    }
    user_turn = {
        "role": "user",
        "content": f"Search Results:\n\n{formatted_input}"
    }

    report = client.chat.completions.create(
        model=model_name,
        response_model=ResearchReport,
        max_retries=retry_policy,
        messages=[system_turn, user_turn],
    )
    return report


In [146]:
# Build the prompt from the search-result frame and the research question.
formatted_input = create_formatted_input(search_res_df, test_query, context_token_limit=25000)

# Ask the model for the structured report (network call).
response_model = get_final_answer(formatted_input, model_name="gpt-4-turbo-preview")

In [147]:
Markdown(response_model.research_report)

Regarding the interpretation and application of the 'sudden and accidental' phrase within the context of pollution exclusion clauses under Comprehensive General Liability (CGL) insurance policies, especially in cases involving gradual but unintentional pollution events, a detailed analysis of applicable case law is essential. This analysis will explore how this phrase has been defined and applied in past rulings to guide the current enquiry.

**Case Law Analysis:**

- **United States Gypsum Co. v. Admiral Insurance Co., [1] & [3]:** These entries note the application of the 'continuous trigger approach' for progressive and inseparable property damage and the interpretation of insurance coverage through cause analysis. While not directly referencing 'sudden and accidental,' these cases are significant for understanding how courts may approach policy interpretations involving gradual pollution events. The principles of cause analysis and continuous trigger might be indirectly relevant to dissecting the 'sudden and accidental' clause in pollution exclusions.

- **Travelers Insurance Co. v. Eljer Manufacturing, Inc., [6] & [17]:** These references address the construction of insurance policy provisions and interpretation duties of insurers. They establish guidelines that might be applicable when debating the definition and application of 'sudden and accidental' in pollution exclusions, especially since the summary points to discussions around 'occurrence' and 'property damage' in insurance contracts.

- **Zurich Insurance Co. v. Raymark Industries, Inc., [18]:** This case directly deals with the interpretation of policy terms, especially concerning duties to defend and indemnify and trigger of coverage for specific claims like asbestos exposure. While not exclusively concerning pollution, the principles set forth regarding insurance policy interpretation could be instrumental in understanding how 'sudden and accidental' might be construed in the context of pollution.

**Discussion:**

The analyzed cases suggest that the interpretation of 'sudden and accidental' within pollution exclusions likely depends on broader legal principles of insurance policy interpretation, including the continuous trigger for coverage and cause analysis. Since gradual pollution events fall outside the traditional 'sudden' framework, the application of these clauses would require a nuanced approach, examining the specific language of the policy and considering precedents that interpret similar terms.

**Conclusion:**

Given the indirect relevance of the discussed cases, further research into directly related case law or advisory opinions concerning 'sudden and accidental' in the context of CGL policies and pollution exclusions might be necessary. However, these cases provide a foundational understanding of how courts approach complex policy interpretations and might influence arguments concerning the applicability of pollution exclusions to gradual environmental damage.

In [None]:
from src.search.query_expansion import generate_subquestions

# NOTE(review): the output saved under this cell lists landlord/tenant questions
# that do not relate to the `test_query` defined near the top of the notebook,
# so it was presumably produced with a different query — re-run to refresh.
questions = generate_subquestions(test_query, n='any number of')
questions.questions

['Should I just pay the rent and wait for my refund?',
 'If the post office shows that my original money order was cashed, am I out that money?',
 'What can I do about a landlord who is slow to make repairs?',
 'Is it legal for my landlord to enter my apartment without permission?',
 'Can a landlord raise rent in response to making repairs?',
 'How do I handle disruptive neighbors?',
 'What should I do if I suspect my landlord of stealing from me?',
 'What are my rights as a tenant in Missouri?',
 'Is it legal to use pliers to turn on water in lieu of a broken knob?',
 "What actions can I take if I've been treated unfairly by my landlord?"]

In [None]:
questions._raw_response.usage

CompletionUsage(completion_tokens=147, prompt_tokens=1130, total_tokens=1277)

In [8]:
from src.search.query_filter import generate_query_plan, auto_filter_fts_search

In [9]:
# NOTE(review): `df` is not assigned in any cell visible in this notebook (the
# frames loaded above are `summary_df` and `question_df`), so this cell relies
# on leftover kernel state and would fail on Restart & Run All — confirm which
# frame is meant and load it explicitly.
query_plan = generate_query_plan(
    input_df=df,
    query=test_query,
    filter_fields=[
        'state',
    ]
)
# Apply the plan's filter(s) to the same frame.
filtered_df = query_plan.filter_df(df=df)

[32m2024-03-19 21:42:45 - INFO - Schema shown to LLM: 
Name of each field, its type and unique values (up to 20):
* state (string);  Values - ['NM' 'IN' 'WY' 'NH' 'MP' 'PA' 'MH' 'ID' 'AR' 'MA' 'KS' 'AS' 'ND' 'PR'
 'DE' 'FL' 'LA' 'OR' 'VT' 'PW'], ... 39 more
        [0m


[32m2024-03-19 21:42:55 - INFO - Input DataFrame has 5,000 rows[0m
[32m2024-03-19 21:42:55 - INFO - Applying filter(s): state LIKE '%OR%'[0m
[32m2024-03-19 21:43:03 - INFO - Filtered DataFrame has 86 rows[0m


In [15]:
filtered_df.head(2)

Unnamed: 0,index,created_utc,full_link,id,body,title,text_label,flair_label,embeddings,token_count,llm_title,state,kmeans_label,topic_title,splade_embeddings
0,2029,1578267399,https://www.reddit.com/r/legaladvice/comments/...,ekl2ef,For context I live in the Philippines. I wont ...,My professor refuses to show us ALL of our tes...,school,9,"[-0.00954271624451715, 0.007157037183387862, 0...",953,"""Unrevealed Grades and Lack of Transparency: S...",OR,9,Legal Consequences of False Accusations,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,3320,1591126549,https://www.reddit.com/r/legaladvice/comments/...,gve3nq,Edit: I live in Washington state.\n\nSo I live...,My landlord has been harassing me about my pet...,housing,7,"[-0.0034782202413053045, 0.00616729225832095, ...",759,"""Legal dispute over pet snake: Landlord threat...",OR,3,Rental Property and Landlord Matters,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [10]:
print(query_plan.original_query)
print(query_plan.rephrased_query)

Do I have any legal recourse here? I know Oregon is an 'at will' state, but it sounds like there are at LEAST two instances that offer grounds for wrongful termination (just based on my limited knowledge of the ADA, dept of labor, BOLI, etc.). 
legal recourse for wrongful termination in 'at will' employment including issues related to mistreatment, health code violations, improper handling of company money, and potential discrimination due to medical conditions


In [13]:
# NOTE(review): `df` is not assigned in any cell visible in this notebook; the
# `body` and `embeddings` columns referenced here are also absent from
# `summary_df` / `question_df` as displayed above — confirm the intended frame.
test_res = auto_filter_fts_search(
    df=df,
    query='marijuana',
    top_k=20,
    text_column="body",
    embeddings_column="embeddings",
    filter_fields=[
        'state',
    ])

[32m2024-03-19 21:44:19 - INFO - Schema shown to LLM: 
Name of each field, its type and unique values (up to 20):
* state (string);  Values - ['NM' 'IN' 'WY' 'NH' 'MP' 'PA' 'MH' 'ID' 'AR' 'MA' 'KS' 'AS' 'ND' 'PR'
 'DE' 'FL' 'LA' 'OR' 'VT' 'PW'], ... 39 more
        [0m


[32m2024-03-19 21:44:22 - INFO - No filters were identified for query: marijuana[0m
[32m2024-03-19 21:44:22 - INFO - Revised query: marijuana[0m
[32m2024-03-19 21:44:23 - INFO - Full Text Search (FTS) search yielded a DataFrame with 20 rows[0m


In [14]:
Markdown(test_res['body'].tolist()[0])

I'm a New York State medical marijuana patient. I also work in healthcare. I applied to a new job at a new hospital, and they are discriminating against me for being a medical marijuana patient. I was offered the job and accepted, but when I went to get my pre-employment physical conducted, I gave them my medical marijuana card and informed them that I am a patient. They are now refusing to hire me. Is this legal? I already contacted the division of human rights at the labor department and they said I may or may not have a case.