In [103]:
import os
from tqdm import tqdm
import re
import pandas as pd
import numpy as np
from pandas import json_normalize
import time
import asyncio
from dotenv import load_dotenv
load_dotenv()
import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser, Document
from langchain.schema.runnable import RunnablePassthrough
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.chains.openai_functions import create_structured_output_runnable
from langchain.output_parsers import PydanticOutputParser

from pydantic import BaseModel, Field
from typing import List, Optional, Sequence, Literal, Tuple

from IPython.display import Markdown, display
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [2]:
from src.utils.utils import dataframe_to_lc_documents, lc_documents_to_dataframe
from src.utils.pydantic_utils import flatten_pydantic_instance
from src.tools.cross_encoder_rerank import rerank_with_cross_encoder

from src.parsing.utils import (
    extract_numbered_segments,
    clean_whitespace,
    split_paragraphs,
    number_segments, 
    get_specs,
    )

from src.tools.relevance_extractor import extract_relevant_passages
from src.tools.subquestion_generator import generate_subquestions
from src.tools.semantic_search import SemanticSearch
from src.parsing.search import eliminate_near_duplicates_df
from src.utils.pydantic_utils import OpenAISchema
from src.tools.splade_search import SpladeSearch

In [152]:
df = pd.read_parquet("data/reddit_legal_cluster_splade.parquet")

In [153]:
test_query = df['body'].head(1).tolist()[0]
print(test_query)

I applied for a job and after two interviews I was given for and agreed to an offer letter, had a start date.  The only remaining variable was a drug test, which I prepared myself for by bringing all my prescriptions with me.  

I take Adderall, my doctor prescribes me for 3 pills a day, insurance pays for only two a day so that’s what I get.  2 pills a day for 30 days.  

However, I don’t need adderall everyday, so I only take it as needed.  My psychiatrist knows this and is okay with it.  I don’t take it everyday so my prescription can last me months, depending on circumstances.  

Of course, I happen to take a drug test for a possible new job the day after taking an adderall at work.  No big deal, I have a prescription and it’s never been a big deal in the past.  

Except this time it was.  According to the doctor who administrated the test, my prescription was too old.  How old was it, I was prescribed in June and filled the prescription on July 8th, 2019.  I still have about a thi

In [6]:
# One extracted legal issue: a short topic title plus an optional summary.
# Used as the element type of `LegalIssues.issues`.
# NOTE(review): the class docstring and the Field descriptions presumably end up in
# the schema handed to the LLM, so treat them as prompt text rather than as ordinary
# documentation - confirm before rewording them.
# NOTE(review): a class with the same name is defined again further down in this
# notebook; whichever cell ran last wins.
class LegalIssue(BaseModel):
    """Identifying information about a legal issue."""
    # Required: short title naming the issue.
    topic: str = Field(..., description="A concise topic title for the legal issue")
    # Optional: defaults to None when no summary is produced.
    summary: Optional[str] = Field(None, description="A summary of key points")

# Container for every LegalIssue found in one text. This is the output schema passed
# to `create_structured_output_runnable` in `extract_legal_issues`.
# NOTE(review): the docstring and Field description presumably reach the LLM as part
# of the schema - confirm before rewording them.
class LegalIssues(BaseModel):
    """Identifying information in a text."""
    # Required: the extracted issues (may presumably be empty - TODO confirm).
    issues: Sequence[LegalIssue] = Field(..., description="The information of interest in the text")


async def extract_legal_issues(df: pd.DataFrame, content: str = 'body', metadata: List[str] = None, model_name: str = "gpt-4-1106-preview") -> pd.DataFrame:
    """
    Extracts structured data 'legal issues' from text using a language model.

    Each row of `df` is converted to a LangChain document, sent to the model with the
    `LegalIssues` output schema, and the returned issues are flattened into rows.
    Documents are processed one at a time, in order.

    Args:
    df (pd.DataFrame): The dataframe to extract legal issues from. It is not modified.
    content (str): The column containing the text content.
    metadata (list): List of columns to use for metadata.
    model_name (str, optional): The name of the language model to use. Defaults to "gpt-4-1106-preview".

    Returns:
    pd.DataFrame: One row per extracted issue, with the issue fields plus an 'id_'
        column holding the id of the source document. An empty dataframe with the
        columns 'topic', 'summary' and 'id_' is returned when there is nothing to process.
    """
    # Ensure an 'index' id column exists, but build it on a new frame so the caller's
    # dataframe does not silently gain a column.
    if 'index' not in df.columns:
        df = df.assign(index=df.index)

    lc_docs = dataframe_to_lc_documents(df=df, content=content, id_col="index", metadata=metadata)

    # pd.concat raises on an empty list, so short-circuit when there are no documents.
    if not lc_docs:
        return pd.DataFrame(columns=['topic', 'summary', 'id_'])

    llm = ChatOpenAI(model=model_name, temperature=0.0)

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class algorithm for extracting information in structured formats.",
            ),
            (
                "human",
                "Use the given format to extract information from the following input: {input}",
            ),
            (
                "human", "Tip: Make sure to answer in the correct format"),
        ]
    )

    runnable = create_structured_output_runnable(LegalIssues, llm, prompt)

    issue_frames = []
    for doc in lc_docs:
        # Send only the text: formatting the Document object itself would put its
        # repr (including metadata) into the prompt.
        res_doc = await runnable.ainvoke({"input": doc.page_content})
        res_doc_flat = flatten_pydantic_instance(res_doc, "")
        # Separate name so the `df` argument is not rebound inside the loop.
        issues_df = pd.DataFrame(res_doc_flat['issues'])
        issues_df['id_'] = doc.metadata['id_']
        issue_frames.append(issues_df)

    return pd.concat(issue_frames, ignore_index=True)

In [7]:
# Fixed seed so the sampled posts (and every output derived from them) are the same
# on each Restart & Run All.
df_sample = df.sample(5, random_state=42)

In [8]:
test_res = await extract_legal_issues(df_sample, model_name='gpt-3.5-turbo')

In [9]:
print(f"Shape of df: {test_res.shape}")
Markdown(f"{test_res.head(10).to_markdown(index=False)}")

Shape of df: (17, 3)


| topic                       | summary                                                                                                                                                                                                                                                    |   id_ |
|:----------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------:|
| Incident Description        | The user describes a physical altercation between themselves and their girlfriend, as well as a subsequent incident involving a friend of the girlfriend who threatened and physically assaulted the user.                                                 |  5015 |
| Evidence                    | The user mentions having proof of the friend's threats and the girlfriend as a witness to the recent incident.                                                                                                                                             |  5015 |
| Concern                     | The user is unsure if reporting the incident to the Sonic restaurant where the friend works will result in her being fired.                                                                                                                                |  5015 |
| Background                  | The user provides background information about their parents' divorce, custody arrangements, and disputes.                                                                                                                                                 |  4275 |
| Father's Situation          | The user explains that their father got injured on the job and now has a lower-paying job. He believes that child support should be proportional to his wage.                                                                                              |  4275 |
| Mother's Response           | The user mentions that their mother disagrees with the father's request for proportional child support and encourages him to take up a second job. The mother also does not punish the user's brother for his trouble with the law and academic struggles. |  4275 |
| Letter from Mother's Lawyer | The user's father received a letter from the mother's lawyer, which contains requests to change the parenting plan. The user's father wants the user to prepare a counterclaim.                                                                            |  4275 |
| User's Concerns             | The user is unsure if they should get involved in the counterclaim and asks for guidance on the format to use.                                                                                                                                             |  4275 |
| Inheritance                 | Deceased father left inheritance to his kids. Mother-in-law wants kids to sign money over to her to pay off creditors.                                                                                                                                     |  5466 |
| Disputes                    | One relative believes they are entitled to their portion of the inheritance before debts are paid off. Question about legal costs and costs for the disputing relative.                                                                                    |  5466 |

In [10]:
lc_docs = dataframe_to_lc_documents(df=test_res, content='summary', id_col='id_', metadata=['topic'])

In [11]:
reranked = rerank_with_cross_encoder(test_query, lc_docs, 10)

In [12]:
reranked_df = lc_documents_to_dataframe(reranked, 'id_')

In [13]:
print(f"Shape of df: {reranked_df.shape}")
Markdown(f"{reranked_df.to_markdown(index=False)}")

Shape of df: (10, 3)


| topic                                        |   id_ | content                                                                                                                                                                                                                                                    |
|:---------------------------------------------|------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Letter from Mother's Lawyer                  |  4275 | The user's father received a letter from the mother's lawyer, which contains requests to change the parenting plan. The user's father wants the user to prepare a counterclaim.                                                                            |
| Disputes                                     |  5466 | One relative believes they are entitled to their portion of the inheritance before debts are paid off. Question about legal costs and costs for the disputing relative.                                                                                    |
| Mother's Response                            |  4275 | The user mentions that their mother disagrees with the father's request for proportional child support and encourages him to take up a second job. The mother also does not punish the user's brother for his trouble with the law and academic struggles. |
| Concerns about being mistaken as a pedophile |   620 | The user is worried that their actions may make them look like a pedophile and result in arrest.                                                                                                                                                           |
| Engaging with pedophiles                     |   620 | The user has been pretending to be a pedophile and wasting their time.                                                                                                                                                                                     |
| Incident Description                         |  5015 | The user describes a physical altercation between themselves and their girlfriend, as well as a subsequent incident involving a friend of the girlfriend who threatened and physically assaulted the user.                                                 |
| Request for advice                           |   620 | The user is seeking advice on how to handle the situation and their inability to sleep.                                                                                                                                                                    |
| Inheritance                                  |  5466 | Deceased father left inheritance to his kids. Mother-in-law wants kids to sign money over to her to pay off creditors.                                                                                                                                     |
| Concern                                      |  5015 | The user is unsure if reporting the incident to the Sonic restaurant where the friend works will result in her being fired.                                                                                                                                |
| Pedophile ring on Omegle                     |   620 | There is an undercover pedophile ring on Omegle.                                                                                                                                                                                                           |

In [14]:
test_query_clean = clean_whitespace(test_query)
numbered_passage = number_segments(test_query_clean, 2)
print(numbered_passage)

<#1#> I applied for a job and after two interviews I was given for and agreed to an offer letter, had a start date.  The only remaining variable was a drug test, which I prepared myself for by bringing all my prescriptions with me.  

  <#2#> I take Adderall, my doctor prescribes me for 3 pills a day, insurance pays for only two a day so that’s what I get.  2 pills a day for 30 days.  

  <#3#> However, I don’t need adderall everyday, so I only take it as needed.  My psychiatrist knows this and is okay with it. <#4#> I don’t take it everyday so my prescription can last me months, depending on circumstances.  

   Of course, I happen to take a drug test for a possible new job the day after taking an adderall at work. <#5#> No big deal, I have a prescription and it’s never been a big deal in the past.  

   Except this time it was. <#6#> According to the doctor who administrated the test, my prescription was too old.  How old was it, I was prescribed in June and filled the prescription o

In [15]:
query = "Tell me about the user's doctor."

res = extract_relevant_passages(query, test_query, 2)

In [16]:
print(res)

I take Adderall, my doctor prescribes me for 3 pills a day, insurance pays for only two a day so that’s what I get.  2 pills a day for 30 days.

However, I don’t need adderall everyday, so I only take it as needed.  My psychiatrist knows this and is okay with it.  I don’t take it everyday so my prescription can last me months, depending on circumstances.

I still have about a third of a bottle, I take adderall twice a week on average.  This doctor flipped out. 

I politely tried to explain that I don’t take my adderall everyday because I don’t need to and she would no listen.  So once again, I had my psychiatrist send another letter explaining in more detail. 


In [17]:
subquestions = generate_subquestions(test_query, "2 to 3")

In [18]:
subquestions

['What are the rules for prescription drug validity in Texas?',
 'How long is a Schedule II drug prescription valid in Texas?',
 'What are the consequences of having an expired prescription for a Schedule II drug in Texas?']

In [154]:
semantic_search = SemanticSearch(df, 'embeddings')
splade_search = SpladeSearch(df, 'body', 'splade_embeddings')

In [155]:
# Hybrid retrieval: for every sub-question collect the 5 best dense (embedding) hits
# and the 5 best sparse (SPLADE) hits, then stack all result frames into one.
top_k = []

for subquestion in subquestions:
    dense_res = semantic_search.query_similar_documents(subquestion, 5, filter_criteria=None)
    sparce_res = splade_search.splade_search_df(subquestion, 5, filter_criteria=None)
    # Dense hits first, then sparse hits, for each sub-question.
    top_k.extend([dense_res, sparce_res])

top_k_df = pd.concat(top_k, ignore_index=True)

In [156]:
print(f"shape: {top_k_df.shape}")
top_k_df.head(2)

shape: (30, 17)


Unnamed: 0,index,created_utc,full_link,id,body,title,text_label,flair_label,embeddings,token_count,llm_title,State,kmeans_label,topic_title,splade_embeddings,sim_score,sim_score_sparce
0,1078,1575952538,https://www.reddit.com/r/legaladvice/comments/...,e8lsen,I applied for a job and after two interviews I...,"Failed a drug test due to amphetamines, I have...",employment,5,"[9.475638042064453e-05, 0.0005111666301983955,...",493,"""Validity of Schedule II Drug Prescription in ...",PR,8,Employment Legal Concerns and Issues,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.655581,99.70311
1,9156,1473870804,https://www.reddit.com/r/legaladvice/comments/...,52rcpe,"I'm a Freshman adult, former Texan resident an...",(TN) I'm apparently a POI at my new Uni. Shoul...,criminal,2,"[0.01599887962418506, 0.010982281649642614, 0....",546,"""Legal advice needed: Accused of selling Xanax...",MT,9,Legal Consequences of False Accusations,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.610313,20.996323


In [157]:
top_k_df_dedupe = eliminate_near_duplicates_df(top_k_df, 'body')
len(top_k_df), len(top_k_df_dedupe)

(30, 18)

In [158]:
lc_docs = dataframe_to_lc_documents(top_k_df_dedupe, content='body', id_col='index', metadata=['text_label', 'State'])

In [159]:
len(lc_docs)

18

In [162]:
def get_documents_by_ids(ids: List[str], docs: List[Document]) -> List[Document]:
    """
    Given a list of ids, return the corresponding Document objects.

    Ids are compared as integers against each document's `metadata['id_']`.
    Tokens that cannot be parsed as an integer (for example "NONE" or stray
    words in an LLM answer) are ignored instead of raising. Surrounding
    punctuation such as a trailing comma is stripped before parsing.

    Args:
        ids (List[str]): List of document ids.
        docs (List[Document]): List of Document objects.

    Returns:
        List[Document]: Documents whose id is in `ids`, in the order they
            appear in `docs` (not in the order of `ids`).
    """
    wanted_ids = set()
    for id_ in ids:
        token = str(id_).strip().strip(",;.[]()\"'")
        try:
            wanted_ids.add(int(token))
        except ValueError:
            # Not a numeric id; skip it rather than failing the whole lookup.
            continue
    # Set membership keeps the filter linear in the number of documents.
    return [doc for doc in docs if doc.metadata['id_'] in wanted_ids]


def llm_select_relevant_docs(
    query: str, docs: List[Document], n_matches: int,
) -> List[Document]:
    """
    Given a query and a list of docs, select the docs whose contents match best,
        according to the LLM. Use the doc IDs to select the docs.

    The LLM answer is parsed leniently: every run of digits is treated as a doc id,
    so answers such as "12, 40" or "ID=12 ID=40" work as well as "12 40". Duplicate
    ids are dropped and at most `n_matches` ids are kept.

    Side effect: each returned document gets `metadata['source'] = 'LLM'` set in
    place, so the caller's Document objects are modified.

    Args:
        query: query string
        docs: list of Document objects
        n_matches: maximum number of matches to return

    Returns:
        list of Document objects, in the order they appear in `docs`. A single
        placeholder Document with page_content "NO_ANSWER" is returned when `docs`
        is empty or the answer contains no ids (e.g. "NONE"). The list is empty if
        the answer names ids that are not present in `docs`.
    """
    default_response = Document(page_content="NO_ANSWER")

    # Nothing to choose from: avoid a pointless API call.
    if not docs:
        return [default_response]

    doc_contents = "\n\n".join(
        [f"DOC: ID={d.metadata['id_']}, CONTENT: {d.page_content}" for d in docs]
    )
    prompt = f"""
    Given the following QUERY: 
    {query}
    and the following DOCS with IDs and contents
    {doc_contents}
    
    Find at most {n_matches} DOCs that are most relevant to the QUERY.
    Return your answer as a sequence of DOC IDS ONLY, for example: 
    "id1 id2 id3..."
    If there are no relevant docs, simply say NONE.
    Even if there is only one relevant doc, return it as a single ID.
    Do not give any explanations or justifications.
    """

    response = openai.ChatCompletion.create(
        model="gpt-4",
        # Deterministic selection, matching the temperature used by query_planner.
        temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful research assistant helping find relevant documents."},
            {"role": "user", "content": prompt},        
        ],
    )

    answer = response['choices'][0]['message'].content or ""

    # Extract numeric ids only. An exact comparison with 'NONE' plus a whitespace
    # split breaks on answers like "NONE." or "12, 40".
    ids = list(dict.fromkeys(re.findall(r"\d+", answer)))[:n_matches]
    if not ids:
        return [default_response]

    # Get the documents by ids
    relevant_docs = get_documents_by_ids(ids, docs)

    # Add new metadata item
    for doc in relevant_docs:
        doc.metadata['source'] = 'LLM'

    return relevant_docs

In [160]:
lc_docs

[Document(page_content='In the 1990s as e-commerce finally started to get going, one of the first things Americans did was buy drugs from Canadian pharmacies. \n\nAmerican drug companies like to say they spend the money on R&amp;D and the rest of the world benefits from the drugs at a lower cost while they\'re forced to charge Americans more to recover their investment. But this makes no sense.  Drugs are developed worldwide, and even the ones developed in the US often undergo heavily government subsidised or even wholly funded research. And why would they sell those drugs to other countries below cost and only recoup those costs in the US? That would be bad business. \n\nFor all the boom and clang about a free market, US drug companies want to be able to charge whatever they can for life saving drugs because if you\'ll die without it you\'ll pay whatever it takes, right?\n\nSo they lobbied the government to put a stop to these cheap drugs coming in from Canada and in 2003 the US feder

In [163]:
res = llm_select_relevant_docs(test_query, lc_docs, n_matches=5)

In [165]:
display(Markdown(f"{test_query}"))

I applied for a job and after two interviews I was given for and agreed to an offer letter, had a start date.  The only remaining variable was a drug test, which I prepared myself for by bringing all my prescriptions with me.  

I take Adderall, my doctor prescribes me for 3 pills a day, insurance pays for only two a day so that’s what I get.  2 pills a day for 30 days.  

However, I don’t need adderall everyday, so I only take it as needed.  My psychiatrist knows this and is okay with it.  I don’t take it everyday so my prescription can last me months, depending on circumstances.  

Of course, I happen to take a drug test for a possible new job the day after taking an adderall at work.  No big deal, I have a prescription and it’s never been a big deal in the past.  

Except this time it was.  According to the doctor who administrated the test, my prescription was too old.  How old was it, I was prescribed in June and filled the prescription on July 8th, 2019.  I still have about a third of a bottle, I take adderall twice a week on average.  This doctor flipped out.  Not only did she refuse to inspect my prescription bottle when I talked to her in person, she demanded my medical records for all the prescriptions I take.  

Both of my doctors complied and when I talked to her on the phone to follow up, her exact words were “You’re prescription is too old, where are you getting it from.”  This pushed back my start date from a definite start to “wait and see” as I was still having my doctors trying to appease the drug test doctor.  

I politely tried to explain that I don’t take my adderall everyday because I don’t need to and she would no listen.  So once again, I had my psychiatrist send another letter explaining in more detail.  
I didn’t hear anything back after that, and when following up with the perspective employer and now they’re having a “hiring freeze” and won’t be filling the position.  


So, in Texas, when is a schedule II drug no longer considered valid?  

TL;DR
Doctor said my prescription was too old and I must be getting amphetamines from an illegal source.

In [166]:
# Render the text of every document the LLM selected, one Markdown block each.
for selected_doc in res:
    display(Markdown(str(selected_doc.page_content)))

I currently rent a unit in Texas, and when I signed my lease in February I noticed that the lease clearly states that firearms are not allowed in the apartments. I didn't see this as a deal breaker considering I didn't own a firearm at the time (and still don't) and believed I was moving into a safe location.

Come move in day mid August, I was hearing about shootings in the area, and before that, I even heard that a shootout occurred in the parking lot of the very complex I'm renting now just before moving in. It was at this time I regretted signing a lease at a location that didn't allow me to have a firearm for self defense just in case. The shootings seem to have stopped, but I often hear of break-ins and thefts that happen near the complex, and it's only making me worry more (I'm on the first floor, and the first unit you see when coming up to the building). Police presence has been incredibly high since moving in, and I learned this is because there are an unreal amount of crime going on under the surface that I didn't even know was happening, even though it's mainly just drug busts this unsettles me.

Back to the original question, can a landlord in Texas really restrict your right to possess a firearm on the property you're renting?

I have worked a 9-5 Mon-Fri schedule for 6 years. Due to COVID, my supervisor is putting us on alternating 12-hour shifts. I have recently begun a time-release medication for mental health that does not last 12 hours, and one of the side affects is that I am rendered unable to focus/complete work related tasks after this medication wears off. I was told that if I fill out FMLA paperwork to reflect this/have documented confirmation from my physician, I will be told to use my sick leave for the hours I can’t work. Is this legal? Are there any actions I can take regarding this?

I live in South Carolina.

I've seen some guidelines (between 3 and 7 days) but besides DC I can't find a legal limit.

So in my line of work (aircraft maintenance) we are required to have random drug testing per the DoT and FAA. Per the requirement we are notified only just before the test is to be conducted. Unfortunately our company has been saving money by scheduling the test after our shift, sometimes up to 2 hours after, since we work a graveyard shift. I have monthly doctors appointments and recently the random test came very close to interfering with an appointment that are very necessary and hard to reschedule. If I were to not show up to the drug test do attend my doctors appointments would the company have and grounds to terminate me? They say skipping a test is kin to testing positive, but they are also unnecessarily interfering with our personal lives here just to save money. I live in Ohio if that matters, but most of these regs are federal.

This is her first offense. She was driving on the opposite side the road on a two way street. The bus had a stop sign pulled up, but she failed to stop/realize she had to. She also didn't have her insurance card in her possession. She was handed two court summons. What possible penalties does she face? Will she be forced to do 15 days in jail, community service, or have points on her license? Advice and info is much appreciated. 

In [36]:
# One node of a query plan: a question, the ids of the questions it depends on, and
# a node type. Used as the element type of `QueryPlan.query_graph`.
# NOTE(review): `QueryPlan.openai_schema` is sent to OpenAI as a function definition
# in `query_planner`; the docstring and Field descriptions below presumably form part
# of that schema, so they are prompt text (typos included) - confirm before editing.
class Query(BaseModel):
    """Class representing a single question in a query plan."""

    # Required: identifier that other queries reference in `dependencies`.
    id: int = Field(..., description="Unique id of the query")
    # Required: the question text for this node.
    question: str = Field(
        ...,
        description="Question we are asking using a question answer system, if we are asking multiple questions, this question is asked by also providing the answers to the sub questions",
    )
    # Ids of other queries; defaults to a fresh empty list per instance.
    dependencies: List[int] = Field(
        default_factory=list,
        description="List of sub questions that need to be answered before we can ask the question. Use a subquery when anything may be unknown, and we need to ask multiple questions to get the answer. Dependences must only be other queries.",
    )
    # Restricted to the two literal values; defaults to a single question.
    node_type: Literal['SINGLE_QUESTION', 'MERGE_MULTIPLE_RESPONSES'] = Field(
        default='SINGLE_QUESTION',
        description="Type of question, either a single question or a multi-question merge",
    )

# Schema for a whole query plan; `query_planner` sends `QueryPlan.openai_schema` to
# OpenAI and parses the reply with `QueryPlan.from_response`.
class QueryPlan(OpenAISchema):
    """Container class representing a tree of questions to ask a question answering system."""

    query_graph: List[Query] = Field(
        ..., description="The query graph representing the plan"
    )

    def _dependencies(self, ids: List[int]) -> List[Query]:
        """Look up the queries of this plan whose id is listed in `ids`."""
        matching = []
        # Walk the graph in its own order so results follow plan order, not `ids` order.
        for node in self.query_graph:
            if node.id in ids:
                matching.append(node)
        return matching



In [37]:
def query_planner(question: str, n: str='3 to 5', model_name: str = "gpt-3.5-turbo-0613", max_tokens: int = 1000) -> QueryPlan:
    """
    Ask an OpenAI chat model to break a question into a plan of dependent sub-queries.

    The model is forced to call the `QueryPlan` function schema, and its reply is
    parsed back into a `QueryPlan` instance.

    Args:
        question: The question (or full problem description) to decompose.
        n: How many sub-queries to request, as free text inserted into the prompt
            (e.g. '3 to 5').
        model_name: Chat model used for planning. Defaults to the model this
            function previously hard-coded.
        max_tokens: Completion token cap. Defaults to the previously hard-coded 1000.

    Returns:
        QueryPlan: The parsed query plan.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a world class query planning algorithm capable of breaking apart questions into its dependency queries such that the answers can be used to inform the parent question. Do not answer the questions, simply provide a correct compute graph with good specific questions to ask and relevant dependencies. Before you call the function, think step-by-step to get a better understanding of the problem.",
        },
        {
            "role": "user",
            "content": f"Consider: {question}\nGenerate the correct query plan with {n} sub-queries.",
        },
    ]

    completion = openai.ChatCompletion.create(
        model=model_name,
        temperature=0,
        functions=[QueryPlan.openai_schema],
        # Force a call to the QueryPlan function so the reply is always structured.
        function_call={"name": QueryPlan.openai_schema["name"]},
        messages=messages,
        max_tokens=max_tokens,
    )
    return QueryPlan.from_response(completion)

In [38]:
plan = query_planner(
    test_query
)

In [39]:
from pprint import pprint

pprint(plan.dict())

{'query_graph': [{'dependencies': [],
                  'id': 1,
                  'node_type': 'SINGLE_QUESTION',
                  'question': 'When does a schedule II drug prescription '
                              'expire in Texas?'},
                 {'dependencies': [1],
                  'id': 2,
                  'node_type': 'SINGLE_QUESTION',
                  'question': "What is the expiration date of the user's "
                              'Adderall prescription?'},
                 {'dependencies': [2],
                  'id': 3,
                  'node_type': 'SINGLE_QUESTION',
                  'question': 'How often does the user take Adderall?'},
                 {'dependencies': [2, 3],
                  'id': 4,
                  'node_type': 'SINGLE_QUESTION',
                  'question': "Is the user's Adderall prescription still "
                              'valid?'},
                 {'dependencies': [4],
                  'id': 5,
                  'no

In [40]:
plan

QueryPlan(query_graph=[Query(id=1, question='When does a schedule II drug prescription expire in Texas?', dependencies=[], node_type='SINGLE_QUESTION'), Query(id=2, question="What is the expiration date of the user's Adderall prescription?", dependencies=[1], node_type='SINGLE_QUESTION'), Query(id=3, question='How often does the user take Adderall?', dependencies=[2], node_type='SINGLE_QUESTION'), Query(id=4, question="Is the user's Adderall prescription still valid?", dependencies=[2, 3], node_type='SINGLE_QUESTION'), Query(id=5, question="Why did the doctor claim the user's prescription was too old?", dependencies=[4], node_type='SINGLE_QUESTION')])

In [41]:
plan.query_graph[0].question

'When does a schedule II drug prescription expire in Texas?'

In [132]:
Markdown(f"{test_query}")

I applied for a job and after two interviews I was given for and agreed to an offer letter, had a start date.  The only remaining variable was a drug test, which I prepared myself for by bringing all my prescriptions with me.  

I take Adderall, my doctor prescribes me for 3 pills a day, insurance pays for only two a day so that’s what I get.  2 pills a day for 30 days.  

However, I don’t need adderall everyday, so I only take it as needed.  My psychiatrist knows this and is okay with it.  I don’t take it everyday so my prescription can last me months, depending on circumstances.  

Of course, I happen to take a drug test for a possible new job the day after taking an adderall at work.  No big deal, I have a prescription and it’s never been a big deal in the past.  

Except this time it was.  According to the doctor who administrated the test, my prescription was too old.  How old was it, I was prescribed in June and filled the prescription on July 8th, 2019.  I still have about a third of a bottle, I take adderall twice a week on average.  This doctor flipped out.  Not only did she refuse to inspect my prescription bottle when I talked to her in person, she demanded my medical records for all the prescriptions I take.  

Both of my doctors complied and when I talked to her on the phone to follow up, her exact words were “You’re prescription is too old, where are you getting it from.”  This pushed back my start date from a definite start to “wait and see” as I was still having my doctors trying to appease the drug test doctor.  

I politely tried to explain that I don’t take my adderall everyday because I don’t need to and she would no listen.  So once again, I had my psychiatrist send another letter explaining in more detail.  
I didn’t hear anything back after that, and when following up with the perspective employer and now they’re having a “hiring freeze” and won’t be filling the position.  


So, in Texas, when is a schedule II drug no longer considered valid?  

TL;DR
Doctor said my prescription was too old and I must be getting amphetamines from an illegal source.

In [146]:
splade = SpladeSearch(df, 'body', 'splade_embeddings')

In [148]:
terms = splade.generate_expansion_terms(test_query)

SPLADE generated 81 expansion terms
Top expansion terms: ['frozen', 'dates', 'yes', 'offered', 'variables', 'refused', 'hire', 'dose', 'refusal', 'bottles']


In [194]:
# Schema for a single extracted legal issue (the element type of `LegalIssues.issues`).
# NOTE(review): the class docstring and the Field descriptions presumably end up in the
# function schema sent to the model, so treat them as prompt text when editing — confirm.
class LegalIssue(BaseModel):
    """Identifying information about a legal issue."""
    
    # Required field (no default).
    topic: str = Field(..., description="A concise topic title for the legal issue")
    # Optional field; defaults to None when no summary is supplied.
    summary: Optional[str] = Field(None, description="A summary of key points")


# Top-level container passed to the chat completion call: its `openai_schema` is used as
# the function definition and `from_response` parses the completion back into an instance
# (see extract_structured_legal_issues).
# NOTE(review): the docstring and Field description presumably feed that schema — confirm
# against OpenAISchema before rewording them.
class LegalIssues(OpenAISchema):
    """Identifying information in a text."""
    
    # Required field: the LegalIssue entries found in one input text.
    issues: Sequence[LegalIssue] = Field(..., description="The information of interest in the text")


def extract_structured_legal_issues(df: pd.DataFrame, content: str = 'body', metadata: Optional[List[str]] = None, model_name: str = "gpt-4-1106-preview") -> pd.DataFrame:
    """
    Extracts structured data 'legal issues' from text using a language model.

    Every row of `df` is converted to a document and sent to the OpenAI chat
    completion endpoint with a function call pinned to the `LegalIssues` schema.
    The issues parsed from each response are tagged with the originating
    document id and stacked into a single dataframe.

    Args:
    df (pd.DataFrame): The dataframe to extract legal issues from. It is not modified.
    content (str): The column containing the text content.
    metadata (list, optional): List of columns to use for metadata.
    model_name (str, optional): The name of the language model to use. Defaults to "gpt-4-1106-preview".

    Returns:
    pd.DataFrame: A dataframe containing the extracted legal issues, one row per issue,
    with an 'id_' column identifying the source document. When no documents are
    produced from `df`, an empty dataframe with columns 'topic', 'summary' and 'id_'
    is returned.
    """
    # Documents are keyed on an 'index' column. When it is missing, derive it from the
    # actual index on a copy so the caller's dataframe is left untouched.
    source_df = df if 'index' in df.columns else df.assign(index=df.index)

    lc_docs = dataframe_to_lc_documents(df=source_df, content=content, id_col="index", metadata=metadata)

    # The system message is identical for every document, so build it once.
    system_message = {
        "role": "system",
        "content": "You are a world class algorithm for extracting information in structured formats.",
    }

    results = []
    for doc in lc_docs:
        messages = [
            system_message,
            {
                "role": "user",
                "content": f"Use the given format to extract information from the following input: {doc.page_content}",
            },
        ]
        completion = openai.ChatCompletion.create(
            model=model_name,
            temperature=0,
            functions=[LegalIssues.openai_schema],
            # Pin the call to the LegalIssues function so the reply is always structured.
            function_call={"name": LegalIssues.openai_schema["name"]},
            messages=messages,
        )
        res_doc = LegalIssues.from_response(completion)
        res_doc_flat = flatten_pydantic_instance(res_doc, "")
        # Use a dedicated name here; reusing `df` would shadow the input dataframe.
        issues_df = pd.DataFrame(res_doc_flat['issues'])
        issues_df['id_'] = doc.metadata['id_']
        results.append(issues_df)

    # pd.concat raises ValueError on an empty list, so handle the no-documents case explicitly.
    if not results:
        return pd.DataFrame(columns=['topic', 'summary', 'id_'])

    return pd.concat(results, ignore_index=True)

In [195]:
# Run the extraction on the sample frame with the default content column and model.
# NOTE(review): `df_sample` is not defined in the cells visible here — make sure the cell
# that creates it runs first on a fresh kernel. This issues one API call per document.
test_res = extract_structured_legal_issues(df_sample)

In [196]:
# Report the result dimensions, then render the first ten rows as a Markdown table.
print(f"Shape of df: {test_res.shape}")
Markdown(test_res.head(10).to_markdown(index=False))

Shape of df: (15, 3)


| topic                               | summary                                                                                                                                                                                                                                                 |   id_ |
|:------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------:|
| Domestic Violence                   | The individual describes a physical altercation with his girlfriend, involving pushing and slapping each other, which could be considered domestic violence.                                                                                            |  5015 |
| Assault Threats                     | The individual received threats of physical violence over text message from his girlfriend's friend, which could be considered assault threats.                                                                                                         |  5015 |
| Assault                             | The individual was slapped by his girlfriend's friend while at his place of business, which is an act of assault.                                                                                                                                       |  5015 |
| Workplace Violence                  | The incident of being slapped by the girlfriend's friend occurred at the friend's workplace, which raises issues of workplace violence and potential consequences for the employee.                                                                     |  5015 |
| Child Support Modification          | Father seeks to modify child support payments due to a decrease in income resulting from a job-related injury and subsequent employment change. Mother opposes the modification, suggesting father should take a second job despite his disability.     |  4275 |
| Parenting Plan Dispute              | Father received a letter from mother's lawyer with requests to change the parenting plan. Mother's written responses are allegedly non-factual and slanderous, leading the father to seek assistance in drafting counterclaims.                         |  4275 |
| Custody and Behavioral Issues       | Post-divorce, the mother has custody of the children. The younger brother is facing legal troubles and academic failures, with the mother reportedly not disciplining him and discouraging the father from doing so.                                    |  4275 |
| Legal Involvement of Family Members | The father is requesting the assistance of his child in drafting counterclaims to the mother's lawyer's letter, raising concerns about the appropriateness of involving family members in legal disputes.                                               |  4275 |
| Inheritance and Debt Obligations    | A deceased father left an inheritance to his children, but it is claimed that he owes an amount to creditors that is roughly equal to the inheritance. The mother-in-law wants the children to sign over the inheritance to her to pay off these debts. |  5466 |
| Dispute Over Debt Payment           | While some relatives agree to sign over the inheritance to pay the debts, one relative disagrees, believing they are entitled to their share before debts are settled.                                                                                  |  5466 |