In [None]:
import re
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv

from openai import OpenAI

load_dotenv()

In [2]:
df = pd.read_csv('justice.csv')

In [3]:
df['decision_type'].value_counts()

decision_type
majority opinion                     2829
per curiam                            267
plurality opinion                     153
equally divided                        17
dismissal - rule 46                     9
dismissal - other                       8
dismissal - improvidently granted       6
dismissal - moot                        5
memorandum                              1
opinion of the court                    1
Name: count, dtype: int64

In [4]:
df['issue_area'].value_counts()

issue_area
Criminal Procedure      859
Civil Rights            568
Economic Activity       542
First Amendment         353
Judicial Power          342
Due Process             128
Federalism              125
Privacy                  70
Unions                   60
Federal Taxation         51
Attorneys                37
Miscellaneous            20
Private Action            4
Interstate Relations      2
Name: count, dtype: int64

In [5]:
majority_opinions = df[(df["decision_type"] == "majority opinion")&(df["facts_len"] >= 2500)][["name","term","facts","decision_type","first_party_winner","issue_area","facts_len"]]

In [6]:
majority_opinions

Unnamed: 0,name,term,facts,decision_type,first_party_winner,issue_area,facts_len
464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,False,Civil Rights,3039
607,Graham v. Connor,1988,"<p>On November 12, 1984, Dethorne Graham, a di...",majority opinion,True,Civil Rights,2643
1593,Sosa v. Alvarez-Machain,2003,<p>A U.S. Drug Enforcement Agency (DEA) specia...,majority opinion,True,Economic Activity,2549
1810,Jones v. Bock,2006,<p>Congress passed the Prisoner Litigation Ref...,majority opinion,True,Criminal Procedure,2758
1876,Boumediene v. Bush,2007,<p>In 2002 Lakhdar Boumediene and five other A...,majority opinion,True,Criminal Procedure,2608
1926,Fitzgerald v. Barnstable School Committee,2008,"<p>In February 2001 Jacqueline Fitzgerald, a k...",majority opinion,True,Civil Rights,2724
2174,United States v. Home Concrete & Supply,2011,<p>Plaintiffs Stephen R. Chandler and Robert L...,majority opinion,False,Federal Taxation,2982
2183,Armour v. City of Indianapolis,2011,"<p>In April of 2001, the City of Indianapolis ...",majority opinion,False,Economic Activity,2877
2191,National Federation of Independent Business v....,2011,"<p>Amid intense public interest, Congress pass...",majority opinion,False,Federalism,2922
2270,Trevino v. Thaler,2012,"<p>On the night of June 9, 1996, Carlos Trevin...",majority opinion,True,Civil Rights,2848


In [7]:
majority_opinions["issue_area"].value_counts()

issue_area
Civil Rights          11
Economic Activity      9
Criminal Procedure     7
Judicial Power         6
Federalism             2
First Amendment        2
Federal Taxation       1
Unions                 1
Miscellaneous          1
Name: count, dtype: int64

In [8]:
facts = majority_opinions['facts']

In [9]:
majority_opinions.iloc[:3096]

Unnamed: 0,name,term,facts,decision_type,first_party_winner,issue_area,facts_len
464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,False,Civil Rights,3039
607,Graham v. Connor,1988,"<p>On November 12, 1984, Dethorne Graham, a di...",majority opinion,True,Civil Rights,2643
1593,Sosa v. Alvarez-Machain,2003,<p>A U.S. Drug Enforcement Agency (DEA) specia...,majority opinion,True,Economic Activity,2549
1810,Jones v. Bock,2006,<p>Congress passed the Prisoner Litigation Ref...,majority opinion,True,Criminal Procedure,2758
1876,Boumediene v. Bush,2007,<p>In 2002 Lakhdar Boumediene and five other A...,majority opinion,True,Criminal Procedure,2608
1926,Fitzgerald v. Barnstable School Committee,2008,"<p>In February 2001 Jacqueline Fitzgerald, a k...",majority opinion,True,Civil Rights,2724
2174,United States v. Home Concrete & Supply,2011,<p>Plaintiffs Stephen R. Chandler and Robert L...,majority opinion,False,Federal Taxation,2982
2183,Armour v. City of Indianapolis,2011,"<p>In April of 2001, the City of Indianapolis ...",majority opinion,False,Economic Activity,2877
2191,National Federation of Independent Business v....,2011,"<p>Amid intense public interest, Congress pass...",majority opinion,False,Federalism,2922
2270,Trevino v. Thaler,2012,"<p>On the night of June 9, 1996, Carlos Trevin...",majority opinion,True,Civil Rights,2848


In [11]:
facts = facts.str.replace('<p>','')
facts = facts.str.replace('<p dir="ltr">','')
facts = facts.str.replace('</p>','')
facts = facts.str.replace('/n','')
facts = facts.str.replace('<em>','')
facts = facts.str.replace('</em>','')

In [12]:
facts[1876]

'In 2002 Lakhdar Boumediene and five other Algerian natives were seized by Bosnian police when U.S. intelligence officers suspected their involvement in a plot to attack the U.S. embassy there. The U.S. government classified the men as enemy combatants in the war on terror and detained them at the Guantanamo Bay Naval Base, which is located on land that the U.S. leases from Cuba. Boumediene filed a petition for a writ of habeas corpus, alleging violations of the Constitution\'s Due Process Clause, various statutes and treaties, the common law, and international law. The District Court judge granted the government\'s motion to have all of the claims dismissed on the ground that Boumediene, as an alien detained at an overseas military base, had no right to a habeas petition. The U.S. Court of Appeals for the D.C. Circuit affirmed the dismissal but the Supreme Court reversed in Rasul v. Bush, which held that the habeas statute extends to non-citizen detainees at Guantanamo.\nIn 2006, Cong

In [13]:
terms = majority_opinions['term'].astype(str)

In [None]:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url="https://api.deepseek.com")

llm_reas_judge = []

for f,t in zip(facts,terms):
    persona = """"
        Your task is to clean the text that proceeds the next appearing ':' character. 
        To clean it, you must remove all information that indicates the Supreme Court's decision. 
        Please note that there are other decisions and verdicts mentioned in the text, which should 
        be retained. It is only the Supreme Court decisions which should be ommitted. Additionally, 
        any Supreme Court decisions that were made prior to the year {t} (if specified), should be retained, not removed.
        This may require splitting a sentence and only removing the judgement part of the 
        sentence, whilst still retaining all other information in the sentence.
        The text to clean is:

    """
    content = persona + f

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": content},
        ],
        stream=False
    )

    matches = re.search(r'\n\n---\n\n(.*?)\n\n---\n\n', str(response.choices[0].message.content), re.DOTALL)

    if matches:
        result = matches.group(1)
        llm_reas_judge.append(result)
    else:
        print("No content found between the specified substrings.")
        print(response.choices[0].message.content)
        llm_reas_judge.append(np.nan)

In [None]:
llm_reas_judge

NameError: name 'llm_reas_judge' is not defined