In [None]:
"""
Author: Markela Zeneli
"""

In [1]:
from collections import Counter
import re
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv

from openai import OpenAI
import goodfire

load_dotenv()

True

In [3]:
# Load the full case table; the relative path means the CSV must sit in the working directory.
df = pd.read_csv(filepath_or_buffer='justice.csv')

In [82]:
# Select the rows decided by majority opinion whose facts_len is at least 2500, keeping
# only the columns the rest of the notebook works with.
selected_columns = [
    "name",
    "term",
    "facts",
    "decision_type",
    "first_party",
    "second_party",
    "first_party_winner",
    "issue_area",
    "facts_len",
    "majority_vote",
]
long_majority_mask = (df["decision_type"] == "majority opinion") & (df["facts_len"] >= 2500)

# A single .loc selection followed by .copy() yields an independent frame, so the column
# assignments made in later cells do not raise SettingWithCopyWarning (the original
# chained df[mask][cols] form leaves pandas unsure whether the result is a view or a copy).
majority_opinions = df.loc[long_majority_mask, selected_columns].copy()

In [83]:
# Display the recorded majority vote count for every selected case.
majority_opinions["majority_vote"]

464     9
607     9
1593    6
1810    9
1876    5
1926    9
2174    5
2183    6
2191    5
2270    5
2348    9
2399    5
2476    6
2683    8
2969    7
3018    8
3068    9
3086    8
3088    7
3092    9
3096    5
3109    5
3133    6
3147    5
3150    9
3168    9
3176    9
3180    9
3195    6
3205    7
3207    5
3212    5
3216    9
3218    8
3221    9
3228    5
3264    7
3265    9
3272    8
3277    6
Name: majority_vote, dtype: int64

In [11]:
# Two parallel lists in the frame's row order: case names, and their terms rendered as strings
# so they can be concatenated straight into a prompt.
names = majority_opinions["name"].to_list()
terms = majority_opinions["term"].astype(str).to_list()

In [85]:
# Ask DeepSeek, case by case, for the majority vote count of each selected case and
# collect the replies (one string per case, in the same order as `names`).
#
# NOTE(review): the API key is read from OPENAI_API_KEY but is sent to the DeepSeek
# endpoint — confirm that this variable really holds a DeepSeek key.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url="https://api.deepseek.com")

deepseek_responses = []

# Plain (non-f) string: the template has no placeholders. The original opened with
# f"""" (four quotes), which put a stray '"' character at the very start of the prompt.
persona = """
        I’m going to give you the name and year of a U.S. Supreme Court case. Please tell me what the majority vote count was.
        Here is an example:

        Example input:
        Stanley v. Illinois, 1971

        Example output: 
        5

        Please keep your response to just a single number. No other words.

        Now, here is the real input:

    """

# zip keeps every case name paired with its term without manual indexing.
for case_name, case_term in zip(names, terms):
    content = persona + "Case name: " + case_name + ", Case year: " + case_term

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": content},
        ],
        stream=False
    )

    # message.content is optional; treat a missing reply as "" instead of the string 'None'.
    # Stripping matters because the replies are later compared to majority_vote by exact
    # string equality, so a trailing newline would otherwise count as a wrong answer.
    reply = response.choices[0].message.content or ""
    deepseek_responses.append(reply.strip())

In [86]:
# Show the raw DeepSeek replies collected by the loop (one string per case).
deepseek_responses

['9',
 '9',
 '8',
 '9',
 '5',
 '9',
 '5',
 '9',
 '5',
 '5',
 '5',
 '5',
 '5',
 '7',
 '8',
 '9',
 '8',
 '8',
 '7',
 '9',
 '5',
 '5',
 '7',
 '5',
 '9',
 '9',
 '5',
 '8',
 '6',
 '8',
 '5',
 '5',
 '7',
 '8',
 '9',
 '5',
 '7',
 '5',
 '5',
 '6']

In [64]:
# Translate each raw reply into a winner flag: True when it contains "\nTrue",
# False when it contains "\nFalse" (checked in that order), NaN otherwise.
# NOTE(review): the vote-count prompt asks for a bare number, so this looks like a
# leftover from a different (True/False) prompt — confirm whether it is still needed.
deepseek_first_party_winner = [
    True if '\nTrue' in reply else (False if '\nFalse' in reply else np.nan)
    for reply in deepseek_responses
]

In [84]:
# Look up the recorded majority vote for the case used as the worked example in the prompts.
df.loc[df["name"] == "Stanley v. Illinois", "majority_vote"]

1    5
Name: majority_vote, dtype: int64

In [67]:
# Show the first-party-winner label next to each case name.
majority_opinions.loc[:, ["first_party_winner", "name"]]

Unnamed: 0,first_party_winner,name
464,False,School Committee of the Town of Burlington v. ...
607,True,Graham v. Connor
1593,True,Sosa v. Alvarez-Machain
1810,True,Jones v. Bock
1876,True,Boumediene v. Bush
1926,True,Fitzgerald v. Barnstable School Committee
2174,False,United States v. Home Concrete & Supply
2183,False,Armour v. City of Indianapolis
2191,False,National Federation of Independent Business v....
2270,True,Trevino v. Thaler


In [89]:
# Attach the DeepSeek replies as a new column. A plain list is assigned positionally, so
# this relies on deepseek_responses being in the same row order as majority_opinions
# (the replies were gathered by iterating `names`, which was taken from this frame).
majority_opinions["deepseek_responses"] = deepseek_responses

In [90]:
# The Goodfire key is read from the (lower-case) environment variable `goodfire_key`.
GOODFIRE_API_KEY = os.environ.get("goodfire_key")

# From this cell on, `client` is the Goodfire client; it rebinds the name that the
# DeepSeek cell used for its OpenAI client.
client = goodfire.Client(api_key=GOODFIRE_API_KEY)

# Model variant handed to the chat-completions calls as `model=`.
variant = goodfire.Variant("meta-llama/Llama-3.3-70B-Instruct")

In [91]:
# Ask the Llama variant, case by case, for the majority vote count of each selected case
# and collect the streamed replies (one string per case, in the same order as `names`).
llama_responses = []

# zip keeps every case name paired with its term without manual indexing.
for name, term in zip(names, terms):
    # One-shot prompt; the case is appended as "<name> , <term>".
    prompt = f'''
        I’m going to give you the name and year of a U.S. Supreme Court case. Please tell me what the majority vote count was.
        Here is an example:

        Example input:
        Stanley v. Illinois, 1971

        Example output: 
        5

        Please keep your response to just a single number. No other words.

        Now, here is the real input:
         
        {name} , {term}
        '''

    response = ""
    for token in client.chat.completions.create(
        [{"role": "user", "content": prompt}],
        model=variant,
        stream=True,
    ):
        # Guard against chunks whose delta carries no text: `str += None` would raise
        # TypeError and abort the whole loop part-way through.
        response += token.choices[0].delta.content or ""

    # Strip surrounding whitespace because the reply is later compared to majority_vote
    # by exact string equality.
    llama_responses.append(response.strip())


In [92]:
# Show the raw Llama replies collected by the loop (one string per case).
llama_responses

['5',
 '9',
 '6',
 '7',
 '5',
 '9',
 '5',
 '6',
 '5',
 '9',
 '9',
 '6',
 '6',
 '7',
 '7',
 '8',
 '9',
 '8',
 '7',
 '9',
 '5',
 '5',
 '7',
 '5',
 '9',
 '9',
 '9',
 '9',
 '6',
 '7',
 '5',
 '5',
 '9',
 '7',
 '9',
 '5',
 '7',
 '7',
 '9',
 '6']

In [115]:
# Attach the Llama replies as a new column. A plain list is assigned positionally, so
# this relies on llama_responses being in the same row order as majority_opinions
# (the replies were gathered by iterating `names`, which was taken from this frame).
majority_opinions["llama_responses"] = llama_responses

In [96]:
# Flag a case as "contaminated" for DeepSeek when its reply is exactly the recorded
# majority vote rendered as a string.
majority_opinions["deepseek_contamination"] = (
    majority_opinions["majority_vote"].astype(str).eq(majority_opinions["deepseek_responses"])
)

In [120]:
# Flag a case as "contaminated" for Llama when its reply is exactly the recorded
# majority vote rendered as a string.
majority_opinions["llama_contamination"] = (
    majority_opinions["majority_vote"].astype(str).eq(majority_opinions["llama_responses"])
)

In [2]:
# Load the previously saved prediction results; the relative path means the CSV must sit
# in the working directory.
final_results = pd.read_csv(filepath_or_buffer='final_results.csv')

In [101]:
# List the columns available in final_results.
final_results.columns

Index(['Unnamed: 0', 'name', 'term', 'facts', 'decision_type', 'first_party',
       'second_party', 'first_party_winner', 'issue_area', 'facts_len',
       'facts_cleaned', 'first_party_winner_binary', 'llama_prediction',
       'llama_reasoning', 'llama_consistency_predictions',
       'llama_consistency_reasonings', 'llama_consistency_scores',
       'deepseek_prediction', 'deepseek_reasoning',
       'deepseek_consistency_predictions', 'deepseek_consistency_reasonings',
       'deepseek_consistency_scores'],
      dtype='object')

In [99]:
# Persist the frame (without the index) so the analysis cells can run without re-querying the models.
majority_opinions.to_csv(path_or_buf='contamination_check_two.csv', index=False)

In [None]:
# Reload the saved contamination flags into majority_opinions.
# NOTE(review): this reads "contamination_check.csv", whereas the save cell in this
# notebook writes 'contamination_check_two.csv' — confirm which file is the intended
# input, otherwise a fresh Run All depends on a file this notebook does not produce.
majority_opinions = pd.read_csv(filepath_or_buffer="contamination_check.csv")

In [4]:
# Copy the per-model contamination flags onto final_results.
# The flags are transferred positionally (via .to_list()), which would silently attach
# them to the wrong cases if the two frames were not in the same row order, so verify
# the alignment first. Both frames are expected to carry a `name` column.
assert len(final_results) == len(majority_opinions), (
    "final_results and majority_opinions must have the same number of rows"
)
assert final_results["name"].to_list() == majority_opinions["name"].to_list(), (
    "final_results and majority_opinions do not list the same cases in the same order"
)

final_results["llama_contamination"] = majority_opinions["llama_contamination"].to_list()
final_results["deepseek_contamination"] = majority_opinions["deepseek_contamination"].to_list()

In [10]:
# Count how many cases carry a True vs. False Llama contamination flag.
final_results["llama_contamination"].value_counts()

llama_contamination
True     31
False     9
Name: count, dtype: int64

In [11]:
# Count how many cases carry a True vs. False DeepSeek contamination flag.
final_results["deepseek_contamination"].value_counts()

deepseek_contamination
True     25
False    15
Name: count, dtype: int64

In [5]:
# A prediction counts as accurate when it equals the binary first-party-winner label.
final_results["llama_accuracy"] = (
    final_results["first_party_winner_binary"].eq(final_results["llama_prediction"])
)
final_results["deepseek_accuracy"] = (
    final_results["first_party_winner_binary"].eq(final_results["deepseek_prediction"])
)

In [6]:
# Llama accuracy breakdown restricted to cases whose contamination flag is True.
final_results.loc[final_results["llama_contamination"].eq(True), "llama_accuracy"].value_counts()

llama_accuracy
True     17
False    14
Name: count, dtype: int64

In [7]:
# Llama accuracy breakdown restricted to cases whose contamination flag is False.
final_results.loc[final_results["llama_contamination"].eq(False), "llama_accuracy"].value_counts()

llama_accuracy
False    7
True     2
Name: count, dtype: int64

In [8]:
# DeepSeek accuracy breakdown restricted to cases whose contamination flag is True.
final_results.loc[final_results["deepseek_contamination"].eq(True), "deepseek_accuracy"].value_counts()

deepseek_accuracy
False    14
True     11
Name: count, dtype: int64

In [9]:
# DeepSeek accuracy breakdown restricted to cases whose contamination flag is False.
final_results.loc[final_results["deepseek_contamination"].eq(False), "deepseek_accuracy"].value_counts()

deepseek_accuracy
False    8
True     7
Name: count, dtype: int64