# Author: Mindy Ng

In [59]:
!pip install goodfire --quiet

In [60]:
# API Key
from kaggle_secrets import UserSecretsClient

# Data Procesing
import numpy as np
import pandas as pd

# Statistics
import scipy.stats as stats
import statsmodels.stats.power as smp
from scipy.stats import binomtest

# Visualization
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

import networkx as nx
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)

from sklearn.metrics import confusion_matrix

# Mechanistic Interpretability
import goodfire

# Data

In [61]:
df = pd.read_csv('/kaggle/input/hackathon/final_results.csv')
df.head(3)

Unnamed: 0.1,Unnamed: 0,name,term,facts,decision_type,first_party,second_party,first_party_winner,issue_area,facts_len,...,llama_prediction,llama_reasoning,llama_consistency_predictions,llama_consistency_reasonings,llama_consistency_scores,deepseek_prediction,deepseek_reasoning,deepseek_consistency_predictions,deepseek_consistency_reasonings,deepseek_consistency_scores
0,464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,"School Committee of Burlington, Massachusetts,...",Department of Education of the Commonwealth of...,False,Civil Rights,3039,...,1,The Education of the Handicapped Act requires...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Education of the Handicapped Act requir...",100.0,0,The reasoning for this decision is based on t...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The reasoning for this decision is based on...",100.0
1,607,Graham v. Connor,1988,"<p>On November 12, 1984, Dethorne Graham, a di...",majority opinion,Dethorne Graham,M.S. Connor,True,Civil Rights,2643,...,1,I rule in favor of Dethorne Graham because th...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' I rule in favor of Dethorne Graham because ...,100.0,1,The facts of the case demonstrate that Dethor...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[' The facts of the case demonstrate that Deth...,100.0
2,1593,Sosa v. Alvarez-Machain,2003,<p>A U.S. Drug Enforcement Agency (DEA) specia...,majority opinion,Jose Francisco Sosa,"Humberto Alvarez-Machain, et al.",True,Economic Activity,2549,...,0,The case presented involves Jose Francisco So...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The case centers around Humberto Alvarez-Ma...",100.0,0,The Alien Tort Statute (ATS) provides federal...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The Alien Tort Statute (ATS) provides feder...",100.0


In [62]:
df.isnull().sum()

Unnamed: 0                          0
name                                0
term                                0
facts                               0
decision_type                       0
first_party                         0
second_party                        0
first_party_winner                  0
issue_area                          0
facts_len                           0
facts_cleaned                       0
first_party_winner_binary           0
llama_prediction                    0
llama_reasoning                     0
llama_consistency_predictions       0
llama_consistency_reasonings        0
llama_consistency_scores            0
deepseek_prediction                 0
deepseek_reasoning                  0
deepseek_consistency_predictions    0
deepseek_consistency_reasonings     0
deepseek_consistency_scores         0
dtype: int64

In [63]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 22 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Unnamed: 0                        40 non-null     int64  
 1   name                              40 non-null     object 
 2   term                              40 non-null     int64  
 3   facts                             40 non-null     object 
 4   decision_type                     40 non-null     object 
 5   first_party                       40 non-null     object 
 6   second_party                      40 non-null     object 
 7   first_party_winner                40 non-null     bool   
 8   issue_area                        40 non-null     object 
 9   facts_len                         40 non-null     int64  
 10  facts_cleaned                     40 non-null     object 
 11  first_party_winner_binary         40 non-null     int64  
 12  llama_pred

# Stats and Viz

In [83]:
# 1. Visualization: Cases by Issue Area Over Time
def cases_by_issue_area_over_time(df):
    # Group by term and issue_area to count cases
    issue_counts = df.groupby(['term', 'issue_area']).size().reset_index(name='count')
    
    # Create a line chart showing trends by issue area over time
    fig = px.line(issue_counts, x='term', y='count', color='issue_area',
                  markers=True, line_shape='linear',
                  title='Supreme Court Cases by Issue Area Over Time',
                  labels={'term': 'Term Year', 'count': 'Number of Cases', 'issue_area': 'Issue Area'})
    
    fig.update_layout(
        template='plotly_dark',
        legend_title_text='Issue Area',
        xaxis=dict(tickmode='linear', dtick=5),
        hovermode='x unified',
        height=600
    )
    
    return fig

# 2. Visualization: First Party Win Rate by Issue Area
def win_rate_by_issue_area(df):
    # Group by issue_area and calculate win rate
    win_rate = df.groupby('issue_area')['first_party_winner'].agg(['mean', 'count']).reset_index()
    win_rate = win_rate.rename(columns={'mean': 'win_rate', 'count': 'total_cases'})
    win_rate['win_rate'] = win_rate['win_rate'] * 100  # Convert to percentage
    
    # Sort by win rate
    win_rate = win_rate.sort_values('win_rate', ascending=False)
    
    # Create a horizontal bar chart
    fig = px.bar(win_rate, 
                y='issue_area', 
                x='win_rate',
                text=win_rate['win_rate'].round(1).astype(str) + '%',
                color='total_cases',
                orientation='h',
                title='First Party Win Rate by Issue Area (%)',
                labels={'issue_area': 'Issue Area', 'win_rate': 'Win Rate (%)', 'total_cases': 'Total Cases'},
                color_continuous_scale='Viridis')
    
    fig.update_traces(textposition='outside')
    fig.update_layout(
        template='plotly_dark',
        xaxis_range=[0, 100],
        height=500,
        yaxis=dict(autorange="reversed")
    )
    
    return fig

# 3. Visualization: Trend in First Party Win Rate Over Time
def win_rate_over_time(df):
    # Group by term and calculate win rate
    term_win_rate = df.groupby('term')['first_party_winner'].agg(['mean', 'count']).reset_index()
    term_win_rate = term_win_rate.rename(columns={'mean': 'win_rate', 'count': 'total_cases'})
    term_win_rate['win_rate'] = term_win_rate['win_rate'] * 100  # Convert to percentage
    
    # Create a combined visualization with win rate line and case count bars
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    
    # Add win rate line
    fig.add_trace(
        go.Scatter(
            x=term_win_rate['term'], 
            y=term_win_rate['win_rate'],
            mode='lines+markers',
            name='Win Rate (%)',
            line=dict(color='#00ffff', width=3),
            marker=dict(size=10, color='#00ffff')
        ),
        secondary_y=False
    )
    
    # Add case count bars
    fig.add_trace(
        go.Bar(
            x=term_win_rate['term'],
            y=term_win_rate['total_cases'],
            opacity=0.7,
            name='Total Cases',
            marker_color='#ff9900'
        ),
        secondary_y=True
    )
    
    # Update layout
    fig.update_layout(
        title_text='First Party Win Rate and Case Count by Term',
        template='plotly_dark',
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        hovermode='x unified',
        height=500
    )
    
    # Set axis titles
    fig.update_yaxes(title_text="Win Rate (%)", secondary_y=False, range=[0, 100])
    fig.update_yaxes(title_text="Number of Cases", secondary_y=True)
    fig.update_xaxes(title_text="Term Year", tickmode='linear')
    
    return fig

# 4. Visualization: Heatmap of First Party Win Rate by Issue Area and Decade
def win_rate_heatmap(df):
    # Create decade column
    df['decade'] = (df['term'] // 10) * 10
    
    # Calculate win rate by decade and issue area
    heatmap_data = df.groupby(['decade', 'issue_area'])['first_party_winner'].agg(['mean', 'count']).reset_index()
    heatmap_data = heatmap_data.rename(columns={'mean': 'win_rate', 'count': 'total_cases'})
    
    # Create pivot table for heatmap
    pivot_data = heatmap_data.pivot_table(
        values='win_rate', 
        index='issue_area',
        columns='decade',
        fill_value=0
    )
    
    # Create annotation text showing counts
    count_pivot = heatmap_data.pivot_table(
        values='total_cases', 
        index='issue_area',
        columns='decade',
        fill_value=0
    )
    
    annotations = []
    for i, row in enumerate(pivot_data.index):
        for j, col in enumerate(pivot_data.columns):
            if count_pivot.iloc[i, j] > 0:
                annotations.append(
                    dict(
                        x=col,
                        y=row,
                        text=f"{pivot_data.iloc[i, j]:.0%}<br>({int(count_pivot.iloc[i, j])})",
                        showarrow=False,
                        font=dict(color="white" if pivot_data.iloc[i, j] < 0.5 else "black")
                    )
                )
    
    # Create heatmap
    fig = go.Figure(data=go.Heatmap(
        z=pivot_data.values,
        x=pivot_data.columns,
        y=pivot_data.index,
        colorscale='RdBu',
        zmid=0.5,  # Center the colorscale at 50%
        text=[[f"{val:.0%}" for val in row] for row in pivot_data.values],
        hovertemplate='Decade: %{x}<br>Issue Area: %{y}<br>Win Rate: %{z:.1%}<extra></extra>'
    ))
    
    fig.update_layout(
        title='First Party Win Rate by Issue Area and Decade',
        xaxis_title='Decade',
        yaxis_title='Issue Area',
        annotations=annotations,
        template='plotly_dark',
        height=600
    )
    
    return fig

# 5. Visualization: Sunburst chart of case distribution
def case_distribution_sunburst(df):
    # Count cases for each combination
    sunburst_data = df.groupby(['issue_area', 'first_party_winner']).size().reset_index(name='count')
    
    # Map boolean to string for better display
    sunburst_data['outcome'] = sunburst_data['first_party_winner'].map({True: 'First Party Won', False: 'First Party Lost'})
    
    # Create sunburst chart
    fig = px.sunburst(
        sunburst_data,
        path=['issue_area', 'outcome'],
        values='count',
        title='Distribution of Supreme Court Cases by Issue Area and Outcome',
        color='first_party_winner',
        color_discrete_map={True: '#00cc96', False: '#ef553b'}
    )
    
    fig.update_layout(
        template='plotly_dark',
        height=700
    )
    
    return fig

# Function to generate and display all visualizations
def create_all_visualizations(df):
    # Call all the visualization functions and return a list of figures
    return [
        cases_by_issue_area_over_time(df),
        win_rate_by_issue_area(df),
        win_rate_over_time(df),
        win_rate_heatmap(df),
        case_distribution_sunburst(df)
    ]

# Example usage:
# figures = create_all_visualizations(majority_opinions)
# for fig in figures:
#     fig.show()


# To display just one visualization:
case_distribution_sunburst(df)

## Reasoning Topics' Network Analysis

In [64]:
# Reasoning Network Analysis
def create_reasoning_network(df):
    # Define legal concepts/terms to look for
    # This list could be expanded with domain expertise
    legal_concepts = [
        'amendment', 'constitution', 'precedent', 'statute', 'regulation',
        'rights', 'liberty', 'freedom', 'discrimination', 'equal protection',
        'due process', 'jurisdiction', 'standing', 'commerce clause',
        'first amendment', 'fourth amendment', 'fifth amendment', 'fourteenth amendment',
        'strict scrutiny', 'rational basis', 'compelling interest',
        'freedom of speech', 'freedom of religion', 'search and seizure',
        'cruel and unusual', 'double jeopardy', 'federalism', 'separation of powers'
    ]
    
    # Function to find legal concepts mentioned in text
    def find_legal_concepts(text):
        if pd.isna(text):
            return []
        
        text_lower = text.lower()
        found = []
        for concept in legal_concepts:
            if concept in text_lower:
                found.append(concept)
        return found
    
    # Extract legal concepts from LLM reasonings
    df['llm_concepts'] = df['llama_reasoning'].apply(find_legal_concepts)
    
    # Construct a graph where nodes are legal concepts and edges represent
    # co-occurrence in the same reasoning
    G = nx.Graph()
    
    # Add nodes
    for concept in legal_concepts:
        G.add_node(concept, type='concept')
    
    # Add edges based on co-occurrence
    for _, row in df.iterrows():
        concepts = row['llm_concepts']
        for i, concept1 in enumerate(concepts):
            for concept2 in concepts[i+1:]:
                if G.has_edge(concept1, concept2):
                    G[concept1][concept2]['weight'] += 1
                else:
                    G.add_edge(concept1, concept2, weight=1)
    
    # Filter for nodes that actually appear in the dataset
    G = G.subgraph([node for node in G.nodes() if G.degree(node) > 0])
    
    # Prepare network visualization
    # Use a force-directed layout
    pos = nx.spring_layout(G, seed=42)
    
    # Create edge traces
    edge_trace = []
    for edge in G.edges(data=True):
        source, target, data = edge
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        weight = data.get('weight', 1)
        
        edge_trace.append(
            go.Scatter(
                x=[x0, x1, None], 
                y=[y0, y1, None],
                line=dict(width=weight * 0.5, color='#999'),
                hoverinfo='none',
                mode='lines',
                showlegend=False
            )
        )
    
    # Create node trace
    node_x = []
    node_y = []
    node_text = []
    node_size = []
    
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)
        # Size based on degree of the node
        node_size.append(10 + 5 * G.degree(node))
    
    # For coloring by issue area, we need to count which concepts appear in which issue areas
    concept_by_issue = {}
    for concept in legal_concepts:
        concept_by_issue[concept] = Counter()
    
    for _, row in df.iterrows():
        issue = row['issue_area']
        concepts = row['llm_concepts']
        for concept in concepts:
            concept_by_issue[concept][issue] += 1
    
    # Determine the most common issue area for each concept
    node_color = []
    node_hover = []
    
    issue_colors = {
        'Criminal Procedure': 'red',
        'Civil Rights': 'blue',
        'First Amendment': 'green',
        'Due Process': 'purple',
        'Privacy': 'orange',
        'Economic Activity': 'brown',
        'Judicial Power': 'pink',
        'Federalism': 'cyan',
        'Interstate Relations': 'lime',
        'Federal Taxation': 'magenta',
        'Miscellaneous': 'gray'
    }
    
    # Default colors for nodes not in our issues
    default_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', 
                    '#ffff33', '#a65628', '#f781bf', '#999999']
    
    for i, node in enumerate(G.nodes()):
        if node in concept_by_issue and concept_by_issue[node]:
            most_common_issue = concept_by_issue[node].most_common(1)[0][0]
            color = issue_colors.get(most_common_issue, default_colors[hash(most_common_issue) % len(default_colors)])
            
            # Create hover text with issue breakdown
            issues_text = '<br>'.join([f"{issue}: {count}" for issue, count in concept_by_issue[node].most_common()])
            hover = f"{node}<br><br>Issue Areas:<br>{issues_text}"
        else:
            color = 'gray'
            hover = node
        
        node_color.append(color)
        node_hover.append(hover)
    
    node_trace = go.Scatter(
        x=node_x, 
        y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="bottom center",
        marker=dict(
            showscale=False,
            color=node_color,
            size=node_size,
            line=dict(width=2, color='black')
        ),
        hovertext=node_hover,
        hoverinfo='text'
    )
    
    # Create figure
    fig = go.Figure(data=edge_trace + [node_trace],
                 layout=go.Layout(
                    title='Network of Legal Concepts in LLM Reasoning',
                    titlefont=dict(size=16),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    width=1000,
                    height=800
                ))
    
    # Create a community analysis version
    # Detect communities using Louvain method
    communities = nx.community.louvain_communities(G)
    
    # Assign colors based on community
    community_node_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', 
                            '#ffff33', '#a65628', '#f781bf', '#999999']
    
    node_community_color = []
    node_community = []
    
    for node in G.nodes():
        for i, community in enumerate(communities):
            if node in community:
                node_community_color.append(community_node_colors[i % len(community_node_colors)])
                node_community.append(i)
                break
        else:
            node_community_color.append('gray')
            node_community.append(-1)
    
    community_node_trace = go.Scatter(
        x=node_x, 
        y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="bottom center",
        marker=dict(
            showscale=False,
            color=node_community_color,
            size=node_size,
            line=dict(width=2, color='black')
        ),
        hovertext=[f"{node}<br>Community: {comm}" for node, comm in zip(node_text, node_community)],
        hoverinfo='text'
    )
    
    # Create community version figure
    fig_communities = go.Figure(data=edge_trace + [community_node_trace],
                 layout=go.Layout(
                    title='Legal Concept Communities in LLM Reasoning',
                    titlefont=dict(size=16),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    width=1000,
                    height=800
                ))
    
    return fig, fig_communities

# Call the function to create the network visualizations
network_fig, community_fig  = create_reasoning_network(df)

network_fig.show()
# community_fig.show()

## Confusion Matrix: SCOTUS Judge Decisions vs LLM Predictions
### Llama Model

In [65]:
# Assuming your dataframe is named 'df'
# Create the confusion matrix
conf_matrix = confusion_matrix(
    df['first_party_winner_binary'], 
    df['llama_prediction']
)

# Labels for the matrix
labels = ['First Party Lost', 'First Party Won']

# Create the heatmap
fig = ff.create_annotated_heatmap(
    z=conf_matrix,
    x=labels,
    y=labels,
    colorscale='purp',
    annotation_text=conf_matrix
)

# Update layout
fig.update_layout(
    title='Confusion Matrix: LLM Predictions vs. Actual Supreme Court Decisions',
    xaxis=dict(title='LLM Prediction'),
    yaxis=dict(title='Actual Outcome'),
    height=500,
    width=600
)

# Fix title font size
fig.update_layout(title_font=dict(size=14))  

# Fix axis orientation
fig.update_xaxes(side="bottom")

# Add figure borders
for i in range(len(fig.layout.annotations)):
    fig.layout.annotations[i].font.size = 14

fig.show()

## DeepSeek Model

In [66]:
# Assuming your dataframe is named 'df'
# Create the confusion matrix
conf_matrix = confusion_matrix(
    df['first_party_winner_binary'], 
    df['deepseek_prediction']
)

# Labels for the matrix
labels = ['First Party Lost', 'First Party Won']

# Create the heatmap
fig = ff.create_annotated_heatmap(
    z=conf_matrix,
    x=labels,
    y=labels,
    colorscale='Blues',
    annotation_text=conf_matrix
)

# Update layout
fig.update_layout(
    title='Confusion Matrix: LLM Predictions vs. Actual Supreme Court Decisions',
    xaxis=dict(title='LLM Prediction'),
    yaxis=dict(title='Actual Outcome'),
    height=500,
    width=600
)

# Fix title font size
fig.update_layout(title_font=dict(size=14))  

# Fix axis orientation
fig.update_xaxes(side="bottom")

# Add figure borders
for i in range(len(fig.layout.annotations)):
    fig.layout.annotations[i].font.size = 14

fig.show()

## Confusion Matrices Grouped by Issue Area
### Llama Model

In [67]:
# Get unique issue areas
issue_areas = df['issue_area'].unique()

# Create subplots - one for each issue area
rows = int(np.ceil(len(issue_areas) / 2))
fig = make_subplots(rows=rows, cols=2, 
                    subplot_titles=[f"Issue Area: {area}" for area in issue_areas],
                    vertical_spacing=0.12,
                    horizontal_spacing=0.5)

# Add confusion matrices for each issue area
row, col = 1, 1
for area in issue_areas:
    # Filter data for this issue area
    area_df = df[df['issue_area'] == area]
    
    # Create confusion matrix
    cm = confusion_matrix(
        area_df['first_party_winner_binary'], 
        area_df['llama_prediction'],
        labels=[0, 1]
    )
    
    # Create heatmap
    heatmap = go.Heatmap(
        z=cm,
        x=['First Party Lost', 'First Party Won'],
        y=['First Party Lost', 'First Party Won'],
        colorscale='purp',
        showscale=False,
        text=cm,
        texttemplate="%{text}",
        textfont={"size": 16}
    )
    
    # Add to subplot
    fig.add_trace(heatmap, row=row, col=col)
    
    # Update axis for each subplot
    fig.update_xaxes(title_text="LLM Prediction", row=row, col=col)
    fig.update_yaxes(title_text="Actual Outcome", row=row, col=col)
    
    # Move to next subplot position
    col += 1
    if col > 2:
        col = 1
        row += 1

# Update layout
fig.update_layout(
    title_text="Llama Confusion Matrices by Issue Area",
    height=300 * rows,
    width=1000
)

fig.show()

## DeepSeek Model

In [68]:
# Get unique issue areas
issue_areas = df['issue_area'].unique()

# Create subplots - one for each issue area
rows = int(np.ceil(len(issue_areas) / 2))
fig = make_subplots(rows=rows, cols=2, 
                    subplot_titles=[f"Issue Area: {area}" for area in issue_areas],
                    vertical_spacing=0.12,
                    horizontal_spacing=0.5)

# Add confusion matrices for each issue area
row, col = 1, 1
for area in issue_areas:
    # Filter data for this issue area
    area_df = df[df['issue_area'] == area]
    
    # Create confusion matrix
    cm = confusion_matrix(
        area_df['first_party_winner_binary'], 
        area_df['deepseek_prediction'],
        labels=[0, 1]
    )
    
    # Create heatmap
    heatmap = go.Heatmap(
        z=cm,
        x=['First Party Lost', 'First Party Won'],
        y=['First Party Lost', 'First Party Won'],
        colorscale='Blues',
        showscale=False,
        text=cm,
        texttemplate="%{text}",
        textfont={"size": 16}
    )
    
    # Add to subplot
    fig.add_trace(heatmap, row=row, col=col)
    
    # Update axis for each subplot
    fig.update_xaxes(title_text="LLM Prediction", row=row, col=col)
    fig.update_yaxes(title_text="Actual Outcome", row=row, col=col)
    
    # Move to next subplot position
    col += 1
    if col > 2:
        col = 1
        row += 1

# Update layout
fig.update_layout(
    title_text="DeepSeek Confusion Matrices by Issue Area",
    height=300 * rows,
    width=1000
)

fig.show()

# Llama Model's False Positive Investigation:
## Mechanistic Interpretability using Goodfire's Ember API :D

In [69]:
user_secrets = UserSecretsClient()

In [70]:
GOODFIRE_API_KEY = user_secrets.get_secret("goodfire")
client = goodfire.Client(api_key=GOODFIRE_API_KEY)

# Instantiate a model variant. 
variant = goodfire.Variant("meta-llama/Llama-3.3-70B-Instruct")

In [71]:
# Get False Positive Cases from Llama Model
df[(df['first_party_winner_binary'] == 0) & (df['llama_prediction'] == 1)]

Unnamed: 0.1,Unnamed: 0,name,term,facts,decision_type,first_party,second_party,first_party_winner,issue_area,facts_len,...,llama_reasoning,llama_consistency_predictions,llama_consistency_reasonings,llama_consistency_scores,deepseek_prediction,deepseek_reasoning,deepseek_consistency_predictions,deepseek_consistency_reasonings,deepseek_consistency_scores,llm_concepts
0,464,School Committee of the Town of Burlington v. ...,1984,<p>Under the provisions of the Education of th...,majority opinion,"School Committee of Burlington, Massachusetts,...",Department of Education of the Commonwealth of...,False,Civil Rights,3039,...,The Education of the Handicapped Act requires...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" The Education of the Handicapped Act requir...",100.0,0,The reasoning for this decision is based on t...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The reasoning for this decision is based on...",100.0,[]
30,3207,McKinney v. Arizona,2019,"<p>By way of relevant background, James McKinn...",majority opinion,James Erin McKinney,State of Arizona,False,Criminal Procedure,3021,...,I rule in favor of James Erin McKinney becaus...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" I rule in favor of James Erin McKinney beca...",100.0,0,The facts of the case indicate that James Eri...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The facts of the case indicate that James E...",100.0,[constitution]


McKinney v. Arizona	would be interesting to explore because it was controversial, with the court divided 5-4. The case involved a death sentence for James McKinney, who had been on death row in Arizona since 1993. 

Let us see why the model judged in favor of McKinney from a model features perspective.

In [72]:
false_pos_case = df[df['name'] == 'McKinney v. Arizona']
false_pos_case

Unnamed: 0.1,Unnamed: 0,name,term,facts,decision_type,first_party,second_party,first_party_winner,issue_area,facts_len,...,llama_reasoning,llama_consistency_predictions,llama_consistency_reasonings,llama_consistency_scores,deepseek_prediction,deepseek_reasoning,deepseek_consistency_predictions,deepseek_consistency_reasonings,deepseek_consistency_scores,llm_concepts
30,3207,McKinney v. Arizona,2019,"<p>By way of relevant background, James McKinn...",majority opinion,James Erin McKinney,State of Arizona,False,Criminal Procedure,3021,...,I rule in favor of James Erin McKinney becaus...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","["" I rule in favor of James Erin McKinney beca...",100.0,0,The facts of the case indicate that James Eri...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","["" The facts of the case indicate that James E...",100.0,[constitution]


In [73]:
prompt = f"""
    You are a Supreme Court Justice delivering a decision on a case.
	I will give you the year of the case, the first party, second party and facts of the case.
    Please determine whether you will rule in favor of the first party.
    Return an answer of TRUE if you decide in favor of the first party, or FALSE if not.
    FALSE is a catch-all response for any scenario other than a ruling in favor of the first party.
    In your response, please give your answer (either TRUE or FALSE) as the first part of a response, then a semicolon ";", and then follow with your reasoning for why you gave your answer.

    In your reasoning, do not use any information or examples from after the year of the case.


    This is the case:  
    Year: {false_pos_case['term']}
    First party: {false_pos_case['first_party']}
    Second party: {false_pos_case['second_party']}
    Facts: {false_pos_case['facts_cleaned']}
    """

In [74]:
# This will was the response from LLM after prompt
reasoning = str(false_pos_case['llama_reasoning'])
reasoning

'30     I rule in favor of James Erin McKinney becaus...\nName: llama_reasoning, dtype: object'

In [75]:
# Analyze how features activate in controversial court case (SCOTUS judge ruled against plaintiff and LLM ruled in favor)
inspector = client.features.inspect(
    [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": reasoning}
    ],
    model=variant
)

# Get top activated features
for activation in inspector.top(k=5):
    print(f"{activation.feature.label}: {activation.activation}")

Legal and court case descriptions and citations: 98
Supreme Court cases involving overturned precedents: 70
Landmark court cases and legal precedents: 49
Offensive request from the user: 43
Pandas DataFrame and Series output formatting tokens: 38


In [76]:
# Let's look at where in the model was it empathetic/compassionate since that is what like model seemed to lean on when ruling in favor of plaintiff regardless of heinous crimes
compassionate_features = client.features.search(
    "compassionate",
    model=variant,
    top_k=10
)
print(compassionate_features)

FeatureGroup([
   0: "Expressions of mercy and compassion",
   1: "Expressions of genuine emotional care and concern for others",
   2: "Empathetic acknowledgment of emotional pain or difficulty",
   3: "Understanding and sharing others' feelings (empathy definition)",
   4: "Moral emphasis on compassion and kindness as virtues",
   5: "Assistant maintaining professional courtesy and empathy, especially in difficult situations",
   6: "Offering emotional support and comfort to someone in distress",
   7: "Empathetic acknowledgment of user's emotional state or perspective",
   8: "Empathetic and considerate communication patterns",
   9: "Describing someone's nurturing and caring qualities in emotional tributes"
])


In [77]:
empathetic_features = client.features.search(
    "empathetic",
    model=variant,
    top_k=10
)
print(empathetic_features)

FeatureGroup([
   0: "Understanding and sharing others' feelings (empathy definition)",
   1: "Empathetic acknowledgment of user's emotional state or perspective",
   2: "Empathetic acknowledgment of emotional pain or difficulty",
   3: "Understanding and empathizing with others' perspectives",
   4: "Expressing empathy by acknowledging others' states of being",
   5: "Expressing empathy through shared experiences",
   6: "Empathetic validation of difficult emotions",
   7: "Cross-lingual representations of empathy and emotional understanding",
   8: "Empathetic validation of user feelings",
   9: "The assistant acknowledging user emotions or situations empathetically"
])


Look at neighboring compassionate features.

In [78]:
# Example of group
client.features.neighbors(
    compassionate_features[0],
    model=variant,
    top_k=5
)

FeatureGroup([
   0: "Cross-lingual representations of empathy and emotional understanding",
   1: "Character development showing unexpected empathy or moral growth",
   2: "Instructions or descriptions about being kind and considerate to others",
   3: "Character decides to take in and care for someone vulnerable",
   4: "Being at the mercy of someone/something else"
])

In [79]:
# Find more features that are similar to other features.
client.features.neighbors(
    compassionate_features[2],
    model=variant,
    top_k=5
)

FeatureGroup([
   0: "Descriptions of visceral emotional experiences and sensations",
   1: "Descriptions of harmful psychological and physical consequences when explaining why something is unethical",
   2: "Phrases acknowledging difficult periods when expressing empathy",
   3: "Processing and recovering from past trauma",
   4: "The assistant should provide emotional support and guidance for difficult situations"
])

# Hypothesis Testing / Statistical Test of Significance

### Accuracy metric (not variance in judgment on one case)

* Ho - The LLM model will perform at chance levels (50% diff) at the judgment prediction task.
* Ha - The model will perform better than chance levels (50% diff) at the judgement prediction task.

Since we are evaluating binary predictions (0/1) made by the LLM against true labels (SCOTUS judge decisions), the most appropriate statistical test is:

* Binomial Test (we only care about overall accuracy).
Tests if the LLM’s accuracy is significantly different from random guessing (e.g., 50% accuracy if balanced classes).

In [80]:
# Assuming your DataFrame is named df
# Replace 'scotus_decision' and 'llm_decision' with your column names
scotus_decision = df['first_party_winner_binary']
llm_decision = df['llama_prediction']

# Count the number of matches (successes)
matches = (scotus_decision == llm_decision).sum()
total_cases = len(df)

# Perform the Binomial Test
# Assuming the null hypothesis is that LLM is guessing randomly (50%)
result = binomtest(matches, n=total_cases, p=0.5, alternative='greater')

# Extract p-value
p_value = result.pvalue

# Print the result
print(f"Number of Matches: {matches}")
print(f"Total Cases: {total_cases}")
print(f"Binomial Test p-value: {p_value}")

if p_value < 0.05:
    print("Reject H0: The LLM is significantly better than random guessing.")
else:
    print("Fail to reject H0: The LLM is not significantly better than random guessing.")


Number of Matches: 19
Total Cases: 40
Binomial Test p-value: 0.6820859986855794
Fail to reject H0: The LLM is not significantly better than random guessing.


### Interpretation of Results
If p < 0.05, reject H₀ 
If p ≥ 0.05, fail to reject H₀
If power < 0.8, our test may be underpowered, meaning we may need more cases (n > 40) for a reliable conclusion.

* Power (1 - β) is the probability of correctly rejecting the null hypothesis when the alternative is true. Since we set power = 0.8, but only have 40 cases, the actual power may be much lower.

* Low Power = Higher Type II Error (False Negatives)
→ We might fail to reject H₀ even if there actually are consequences.
* Small Effect Sizes May Go Undetected

In short, not rejecting H₀ might just mean the sample size is too small to detect differences.

## Report Power to Acknowledge Limitations
Suspecting low power, explicitly calculating and reporting it:

In [81]:
# Count matches and mismatches
df['match'] = df['first_party_winner_binary'] == df['llama_prediction']
df['match'] = np.select(
[
    (df['first_party_winner_binary'] == df['llama_prediction'])
], 
[
    1
], 
default=0 )
observed_counts = df['match'].value_counts().sort_index().values  

# Define expected probabilities (modify if another baseline is relevant)
expected_prob = np.array([0.5, 0.5]) 
expected_counts = expected_prob * df.shape[0]

# Compute effect size (Cohen's W)
effect_size = np.sqrt(sum((observed_counts - expected_counts) ** 2 / expected_counts))

# Compute actual power
power_analysis = smp.GofChisquarePower()
actual_power = power_analysis.solve_power(effect_size, nobs=40, alpha=0.05, power=None)

print(f"Estimated Power: {actual_power:.2f}")

Estimated Power: 0.52


This is low. Standard power is set at 0.8 for statistically significant results when there actually is one.
Consider bootstrapping to get more samples to increase power.