In [1]:
from datetime import datetime
from typing import Tuple, List
import os
import re

import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import sys

import seaborn as sns

from config import master_folder

#Alternative master_folder import for the repository that also has non-anonymized data: 
#sys.path.append('..')
#from config import anonymized_folder as master_folder

Useful variables and helper functions that are used repeatedly throughout the notebook.

In [2]:
# Timestamp identifying this run; it names the working directory created below
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Output locations: only the analysis working folder carries the timestamp,
# the debug folder path does not
analysis_path = os.path.join(master_folder, "analysis", "data", f"working_{timestamp}")
figures_path = os.path.join(analysis_path, "figures")
tables_path = os.path.join(analysis_path, "tables")
debug_path = os.path.join(master_folder, 'analysis', 'debug')

for path in (analysis_path, figures_path, tables_path, debug_path):
    os.makedirs(path, exist_ok=True)

# Human-readable task names, keyed by task number (1-6)
task_descriptions = dict(enumerate([
    "Draft Client Email",
    "Draft Legal Memo",
    "Analysis of Complaint",
    "Draft NDA",
    "Draft Motion to Consolidate",
    "Draft CNC Enforcement Letter",
], start=1))

# Labels for the student-type codes (used in balance tables and controls)
student_type_mapping = dict(enumerate(["2L Student", "3L Student", "LLM Student"], start=1))

# Labels for the AI-use codes (used in balance tables and controls)
ai_use_mapping = dict(enumerate([
    "0 times",
    "1-5 times",
    "6-10 times",
    "11-20 times",
    "More than 20 times",
], start=1))

# Standardized time-spent column name for each task number
time_cols = {i: f'Time_Spent_Assignment_{i}' for i in range(1, 7)}

# Outcome label -> {task number -> standardized column name}.
# The five rubric criteria share one naming pattern and come first, in rubric order.
outcome_mappings = {
    criterion: {i: f'P{i}_Criteria_{position}_{criterion}' for i in range(1, 7)}
    for position, criterion in enumerate(
        ('Accuracy', 'Analysis', 'Organization', 'Clarity', 'Professionalism'), start=1
    )
}
outcome_mappings.update({
    'Total Score': {i: f'P{i}_Total_Score' for i in range(1, 7)},
    'Time Spent': {i: f'Time_Spent_Assignment_{i}' for i in range(1, 7)},
    'Productivity': {i: f'P{i}_Productivity' for i in range(1, 7)},
})


# Commonly used formatting functions
def format_coefficient(coef, se, pval):
    """Return a coefficient and its standard error as LaTeX math strings.

    The coefficient is rendered with two decimals and significance stars
    (``***`` for p < 0.01, ``**`` for p < 0.05, ``*`` for p < 0.1); the
    standard error is rendered with two decimals inside parentheses.

    Returns:
        tuple[str, str]: ``(coefficient_text, standard_error_text)``.
    """
    # Ordered from strictest to loosest cut-off so the first match wins.
    significance_levels = (
        (0.01, '^{***}'),
        (0.05, '^{**}'),
        (0.1, '^{*}'),
    )
    marker = next((mark for cutoff, mark in significance_levels if pval < cutoff), '')

    coef_text = f"${coef:.2f}{marker}$"
    se_text = f"(${se:.2f}$)"
    return coef_text, se_text

def format_pct_change(pct_change):
    """Render a percentage change as a LaTeX math string with one decimal.

    Positive values get an explicit ``+``; zero and negative values are
    printed as-is. ``None`` and missing values (per ``pd.isna``) give ``"N/A"``.
    """
    is_missing = pct_change is None or pd.isna(pct_change)
    if is_missing:
        return "N/A"

    if pct_change > 0:
        sign = '+'
    else:
        sign = ''
    return f"${sign}{pct_change:.1f}\\%$"

def setup_plot_style():
    """Reset matplotlib to its default style, then apply the notebook's shared rcParams."""
    # Light dashed grid drawn underneath the plotted data
    grid_settings = {
        'axes.grid': True,
        'axes.axisbelow': True,
        'grid.color': 'lightgray',
        'grid.linestyle': '--',
        'grid.alpha': 0.5,
    }
    figure_settings = {
        'font.size': 12,
        'figure.figsize': (10, 8),
    }

    plt.style.use('default')
    plt.rcParams.update({**figure_settings, **grid_settings})

Merge the student identifiers with the group assignment file so that each observation gets its group assignment.


In [None]:
time_file_path = os.path.join(master_folder, "Time Spent on Assignments completed 12.3.24 ac.xlsx")
group_file_path = os.path.join(master_folder, "Group Assignments.xlsx")

time_df = pd.read_excel(time_file_path)
group_df = pd.read_excel(group_file_path)

# Attach each student's group by matching 'Student name' (time file) to 'Q5'
# (group file). A left join keeps every row of the time file; the join key
# 'Q5' is redundant afterwards and is dropped.
merged_df = time_df.merge(
    group_df[['Q5', 'group']],
    left_on='Student name',
    right_on='Q5',
    how='left'
).drop(columns='Q5')

# A left join emits one row per matching right-hand row, so repeated 'Q5'
# values in the group file would duplicate students. Surface that instead of
# letting it pass silently.
if len(merged_df) != len(time_df):
    print(f"\nWarning: merge changed the row count from {len(time_df)} to {len(merged_df)}")

# Create a basic matching report.
# .copy() makes the report an independent frame, so adding a column below does
# not trigger pandas' SettingWithCopyWarning.
report_df = merged_df[['Student name', 'group']].copy()
report_df['Matched'] = report_df['group'].notna()

# Save the report as an Excel file instead of CSV
#report_output_path = os.path.join(master_folder, "name_matching_report.xlsx")
#report_df.to_excel(report_output_path, index=False)
#print(f"Name matching report saved as '{report_output_path}'.")

# Check for any unmatched students
unmatched = merged_df[merged_df['group'].isna()]
if len(unmatched) > 0:
    print("\nUnmatched students:")
    print(unmatched[['Student name']])

# Display matching statistics
print(f"\nTotal students in time_spent file: {len(time_df)}")
print(f"Successfully matched with groups: {len(merged_df.dropna(subset=['group']))}")

Name matching report saved as 'C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\name_matching_report.xlsx'.

Total students in time_spent file: 153
Successfully matched with groups: 153


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  report_df['Matched'] = report_df['group'].notna()


In [4]:
# Count observations in each group (rows of merged_df per value of 'group';
# value_counts leaves out rows whose group is missing)
print(merged_df['group'].value_counts())

group
C    51
A    51
B    51
Name: count, dtype: int64


Merge in all of the grades

In [5]:
# Read all grading sheets into a dict keyed by problem number.
# The files for problems 2 and 5 carry a date suffix in their names; the
# other problems have none.
grading_sheet_suffixes = {2: "_12-8-2024", 5: "_12-8-24"}

problems = {}
for i in range(1, 7):
    file_suffix = grading_sheet_suffixes.get(i, "")
    # os.path.join keeps the path separator consistent with the rest of the notebook
    sheet_path = os.path.join(master_folder, f"Grading Sheet Vincent v.1 Problem {i}{file_suffix}.xlsx")
    problems[i] = pd.read_excel(sheet_path)

# Problem 4's sheet uses the full rubric question as each column header;
# shorten those headers to 'Criteria N'. The keys must match the headers
# exactly, including trailing spaces and the newline.
problem4_rename = {
    'Criteria 1: Accuracy: How accurate and legally sound was your drafting of the nondisclosure agreement? ': 'Criteria 1',
    'Criteria 2: Analysis: How sound and thoughtful was your approach to the drafting and enforceability of the NDA? ': 'Criteria 2',
    'Criteria 3: Organization: How well-organized and structured was your NDA draft?\n': 'Criteria 3',
    'Criteria 4: Clarity: How clear and readable was your NDA?': 'Criteria 4',
    'Criteria 5: Professionalism: How well did you follow the assignments instructions and professional drafting standards?': 'Criteria 5'
}
problems[4] = problems[4].rename(columns=problem4_rename)

print("Problem 4 columns after first renaming:", problems[4].columns.tolist())

# Rename all columns to include problem number
def rename_grade_columns(df, problem_num):
    """Return a copy of ``df`` whose columns are prefixed with ``P{problem_num}_``.

    The 'Student #' column is left unprefixed.
    """
    prefixed_names = {}
    for column in df.columns:
        if column == 'Student #':
            continue
        prefixed_names[column] = f'P{problem_num}_{column}'
    return df.rename(columns=prefixed_names)

# Prefix every grading sheet's columns with its problem number
problems = {
    problem_num: rename_grade_columns(sheet, problem_num)
    for problem_num, sheet in problems.items()
}

print("\nProblem 4 columns after rename_grade_columns:", problems[4].columns.tolist())

# Left-join each problem's grades onto the student/group table, one problem at
# a time, discarding the sheet's own key column after each join.
# NOTE(review): a left join emits one row per matching sheet row, so a repeated
# 'Student #' in any sheet adds rows here — compare the shape printed below
# with the number of students.
final_df = merged_df.copy()
for i in range(1, 7):
    final_df = (
        final_df
        .merge(problems[i], left_on='Student Number', right_on='Student #', how='left')
        .drop(columns='Student #')
    )

# Diagnostics: problem-4 columns, overall shape, and grade coverage per problem
print("\nRelevant columns in final_df:")
p4_cols = [name for name in final_df.columns if 'P4_' in name]
print(p4_cols)

print("\nFinal dataset shape:", final_df.shape)
print("\nNumber of students with grades:")
for i in range(1, 7):
    grade_col = f'P{i}_Grade'
    graded_count = final_df[grade_col].notna().sum()
    print(f"Problem {i}: {graded_count} students")

print("\nFirst few rows of final dataset:")
print(final_df.head())

Problem 4 columns after first renaming: ['Student #', 'Criteria 1', 'Criteria 2', 'Criteria 3', 'Criteria 4', 'Criteria 5', 'Total Score', 'Grade', 'Hallucination', 'Notes', 'Unnamed: 10']

Problem 4 columns after rename_grade_columns: ['Student #', 'P4_Criteria 1', 'P4_Criteria 2', 'P4_Criteria 3', 'P4_Criteria 4', 'P4_Criteria 5', 'P4_Total Score', 'P4_Grade', 'P4_Hallucination', 'P4_Notes', 'P4_Unnamed: 10']

Relevant columns in final_df:
['P4_Criteria 1', 'P4_Criteria 2', 'P4_Criteria 3', 'P4_Criteria 4', 'P4_Criteria 5', 'P4_Total Score', 'P4_Grade', 'P4_Hallucination', 'P4_Notes', 'P4_Unnamed: 10']

Final dataset shape: (154, 73)

Number of students with grades:
Problem 1: 145 students
Problem 2: 125 students
Problem 3: 127 students
Problem 4: 128 students
Problem 5: 127 students
Problem 6: 127 students

First few rows of final dataset:
     Student Number      Student name  Completion of Experiment  \
0  c1f9916910cd4128  c1f9916910cd4128                       0.0   
1  7316aefc

In [6]:
## Bit more cleaning
# Drop leftover columns from the grading sheets that are not used further.
# Only columns still present are dropped, so re-running this cell is a no-op
# rather than a KeyError.
junk_columns = ['P1_Unnamed: 11', 'P3_Column 1', 'P4_Unnamed: 10', 'P6_Unnamed: 11', 'P6_Unnamed: 12']
final_df = final_df.drop(columns=[col for col in junk_columns if col in final_df.columns])

Create a DataFrame for each task

In [7]:
# Normalise group labels (trim whitespace, upper-case) before they are used;
# missing values are passed through unchanged
final_df['group'] = final_df['group'].apply(
    lambda label: label if pd.isna(label) else str(label).strip().upper()
)

# Modified function to handle NaN and invalid groups
def get_ai_condition(group, task_num):
    """Look up the AI condition ('No AI', 'GPT 01' or 'Vincent') for a group on a task.

    Args:
        group: Group label; it is trimmed and upper-cased before lookup.
        task_num: Task number, 1-6.

    Returns:
        The condition name, or ``np.nan`` when the group is missing, is not one
        of A/B/C, or the lookup fails for any other reason (a message is
        printed in the latter two cases).
    """
    # The group-to-condition assignment repeats every three tasks:
    # tasks 1 & 4, 2 & 5 and 3 & 6 share a mapping.
    rotation = (
        {'A': 'No AI', 'B': 'GPT 01', 'C': 'Vincent'},
        {'A': 'Vincent', 'B': 'No AI', 'C': 'GPT 01'},
        {'A': 'GPT 01', 'B': 'Vincent', 'C': 'No AI'},
    )
    conditions = {task: rotation[(task - 1) % 3] for task in range(1, 7)}

    try:
        if pd.isna(group):
            return np.nan
        label = str(group).strip().upper()
        if label in ('A', 'B', 'C'):
            return conditions[task_num][label]
        print(f"Warning: Invalid group value found: {group}")
        return np.nan
    except Exception as e:
        print(f"Error processing group {group} for task {task_num}: {str(e)}")
        return np.nan

# Keep only the first row for each 'Student Number'; later repeats are discarded
is_repeat = final_df.duplicated(subset=['Student Number'], keep='first')
final_df = final_df[~is_repeat]

print("\nProcessing task dataframes...")

# Function to create task dataframe
def create_task_df(task_num, time_col):
    """Build the per-task frame from the notebook-level ``final_df``.

    The result holds 'Student Number', every column whose name contains
    ``P{task_num}_``, the given time column and 'group', plus an
    'AI_Condition' column derived from the group via ``get_ai_condition``.
    """
    marker = f'P{task_num}_'
    grade_cols = [name for name in final_df.columns if marker in name]
    task_df = final_df[['Student Number', *grade_cols, time_col, 'group']].copy()

    def _condition_for(group):
        return get_ai_condition(group, task_num)

    task_df['AI_Condition'] = task_df['group'].apply(_condition_for)
    return task_df

# Create all task dataframes
task_dfs = {}
# Raw time-spent column name for each task, as it appears in the time file
time_columns = {
    1: 'Time Spent on Assignment One (4177421)',
    2: 'Time Spent on Assignment Two (4177422)',
    3: 'Time Spent on Assignment Three (4177428)',
    4: 'Time Spent on Assignment Four (4177442)',
    5: 'Time Spent on Assignment Five (4177448)',
    6: 'Time Spent on Assignment Six (4177449)'
}

for task_num in range(1, 7):
    print(f"\nProcessing Task {task_num}:")
    task_df = create_task_df(task_num, time_columns[task_num])
    task_dfs[task_num] = task_df

    # Print diagnostics for each task
    print(f"Total rows: {len(task_df)}")
    print("AI Condition distribution:")
    print(task_df['AI_Condition'].value_counts(dropna=False))

    # Save the dataframe. os.path.join avoids the mixed path separators that
    # string concatenation with "/" produces on Windows.
    output_path = os.path.join(analysis_path, f"task{task_num}_data.csv")
    task_df.to_csv(output_path, index=False)
    print(f"Saved to: {output_path}")

# Final verification: row counts, condition coverage and duplicate check per task
print("\nVerification after processing:")
print("Task counts:")
for task_num in range(1, 7):
    task_df = task_dfs[task_num]
    print(f"Task {task_num}:")
    print(f"  Total rows: {len(task_df)}")
    print(f"  Students with AI condition: {task_df['AI_Condition'].notna().sum()}")
    print(f"  Duplicate student numbers: {task_df['Student Number'].duplicated().sum()}")
    print("  AI Condition distribution:")
    print(task_df['AI_Condition'].value_counts(dropna=False))
    print()

print("\nProcessing complete!")


Processing task dataframes...

Processing Task 1:
Total rows: 153
AI Condition distribution:
AI_Condition
Vincent    51
No AI      51
GPT 01     51
Name: count, dtype: int64
Saved to: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808/task1_data.csv

Processing Task 2:
Total rows: 153
AI Condition distribution:
AI_Condition
GPT 01     51
Vincent    51
No AI      51
Name: count, dtype: int64
Saved to: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808/task2_data.csv

Processing Task 3:
Total rows: 153
AI Condition distribution:
AI_Condition
No AI      51
GPT 01     51
Vincent    51
Name: count, dtype: int64
Saved to: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808/task3_data.csv

Processing Task 4:
Total rows: 153
AI Condition distribution:
AI_Condition
Vi

In [8]:
# Select the rows of final_df that have no group assignment
students_without_groups = final_df.loc[final_df['group'].isna()]

print("Students without groups:")
print(list(students_without_groups['Student Number']))

# Show whatever data exists for those students
print("\nSample data for students without groups:")
print(students_without_groups.head())

Students without groups:
[]

Sample data for students without groups:
Empty DataFrame
Columns: [Student Number, Student name, Completion of Experiment, Time Spent on Assignment One (4177421), Time Spent on Assignment Two (4177422), Time Spent on Assignment Three (4177428), Time Spent on Assignment Four (4177442), Time Spent on Assignment Five (4177448), Time Spent on Assignment Six (4177449), group, P1_Accuracy, P1_Analysis, P1_Organization, P1_Clarity, P1_Professionalism, P1_Total #, P1_Grade, P1_Hallucination, P1_Hallu_note, P1_Notes, P2_Criteria 1, P2_Criteria 2, P2_Criteria 3, P2_Criteria 4, P2_Criteria 5, P2_Total Score, P2_Grade, P2_Hallucination, P2_Hallu_note, P2_Notes, P3_Criteria 1: Accuracy, P3_Criteria 2: Analysis, P3_Criteria 3: Organization, P3_Criteria 4: Clarity, P3_Criteria 5: Professionalism, P3_Total Score, P3_Grade, P3_Hallucination, P3_Notes, P4_Criteria 1, P4_Criteria 2, P4_Criteria 3, P4_Criteria 4, P4_Criteria 5, P4_Total Score, P4_Grade, P4_Hallucination, P4_No

Make sure the outcome-variable data are ready for analysis

In [9]:
# Define column mapping: raw column name (as produced by the merge and
# per-problem prefixing above) -> standardized analysis column name.
# Criteria columns become P{n}_Criteria_{k}_{Name} and time columns become
# Time_Spent_Assignment_{n}, matching the names built in outcome_mappings.
# DataFrame.rename ignores keys that are not present in a frame, so this one
# mapping can be applied to every per-task file.
col_mapping = {
    # Student Info
    "Student Number": "Student_Number",
    "group": "Group",
    "AI_Condition": "AI_Condition",
    
    # Task 1 (criteria columns are named by criterion, without a "Criteria N" prefix)
    "P1_Accuracy": "P1_Criteria_1_Accuracy",
    "P1_Analysis": "P1_Criteria_2_Analysis", 
    "P1_Organization": "P1_Criteria_3_Organization",
    "P1_Clarity": "P1_Criteria_4_Clarity",
    "P1_Professionalism": "P1_Criteria_5_Professionalism",
    "P1_Total #": "P1_Total_Score",
    "P1_Grade": "P1_Grade",
    # NOTE(review): the column listing printed earlier in the notebook shows
    # 'P1_Hallucination' rather than 'P1_Hallu?', so this entry may never
    # match — confirm against the Problem 1 grading sheet.
    "P1_Hallu?": "P1_Hallucination",
    "P1_Hallu_note": "P1_Hallucination_Note",
    "P1_Notes": "P1_Notes",
    "Time Spent on Assignment One (4177421)": "Time_Spent_Assignment_1",
    
    # Task 2
    "P2_Criteria 1": "P2_Criteria_1_Accuracy",
    "P2_Criteria 2": "P2_Criteria_2_Analysis",
    "P2_Criteria 3": "P2_Criteria_3_Organization",
    "P2_Criteria 4": "P2_Criteria_4_Clarity",
    "P2_Criteria 5": "P2_Criteria_5_Professionalism",
    "P2_Total Score": "P2_Total_Score",
    "P2_Grade": "P2_Grade",
    "P2_Hallucination": "P2_Hallucination",
    "P2_Hallu_note": "P2_Hallucination_Note",
    "P2_Notes": "P2_Notes",
    "Time Spent on Assignment Two (4177422)": "Time_Spent_Assignment_2",
    
    # Task 3 (no hallucination-note column is mapped for this task)
    "P3_Criteria 1: Accuracy": "P3_Criteria_1_Accuracy",
    "P3_Criteria 2: Analysis": "P3_Criteria_2_Analysis",
    "P3_Criteria 3: Organization": "P3_Criteria_3_Organization",
    "P3_Criteria 4: Clarity": "P3_Criteria_4_Clarity",
    "P3_Criteria 5: Professionalism": "P3_Criteria_5_Professionalism",
    "P3_Total Score": "P3_Total_Score",
    "P3_Grade": "P3_Grade",
    "P3_Hallucination": "P3_Hallucination",
    "P3_Notes": "P3_Notes",
    "Time Spent on Assignment Three (4177428)": "Time_Spent_Assignment_3",
    
    # Task 4 (no hallucination-note column is mapped for this task)
    "P4_Criteria 1": "P4_Criteria_1_Accuracy",
    "P4_Criteria 2": "P4_Criteria_2_Analysis",
    "P4_Criteria 3": "P4_Criteria_3_Organization",
    "P4_Criteria 4": "P4_Criteria_4_Clarity",
    "P4_Criteria 5": 
        "P4_Criteria_5_Professionalism",
    "P4_Total Score": "P4_Total_Score",
    "P4_Grade": "P4_Grade",
    "P4_Hallucination": "P4_Hallucination",
    "P4_Notes": "P4_Notes",
    "Time Spent on Assignment Four (4177442)": "Time_Spent_Assignment_4",
    
    # Task 5
    "P5_Criteria 1": "P5_Criteria_1_Accuracy",
    "P5_Criteria 2": "P5_Criteria_2_Analysis",
    "P5_Criteria 3": "P5_Criteria_3_Organization",
    "P5_Criteria 4": "P5_Criteria_4_Clarity",
    "P5_Criteria 5": "P5_Criteria_5_Professionalism",
    "P5_Total Score": "P5_Total_Score",
    "P5_Grade": "P5_Grade",
    "P5_Hallucination": "P5_Hallucination",
    "P5_Hallu_note": "P5_Hallucination_Note",
    "P5_Notes": "P5_Notes",
    "Time Spent on Assignment Five (4177448)": "Time_Spent_Assignment_5",
    
    # Task 6 (criteria columns are named by criterion, as in Task 1)
    "P6_Accuracy": "P6_Criteria_1_Accuracy",
    "P6_Analysis": "P6_Criteria_2_Analysis",
    "P6_Organization": "P6_Criteria_3_Organization",
    "P6_Clarity": "P6_Criteria_4_Clarity",
    "P6_Professionalism": "P6_Criteria_5_Professionalism",
    "P6_Total Score": "P6_Total_Score",
    "P6_Grade": "P6_Grade",
    "P6_Hallucination": "P6_Hallucination",
    "P6_Hallu_note": "P6_Hallucination_Note",
    "P6_Notes": "P6_Notes",
    "Time Spent on Assignment Six (4177449)": "Time_Spent_Assignment_6"
}

def analyze_group_distribution(df, file_name):
    """Print counts and percentage shares of the group and AI_Condition columns.

    The group column may be named 'group' or 'Group' ('group' wins when both
    exist). A column that is absent is skipped. Missing values are counted as
    their own category, and percentages are relative to the total row count.
    """
    total_rows = len(df)

    print(f"\n{'='*50}")
    print(f"Distribution Analysis for: {file_name}")
    print(f"Total rows: {total_rows}")

    def _print_distribution(column, label):
        # Header first, then counts sorted by category, then the same as percentages
        print(f"\n{label} Distribution:")
        counts = df[column].value_counts(dropna=False).sort_index()
        print(counts)
        print(f"\n{label} Distribution (Percentages):")
        print((counts / total_rows * 100).round(2))

    group_col = next((name for name in ('group', 'Group') if name in df.columns), None)
    if group_col is not None:
        _print_distribution(group_col, "Group")

    if 'AI_Condition' in df.columns:
        _print_distribution('AI_Condition', "AI_Condition")

def convert_to_numeric(df):
    """Return a copy of ``df`` with its non-skipped columns coerced to numeric.

    A column is skipped when its name contains any of the skip markers below
    (group, condition, grade, hallucination, note, email and student columns).
    In the remaining columns, values that cannot be parsed become NaN. The
    input frame is not modified.
    """
    skip_markers = ('Group', 'AI_Condition', 'Grade', 'Hallucination', 'Note', 'Notes', 'Email', 'Student')

    numeric_df = df.copy()
    for column in numeric_df.columns:
        if any(marker in column for marker in skip_markers):
            continue
        numeric_df[column] = pd.to_numeric(numeric_df[column], errors='coerce')
    return numeric_df

def check_before_conversion(df):
    """Print the frame's dtypes and the unique values of object columns due for conversion.

    Object-dtype columns whose name contains one of the skip markers (the same
    markers ``convert_to_numeric`` uses) are not listed.
    """
    skip_markers = ('Group', 'AI_Condition', 'Grade', 'Hallucination', 'Note', 'Notes', 'Email', 'Student')

    print("\nBefore conversion:")
    print(df.dtypes)

    for column in df.select_dtypes(include=['object']).columns:
        if any(marker in column for marker in skip_markers):
            continue
        print(f"\nUnique values in {column}:")
        print(df[column].unique())

def robust_rename_columns(df):
    """Return a copy of ``df`` with its columns renamed to the standardized names.

    Two steps are applied in order:

    1. Any column whose name contains both "P4_Criteria 5" and
       "Professionalism" is renamed to "P4_Criteria_5_Professionalism" (only
       the first such column, if several exist).
    2. The notebook-level ``col_mapping`` is applied to the remaining columns.

    The input frame is never modified. Previously it was renamed in place
    whenever step 1 found nothing to do, so whether the caller's frame changed
    depended on the data. Callers should use the returned frame.

    Args:
        df: DataFrame with raw column names.

    Returns:
        A new DataFrame with renamed columns.
    """
    renamed = df

    # First, handle the problematic P4 column specifically
    p4_prof_col = [col for col in df.columns if "P4_Criteria 5" in col and "Professionalism" in col]
    if p4_prof_col:
        renamed = renamed.rename(columns={p4_prof_col[0]: "P4_Criteria_5_Professionalism"})

    # Then apply the standard mapping for other columns; rename() without
    # inplace always returns a new frame
    return renamed.rename(columns=col_mapping)

# Main processing
print("\nStarting data processing and distribution analysis...")

# Address the per-task files by name instead of listing the directory.
# os.listdir returns entries in arbitrary order and would also pick up the
# *_numeric.csv / *_cleaned.csv files written by this and later cells on a
# re-run, so the position in that listing is not a reliable task number.
task_files = []
for task_num in range(1, 7):
    task_file_name = f"task{task_num}_data.csv"
    if os.path.exists(os.path.join(analysis_path, task_file_name)):
        task_files.append((task_num, task_file_name))
    else:
        print(f"Warning: {task_file_name} not found in {analysis_path}; skipping task {task_num}.")

csv_files = [name for _, name in task_files]
dataframes = []

# First pass: load each task file, report its distributions, standardize the
# column names and write the renamed frame back to the same file
print("\nInitial distribution analysis:")
for file in csv_files:
    file_path = os.path.join(analysis_path, file)
    df = pd.read_csv(file_path)
    analyze_group_distribution(df, file)

    # Apply the robust renaming function instead of direct renaming
    df = robust_rename_columns(df)

    dataframes.append(df)
    df.to_csv(file_path, index=False)

# Second pass: convert to numeric, add productivity, save as *_numeric.csv.
# The task number i comes from the file name recorded above, not from the
# position in the list.
print("\nProcessing task DataFrames:")
for (i, _), df in zip(task_files, dataframes):
    print(f"\nProcessing Task {i} Data:")
    print("=" * 50)

    # Check data before conversion
    check_before_conversion(df)

    # Convert to numeric
    converted_df = convert_to_numeric(df)

    # Calculate productivity = total score / time spent.
    # A time of 0 yields inf (or NaN for 0/0) rather than raising an error.
    total_score_col = f"P{i}_Total_Score"
    time_spent_col = f"Time_Spent_Assignment_{i}"
    productivity_col = f"P{i}_Productivity"

    if total_score_col in converted_df.columns and time_spent_col in converted_df.columns:
        converted_df[productivity_col] = converted_df[total_score_col] / converted_df[time_spent_col]

    # Show distributions after processing
    print("\nDistributions after processing:")
    analyze_group_distribution(converted_df, f"Task {i} (After Processing)")

    # Save the converted dataframe
    out_path = os.path.join(analysis_path, f"task{i}_data_numeric.csv")
    converted_df.to_csv(out_path, index=False)
    print(f"Saved processed data to: {out_path}")

print("\nConversion and distribution analysis complete!")


Starting data processing and distribution analysis...

Initial distribution analysis:

Distribution Analysis for: task1_data.csv
Total rows: 153

Group Distribution:
group
A    51
B    51
C    51
Name: count, dtype: int64

Group Distribution (Percentages):
group
A    33.33
B    33.33
C    33.33
Name: count, dtype: float64

AI_Condition Distribution:
AI_Condition
GPT 01     51
No AI      51
Vincent    51
Name: count, dtype: int64

AI_Condition Distribution (Percentages):
AI_Condition
GPT 01     33.33
No AI      33.33
Vincent    33.33
Name: count, dtype: float64

Distribution Analysis for: task2_data.csv
Total rows: 153

Group Distribution:
group
A    51
B    51
C    51
Name: count, dtype: int64

Group Distribution (Percentages):
group
A    33.33
B    33.33
C    33.33
Name: count, dtype: float64

AI_Condition Distribution:
AI_Condition
GPT 01     51
No AI      51
Vincent    51
Name: count, dtype: int64

AI_Condition Distribution (Percentages):
AI_Condition
GPT 01     33.33
No AI      33

In [10]:
# For each task, write a cleaned copy of the numeric file that keeps only the
# rows with a usable (non-missing, strictly positive) total score
for i in range(1, 7):
    file_path = os.path.join(analysis_path, f"task{i}_data_numeric.csv")
    cleaned_file_path = os.path.join(analysis_path, f"task{i}_data_cleaned.csv")

    try:
        df = pd.read_csv(file_path)
        total_score_col = f"P{i}_Total_Score"
        original_count = len(df)

        # Coerce the score column to numeric; unparseable entries become NaN
        df[total_score_col] = pd.to_numeric(df[total_score_col], errors='coerce')
        scores = df[total_score_col]

        # Report the starting point, including any exact-zero scores
        zero_rows = df[scores == 0]
        print(f"Task {i} before filtering - Count: {len(df)}")
        print(f"Rows with Total Score = 0: {len(zero_rows)}")
        if len(zero_rows) > 0:
            print(f"Found {len(zero_rows)} rows with exact zero Total Score in task {i}:")
            print(zero_rows[[total_score_col, 'Student_Number']].head())

        # Keep rows that are not NaN, not exactly zero, and above a small
        # positive threshold (which also excludes negative scores)
        threshold = 1e-6
        keep_mask = scores.notna() & (scores != 0) & (scores > threshold)
        df_cleaned = df[keep_mask]

        # Verification step after filtering
        zero_check = df_cleaned[df_cleaned[total_score_col] == 0]
        if len(zero_check) == 0:
            print("Verification passed: No zero values remain in the dataset.")
        else:
            print(f"WARNING: Still found {len(zero_check)} zero values after filtering!")
            print(zero_check[[total_score_col, 'Student_Number']].head())

        # Save the cleaned dataset and summarise what was removed
        df_cleaned.to_csv(cleaned_file_path, index=False)

        removed_count = original_count - len(df_cleaned)
        print(f"Task {i}: Cleaned data saved to {cleaned_file_path}")
        print(f"Original rows: {original_count}, Cleaned rows: {len(df_cleaned)}")
        print(f"Removed rows: {removed_count}")
        print("-" * 50)

    except FileNotFoundError:
        print(f"Warning: Could not find file {file_path}")
    except Exception as e:
        print(f"Error processing task {i}: {str(e)}")

print("Cleaning complete.")

Task 1 before filtering - Count: 153
Rows with Total Score = 0: 1
Found 1 rows with exact zero Total Score in task 1:
     P1_Total_Score    Student_Number
152             0.0  a067276138a65c6a
Verification passed: No zero values remain in the dataset.
Task 1: Cleaned data saved to C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\task1_data_cleaned.csv
Original rows: 153, Cleaned rows: 135
Removed rows: 18
--------------------------------------------------
Task 2 before filtering - Count: 153
Rows with Total Score = 0: 1
Found 1 rows with exact zero Total Score in task 2:
     P2_Total_Score    Student_Number
142             0.0  d8bb952342c588bb
Verification passed: No zero values remain in the dataset.
Task 2: Cleaned data saved to C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\task2_data_cleaned.csv
Original rows: 153, Cleane

In [11]:
# Re-read every cleaned file and confirm that no zero total scores remain
zero_scores_found = False

for i in range(1, 7):
    cleaned_file_path = os.path.join(analysis_path, f"task{i}_data_cleaned.csv")
    score_col = f"P{i}_Total_Score"

    try:
        task_df = pd.read_csv(cleaned_file_path)

        # Rows whose total score is exactly zero
        zero_scores = task_df[task_df[score_col] == 0]
        zero_count = len(zero_scores)

        if zero_count == 0:
            print(f"Task {i}: No zero values in Total Score column. Data is clean.")
        else:
            zero_scores_found = True
            print(f"Task {i}: Found {zero_count} rows with zero Total Score!")
            print(zero_scores.head())  # Show the first few problematic rows

        # Additional validation stats
        print(f"Task {i}: Min Total Score = {task_df[score_col].min()}")
        print(f"Task {i}: Number of rows = {len(task_df)}")
        print("-" * 50)

    except FileNotFoundError:
        print(f"Warning: Could not find cleaned file {cleaned_file_path}")
    except Exception as e:
        print(f"Error checking task {i}: {str(e)}")

if zero_scores_found:
    print("WARNING: Zero values found in some Total Score columns! Check the logs above.")
else:
    print("All tasks checked. No zero values found in any Total Score columns.")

Task 1: No zero values in Total Score column. Data is clean.
Task 1: Min Total Score = 5.0
Task 1: Number of rows = 135
--------------------------------------------------
Task 2: No zero values in Total Score column. Data is clean.
Task 2: Min Total Score = 5.0
Task 2: Number of rows = 125
--------------------------------------------------
Task 3: No zero values in Total Score column. Data is clean.
Task 3: Min Total Score = 7.0
Task 3: Number of rows = 127
--------------------------------------------------
Task 4: No zero values in Total Score column. Data is clean.
Task 4: Min Total Score = 12.0
Task 4: Number of rows = 127
--------------------------------------------------
Task 5: No zero values in Total Score column. Data is clean.
Task 5: Min Total Score = 2.0
Task 5: Number of rows = 127
--------------------------------------------------
Task 6: No zero values in Total Score column. Data is clean.
Task 6: Min Total Score = 6.0
Task 6: Number of rows = 126
------------------------

Next, create a table that shows the differences in productivity for each of the six tasks.

In [12]:
# Reload the per-task DataFrames from the cleaned files (task{n}_data_cleaned.csv);
# a task whose file cannot be read is reported and left out of task_dfs
task_dfs = {}
for task_num in range(1, 7):
    file_path = os.path.join(analysis_path, f"task{task_num}_data_cleaned.csv")
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}. Skipping task {task_num}.")
    except Exception as e:
        print(f"Error reading {file_path}: {e}. Skipping task {task_num}.")
    else:
        task_dfs[task_num] = df

def extract_treatment_effects(task_num, outcome):
    """Estimate the Vincent and o1-preview effects on one outcome of one task.

    The task's frame is taken from the notebook-level ``task_dfs``. Rows with a
    missing ``outcome`` are dropped, then ``outcome`` is regressed (OLS with HC1
    robust standard errors) on a constant plus two 0/1 indicators built from
    ``AI_Condition`` ('Vincent' and 'GPT 01'). Every row that is neither of those
    two conditions has both indicators equal to 0 in the regression.

    Args:
        task_num: Key into ``task_dfs``.
        outcome: Name of the outcome column in that task's frame.

    Returns:
        Tuple ``(vincent_effect, gpt_effect, control_mean, sample_size)``. Each
        effect is a dict with keys 'coef', 'se', 'pval', 'pct_change' and 'N';
        ``control_mean`` is the mean outcome among 'No AI' rows; ``sample_size``
        is the number of rows used in the regression.

    Raises:
        ValueError: The task is not loaded, a required column is absent, no row
            has a non-missing outcome, or there are no 'No AI' rows.
        RuntimeError: Building or fitting the OLS model failed.
    """
    if task_num not in task_dfs:
        raise ValueError(f"Data for task {task_num} is not available.")

    # Work on a copy so the indicator columns never leak into task_dfs.
    frame = task_dfs[task_num].copy()

    missing_cols = [name for name in ('AI_Condition', outcome) if name not in frame.columns]
    if missing_cols:
        raise ValueError(f"Missing columns {missing_cols} in task {task_num} data.")

    # Indicator column -> the AI_Condition label it flags.
    treatment_labels = {'Vincent_dummy': 'Vincent', 'GPT01_dummy': 'GPT 01'}
    for dummy_col, label in treatment_labels.items():
        frame[dummy_col] = (frame['AI_Condition'] == label).astype(int)

    usable = frame.dropna(subset=[outcome])
    if usable.empty:
        raise ValueError(f"No data available for outcome '{outcome}' in task {task_num}.")

    design = sm.add_constant(usable[list(treatment_labels)])
    response = usable[outcome]

    try:
        fit = sm.OLS(response, design).fit(cov_type='HC1')
    except Exception as e:
        raise RuntimeError(f"Error fitting model for task {task_num} and outcome '{outcome}': {e}")

    # The reference mean uses only rows explicitly labelled 'No AI'.
    baseline = usable.loc[usable['AI_Condition'] == 'No AI', outcome]
    if baseline.empty:
        raise ValueError(f"No control group ('No AI') data found for task {task_num}.")
    control_mean = baseline.mean()

    n_obs = len(usable)

    def _summarise(term):
        # A zero control mean makes the percent change undefined, so report NaN.
        coef = fit.params[term]
        return {
            'coef': coef,
            'se': fit.bse[term],
            'pval': fit.pvalues[term],
            'pct_change': (coef / control_mean) * 100 if control_mean else np.nan,
            'N': n_obs,
        }

    return _summarise('Vincent_dummy'), _summarise('GPT01_dummy'), control_mean, n_obs

results_data = []


# Defined once, before the task loop, instead of being re-created on every iteration.
def format_coef(coef, pval):
    """Render a coefficient as LaTeX math (3 decimals) with significance stars.

    Stars follow the table notes: *** for p<0.01, ** for p<0.05, * for p<0.1.
    """
    stars = ''
    if pval < 0.01:
        stars = '^{***}'
    elif pval < 0.05:
        stars = '^{**}'
    elif pval < 0.1:
        stars = '^{*}'
    return f"${coef:.3f}{stars}$"


# Loop over tasks to extract treatment effects and compile results
for task_num in range(1, 7):
    outcome = f'P{task_num}_Productivity'
    try:
        vincent_effect, gpt_effect, control_mean, sample_size = extract_treatment_effects(task_num, outcome)
    except Exception as e:
        # Best effort: a task that cannot be estimated is reported and left out of the table.
        print(f"Skipping task {task_num} due to error: {e}")
        continue

    # One row per model, Vincent first, so each task contributes exactly two rows.
    for model_label, effect in (('Vincent', vincent_effect), ('o1-preview', gpt_effect)):
        pct_change = effect['pct_change']
        if pd.isna(pct_change):
            pct_text = "N/A"
        else:
            # Only non-negative changes get an explicit "+". A negative value already
            # carries its own sign; unconditionally prefixing "+" rendered "+$-5.0\%$".
            sign = "+" if pct_change >= 0 else ""
            pct_text = f"{sign}${pct_change:.1f}\\%$"

        results_data.append({
            'Task': task_descriptions.get(task_num, f"Task {task_num}"),
            'Control Mean': f"${control_mean:.3f}$",
            'Model': model_label,
            'Effect': format_coef(effect['coef'], effect['pval']),
            'SE': f"(${effect['se']:.3f}$)",
            'Pct Change': pct_text,
            'N': sample_size
        })

# Create DataFrame from results
# Explicit columns keep the frame filterable by 'Task' even if every task was skipped.
results_df = pd.DataFrame(
    results_data,
    columns=['Task', 'Control Mean', 'Model', 'Effect', 'SE', 'Pct Change', 'N'],
)

# Generate LaTeX table string with the correct number of columns (7)
# The table is assembled as one string: preamble, header row, two rows per task
# (Vincent, then o1-preview) and a full-width notes row placed inside the tabular.
# The \multirow cells presumably require the LaTeX `multirow` package in the
# including document -- TODO confirm against the paper's preamble.
latex_table = "\\begin{table}[!htbp]\n\\centering\n"
latex_table += "\\caption{Treatment Effects on Task Productivity (Points per Minute)}\n"
latex_table += "\\label{tab:productivity_effects}\n"
latex_table += "\\begin{tabular}{lcccccc}\n"  # Now 7 columns: l c c c c c c
latex_table += "\\hline\\hline\n"
latex_table += "Task & Control Mean & Model & Effect & SE & \\% Change & N \\\\\n"
latex_table += "\\hline\n"

# Loop over tasks to create table rows
for task in task_descriptions.values():
    task_rows = results_df[results_df['Task'] == task]
    if task_rows.empty:
        # Task was skipped during estimation; emit nothing for it.
        continue
    first_row = True
    for _, row in task_rows.iterrows():
        if first_row:
            # Task name and control mean are printed once and span both model rows.
            latex_table += f"\\multirow{{2}}{{*}}{{{row['Task']}}} & "
            latex_table += f"\\multirow{{2}}{{*}}{{{row['Control Mean']}}} "
            first_row = False
        else:
            # Second row: leave the two spanned cells empty.
            latex_table += "& "
        latex_table += f"& {row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
    latex_table += "\\hline\n"

# Notes row spans all 7 columns.
latex_table += ("\\multicolumn{7}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} "
                "Effects shown as absolute increase in points per minute relative to No AI control group. "
                "Percent changes calculated relative to control group mean. "
                "Robust standard errors in parentheses. "
                "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$. Sample size (N) represents the number of observations used in the regression.}\n")
latex_table += "\\end{tabular}\n"
latex_table += "\\end{table}"

# Save LaTeX table to file with the specified path
table_file_path = os.path.join(tables_path, "productivity_treatment_effects_with_pct.tex")
try:
    with open(table_file_path, "w") as f:
        f.write(latex_table)
    print(f"\nTable saved to: {table_file_path}")
except Exception as e:
    # A failed write is reported but does not stop the console summary below.
    print(f"Error saving LaTeX table: {e}")

# Print a readable version of the results
# The values printed are the same LaTeX-formatted strings stored in results_df.
print("\nTreatment Effects on Task Productivity")
print("=====================================")
for task_num in range(1, 7):
    task_name = task_descriptions.get(task_num)
    if task_name not in results_df['Task'].unique():
        # No rows for this task (it was skipped during estimation).
        continue
    task_rows = results_df[results_df['Task'] == task_name]
    print(f"\n{task_name}:")
    print(f"Control Mean: {task_rows['Control Mean'].iloc[0]} points per minute")
    for _, row in task_rows.iterrows():
        print(f"{row['Model']}: {row['Effect']} {row['SE']} ({row['Pct Change']} change) | N = {row['N']}")



Table saved to: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\productivity_treatment_effects_with_pct.tex

Treatment Effects on Task Productivity

Draft Client Email:
Control Mean: $0.393$ points per minute
Vincent: $0.216^{***}$ ($0.055$) (+$55.0\%$ change) | N = 134
o1-preview: $0.135^{***}$ ($0.047$) (+$34.3\%$ change) | N = 134

Draft Legal Memo:
Control Mean: $0.100$ points per minute
Vincent: $0.058^{***}$ ($0.018$) (+$57.4\%$ change) | N = 124
o1-preview: $0.074^{***}$ ($0.021$) (+$73.5\%$ change) | N = 124

Analysis of Complaint:
Control Mean: $0.238$ points per minute
Vincent: $0.273^{***}$ ($0.058$) (+$114.6\%$ change) | N = 126
o1-preview: $0.206^{***}$ ($0.047$) (+$86.7\%$ change) | N = 126

Draft NDA:
Control Mean: $0.373$ points per minute
Vincent: $0.070$ ($0.070$) (+$18.7\%$ change) | N = 127
o1-preview: $0.038$ ($0.060$) (+$10.3\%$ change) | N = 127

Draft Motion to Consolidate:
C

Loop through the other outcome variables and, for each one, create a results table covering all six tasks.

In [13]:
# Load task dataframes
# Rebinds the notebook-level `task_dfs` used by extract_treatment_effects().
# Unlike a guarded loader, a missing CSV raises here and stops the cell.
task_dfs = {i: pd.read_csv(os.path.join(analysis_path, f"task{i}_data_cleaned.csv")) for i in range(1, 7)}

def create_outcome_tables():
    """Write one LaTeX treatment-effect table per outcome in ``outcome_mappings``.

    For every outcome, the effect of each AI condition is estimated on tasks 1-6
    with ``extract_treatment_effects`` and laid out as two rows per task
    (Vincent, then o1-preview). Each table is written to
    ``<tables_path>/<outcome in lower_snake_case>_effects.tex`` and progress is
    printed. Nothing is returned; any estimation or write error propagates.
    """

    def _starred(coef, pval):
        # LaTeX math with 3 decimals; stars match the thresholds in the notes row.
        if pval < 0.01:
            marks = '^{***}'
        elif pval < 0.05:
            marks = '^{**}'
        elif pval < 0.1:
            marks = '^{*}'
        else:
            marks = ''
        return f"${coef:.3f}{marks}$"

    for outcome in outcome_mappings.keys():
        print(f"\nGenerating table for {outcome}...")

        records = []
        for task_num in range(1, 7):
            outcome_var = outcome_mappings[outcome][task_num]
            vincent_effect, gpt_effect, control_mean, sample_size = extract_treatment_effects(task_num, outcome_var)

            for model_label, effect in (('Vincent', vincent_effect), ('o1-preview', gpt_effect)):
                records.append({
                    'Task': task_descriptions[task_num],
                    'Control Mean': f"${control_mean:.3f}$",
                    'Model': model_label,
                    'Effect': _starred(effect['coef'], effect['pval']),
                    'SE': f"(${effect['se']:.3f}$)",
                    'Pct Change': format_pct_change(effect['pct_change']),
                    'N': sample_size
                })

        slug = outcome.lower().replace(' ', '_')

        # Collect the table as fragments and join once at the end.
        fragments = [
            "\\begin{table}[!htbp]\n\\centering\n",
            f"\\caption{{Treatment Effects on {outcome} Across Tasks}}\n",
            f"\\label{{tab:{slug}_effects}}\n",
            "\\begin{tabular}{lcccccc}\n",
            "\\hline\\hline\n",
            "Task & Control Mean & Model & Effect & SE & \\% Change & N \\\\\n",
            "\\hline\n",
        ]

        for task in task_descriptions.values():
            matching = [rec for rec in records if rec['Task'] == task]
            for position, rec in enumerate(matching):
                if position == 0:
                    # Task name and control mean span both model rows.
                    lead = (f"\\multirow{{2}}{{*}}{{{rec['Task']}}} & "
                            f"\\multirow{{2}}{{*}}{{{rec['Control Mean']}}} ")
                else:
                    lead = "& "
                fragments.append(
                    lead
                    + f"& {rec['Model']} & {rec['Effect']} & {rec['SE']} & {rec['Pct Change']} & {rec['N']} \\\\\n"
                )
            fragments.append("\\hline\n")

        fragments.extend([
            "\\multicolumn{7}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} ",
            "Effects shown as absolute increase relative to No AI control group. ",
            "Percent changes calculated relative to control group mean. ",
            "Robust standard errors in parentheses. ",
            "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$. Sample size (N) represents the number of observations used in the regression.}\n",
            "\\end{tabular}\n",
            "\\end{table}",
        ])
        latex_table = "".join(fragments)

        # Save LaTeX table to file
        table_file_path = os.path.join(tables_path, f"{slug}_effects.tex")
        with open(table_file_path, "w") as f:
            f.write(latex_table)

        print(f"Table saved: {table_file_path}")

# Run the function to generate all outcome tables
# Any exception raised inside create_outcome_tables() propagates, so the
# confirmation line below is only printed when every table was written.
create_outcome_tables()

print("✅ All outcome tables have been generated and saved.")


Generating table for Accuracy...
Table saved: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\accuracy_effects.tex

Generating table for Analysis...
Table saved: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\analysis_effects.tex

Generating table for Organization...
Table saved: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\organization_effects.tex

Generating table for Clarity...
Table saved: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\clarity_effects.tex

Generating table for Professionalism...
Table saved: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\

In [14]:
# Assuming analysis_path is defined
# Load each task's cleaned CSV into its own notebook-level variable
# (task1_df ... task6_df). Later cells look these names up dynamically via
# eval(f'task{task_num}_df'), so the naming pattern must be kept as is.
task1_df = pd.read_csv(os.path.join(analysis_path, "task1_data_cleaned.csv"))
task2_df = pd.read_csv(os.path.join(analysis_path, "task2_data_cleaned.csv"))
task3_df = pd.read_csv(os.path.join(analysis_path, "task3_data_cleaned.csv"))
task4_df = pd.read_csv(os.path.join(analysis_path, "task4_data_cleaned.csv"))
task5_df = pd.read_csv(os.path.join(analysis_path, "task5_data_cleaned.csv"))
task6_df = pd.read_csv(os.path.join(analysis_path, "task6_data_cleaned.csv"))

In [15]:
def format_effect(coef, pval):
    """Return ``coef`` as LaTeX math with 2 decimals, starred by significance.

    The star string comes from ``get_stars(pval)``; when it is empty no
    superscript is emitted at all.
    """
    stars = get_stars(pval)
    superscript = f"^{{{stars}}}" if stars else ""
    return f"${coef:.2f}{superscript}$"

def create_task_table(task_num, task_name):
    """Build and save the LaTeX table of all treatment effects for one task.

    For each outcome (the five quality criteria, Total Score, Time Spent and
    Productivity) the Vincent and o1-preview effects are estimated with
    ``extract_treatment_effects`` and written as two table rows.

    Args:
        task_num: Task number (1-6); selects the outcome columns and time limit.
        task_name: Human-readable task title used in the caption.

    Returns:
        The LaTeX source of the table. The same text is written to
        ``<tables_path>/task<task_num>_all_effects.tex``.

    Raises:
        Whatever ``extract_treatment_effects`` raises for an outcome that cannot
        be estimated, and ``OSError`` if the file cannot be written.
    """
    # Display label -> column name for this task, in table order.
    mapped_outcomes = ['Accuracy', 'Analysis', 'Organization', 'Clarity',
                       'Professionalism', 'Total Score', 'Time Spent']
    outcome_cols = {label: outcome_mappings[label][task_num] for label in mapped_outcomes}
    outcome_cols['Productivity'] = f'P{task_num}_Productivity'

    results_data = []

    for outcome, col in outcome_cols.items():
        # The returned sample size is the number of rows with a non-missing outcome,
        # i.e. exactly the N the regression used. Taking it from here removes the
        # former eval()-based lookup of a task<N>_df global that recomputed the same count.
        vincent_effect, gpt_effect, control_mean, sample_size = extract_treatment_effects(task_num, col)

        # Vincent first, then o1-preview: two rows per outcome.
        for model_label, effect in (('Vincent', vincent_effect), ('o1-preview', gpt_effect)):
            results_data.append({
                'Outcome': outcome,
                'Control Mean': f"${control_mean:.2f}$",
                'Model': model_label,
                'Effect': format_effect(effect['coef'], effect['pval']),
                'SE': f"(${effect['se']:.2f}$)",
                'Pct Change': format_pct_change(effect['pct_change']),
                'N': sample_size
            })

    # Create LaTeX table
    latex_table = "\\begin{table}[!htbp]\n\\centering\n"
    latex_table += f"\\caption{{Treatment Effects for {task_name}}}\n"
    latex_table += f"\\label{{tab:task{task_num}_effects}}\n"
    latex_table += "\\begin{tabular}{lcccccc}\n"
    latex_table += "\\hline\\hline\n"
    latex_table += "Outcome & Control Mean & Model & Effect & SE & \\% Change & N \\\\\n"
    latex_table += "\\hline\n"

    # Add rows
    for outcome in outcome_cols:
        outcome_rows = [row for row in results_data if row['Outcome'] == outcome]
        for position, row in enumerate(outcome_rows):
            if position == 0:
                # Outcome label and control mean span both model rows.
                latex_table += f"\\multirow{{2}}{{*}}{{{row['Outcome']}}} & "
                latex_table += f"\\multirow{{2}}{{*}}{{{row['Control Mean']}}} "
            else:
                latex_table += "& "
            latex_table += f"& {row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
        latex_table += "\\hline\n"

    # Task-specific time limit (minutes) quoted in the notes.
    time_limits = {
        1: "60", 2: "240", 3: "120",
        4: "180", 5: "150", 6: "150"
    }

    latex_table += "\\multicolumn{7}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} "
    latex_table += "Effects shown relative to No AI control group. For quality criteria (Accuracy through Professionalism), "
    latex_table += f"the scoring scale is 1-7, with Total Score ranging from 5-35. Time Spent shows minutes (time limit: {time_limits[task_num]} minutes). "
    latex_table += "Productivity measures points earned per minute. "
    latex_table += "Percent changes calculated relative to control group mean. "
    latex_table += "Robust standard errors in parentheses. "
    latex_table += "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$}\n"
    latex_table += "\\end{tabular}\n"
    latex_table += "\\end{table}"

    # Save LaTeX table in the shared tables directory (same location as before,
    # but built with os.path.join instead of a hand-written "/" separator).
    with open(os.path.join(tables_path, f"task{task_num}_all_effects.tex"), "w") as f:
        f.write(latex_table)

    return latex_table

# Helper function for significance stars
def get_stars(pval):
    """Map a p-value to its significance marker.

    Returns "***" below 0.01, "**" below 0.05, "*" below 0.1 and "" otherwise
    (including for NaN, which compares false against every cutoff).
    """
    for cutoff, marker in ((0.01, "***"), (0.05, "**"), (0.1, "*")):
        if pval < cutoff:
            return marker
    return ""

# Create tables for all tasks
# NOTE(review): this dict repeats the entries of `task_descriptions` from the
# setup cell; it is kept because other cells refer to `task_names` by name.
task_names = {
    1: "Draft Client Email",
    2: "Draft Legal Memo",
    3: "Analysis of Complaint",
    4: "Draft NDA",
    5: "Draft Motion to Consolidate",
    6: "Draft CNC Enforcement Letter"
}

# Generate all task tables
# create_task_table() writes each file itself; the returned LaTeX string is
# only bound to `table` and not used further in this cell.
for task_num, task_name in task_names.items():
    print(f"\nGenerating table for Task {task_num}: {task_name}")
    table = create_task_table(task_num, task_name)
    print(f"Table saved as task{task_num}_all_effects.tex")



Generating table for Task 1: Draft Client Email
Table saved as task1_all_effects.tex

Generating table for Task 2: Draft Legal Memo
Table saved as task2_all_effects.tex

Generating table for Task 3: Analysis of Complaint
Table saved as task3_all_effects.tex

Generating table for Task 4: Draft NDA
Table saved as task4_all_effects.tex

Generating table for Task 5: Draft Motion to Consolidate
Table saved as task5_all_effects.tex

Generating table for Task 6: Draft CNC Enforcement Letter
Table saved as task6_all_effects.tex


In [16]:
def analyze_complete_task_data():
    """Count, per task and overall, how many rows have each kind of data.

    Four counts are produced for each of the six tasks:
    1. rows with a non-null Total Score
    2. rows with a non-null Time Spent
    3. rows with both Score and Time
    4. rows with a non-null Productivity value (0 if the column is absent)

    Frames are read from the notebook-level ``task_dfs`` dict rather than by
    eval()-ing a ``task<N>_df`` variable name; both hold the same cleaned CSVs.

    Returns:
        dict: ``"by_task"`` maps task number to a dict with ``task_name`` and the
        four counts; the ``total_*`` keys hold the sums over all tasks. All
        counts are plain Python ints.
    """
    # Per-task count key -> key of the matching grand total.
    total_keys = {
        "score_completed_count": "total_score_completed",
        "time_recorded_count": "total_time_recorded",
        "both_recorded_count": "total_both_recorded",
        "productivity_recorded_count": "total_productivity_recorded",
    }
    results = {"by_task": {}}
    results.update({total_key: 0 for total_key in total_keys.values()})

    # Iterate through all 6 tasks
    for task_num in range(1, 7):
        df = task_dfs[task_num]

        score_col = outcome_mappings['Total Score'][task_num]
        time_col = outcome_mappings['Time Spent'][task_num]
        productivity_col = f'P{task_num}_Productivity'

        has_score = df[score_col].notna()
        has_time = df[time_col].notna()

        counts = {
            "score_completed_count": int(has_score.sum()),
            "time_recorded_count": int(has_time.sum()),
            "both_recorded_count": int((has_score & has_time).sum()),
            # A frame without the productivity column contributes zero.
            "productivity_recorded_count": (
                int(df[productivity_col].notna().sum()) if productivity_col in df.columns else 0
            ),
        }

        # task_descriptions is the shared task-title mapping from the setup cell.
        results["by_task"][task_num] = {"task_name": task_descriptions[task_num], **counts}

        for count_key, total_key in total_keys.items():
            results[total_key] += counts[count_key]

    return results

# Run the analysis
completion_analysis = analyze_complete_task_data()

# Print the results
print(f"Total tasks with score across all treatment groups: {completion_analysis['total_score_completed']}")
print(f"Total tasks with time recorded: {completion_analysis['total_time_recorded']}")
print(f"Total tasks with both score and time: {completion_analysis['total_both_recorded']}")
print(f"Total tasks with productivity data: {completion_analysis['total_productivity_recorded']}")

print("\nBreakdown by task:")
# The loop variable is deliberately not called `stats`: at cell level that name
# would rebind the `scipy.stats` module imported in the first cell to a dict.
for task_num, task_counts in completion_analysis["by_task"].items():
    print(f"Task {task_num} ({task_counts['task_name']}):")
    print(f"  Score completed count: {task_counts['score_completed_count']}")
    print(f"  Time recorded count: {task_counts['time_recorded_count']}")
    print(f"  Both score and time recorded: {task_counts['both_recorded_count']}")
    print(f"  Productivity recorded count: {task_counts['productivity_recorded_count']}")

Total tasks with score across all treatment groups: 767
Total tasks with time recorded: 764
Total tasks with both score and time: 764
Total tasks with productivity data: 764

Breakdown by task:
Task 1 (Draft Client Email):
  Score completed count: 135
  Time recorded count: 134
  Both score and time recorded: 134
  Productivity recorded count: 134
Task 2 (Draft Legal Memo):
  Score completed count: 125
  Time recorded count: 124
  Both score and time recorded: 124
  Productivity recorded count: 124
Task 3 (Analysis of Complaint):
  Score completed count: 127
  Time recorded count: 126
  Both score and time recorded: 126
  Productivity recorded count: 126
Task 4 (Draft NDA):
  Score completed count: 127
  Time recorded count: 127
  Both score and time recorded: 127
  Productivity recorded count: 127
Task 5 (Draft Motion to Consolidate):
  Score completed count: 127
  Time recorded count: 127
  Both score and time recorded: 127
  Productivity recorded count: 127
Task 6 (Draft CNC Enforce

Make some nice figures

In [17]:
def create_density_plots(task_num, task_name):
    """Save per-condition density plots of Total Score and Time Spent for a task.

    Two PNG files are written to ``<analysis_path>/figures``:
    ``task<task_num>_score_density.png`` and ``task<task_num>_time_density.png``.
    Each shows one filled KDE per AI condition plus a vertical line at that
    condition's mean.

    Args:
        task_num: Task number; selects the frame in ``task_dfs``, the score
            column ``P<task_num>_Total_Score`` and the column in ``time_cols``.
        task_name: Task title used in the figure titles.

    Returns:
        None.
    """
    # Create the figures directory if it doesn't exist
    figures_dir = os.path.join(analysis_path, 'figures')
    os.makedirs(figures_dir, exist_ok=True)

    # Note: this changes the matplotlib style for the rest of the session.
    plt.style.use('seaborn-v0_8-whitegrid')

    # Read the frame from the shared dict instead of eval()-ing a variable name.
    df = task_dfs[task_num]

    # Dataset label -> (legend name, fill colour, mean-line colour).
    # 'GPT 01' is the label stored in the data; it is shown as 'o1-preview'.
    conditions = {
        'No AI': ('No AI', 'lightblue', 'blue'),
        'GPT 01': ('o1-preview', 'bisque', 'red'),
        'Vincent': ('Vincent', 'lightgreen', 'darkgreen'),
    }

    # (column, title prefix, x-axis label, output file name) for each figure.
    panels = [
        (f'P{task_num}_Total_Score', 'Total Score Distribution', 'Total Score',
         f'task{task_num}_score_density.png'),
        (time_cols[task_num], 'Time Spent Distribution', 'Time Spent (minutes)',
         f'task{task_num}_time_density.png'),
    ]

    for column, title_prefix, x_label, file_name in panels:
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            for condition, (display_name, fill_color, mean_color) in conditions.items():
                values = df.loc[df['AI_Condition'] == condition, column].dropna()
                if values.empty:
                    # Nothing to draw; avoids a NaN mean line and an empty KDE.
                    continue

                sns.kdeplot(data=values,
                            fill=True,
                            alpha=0.5,
                            color=fill_color,
                            label=display_name,
                            ax=ax)

                ax.axvline(x=values.mean(),
                           color=mean_color,
                           linestyle='-',
                           label=f'Mean ({display_name})')

            ax.set_title(f'{title_prefix}: {task_name}', fontsize=14, pad=20)
            ax.set_xlabel(x_label, fontsize=12)
            ax.set_ylabel('Density', fontsize=12)
            ax.legend()

            fig.savefig(os.path.join(figures_dir, file_name), dpi=300, bbox_inches='tight')
        finally:
            # Close only this figure (also on error) so unrelated open figures
            # are left alone and memory is always released.
            plt.close(fig)

# Create plots for each task
# NOTE(review): rebinds `task_names` to the same six titles already defined in
# an earlier cell and in `task_descriptions`.
task_names = {
    1: "Draft Client Email",
    2: "Draft Legal Memo",
    3: "Analysis of Complaint",
    4: "Draft NDA",
    5: "Draft Motion to Consolidate",
    6: "Draft CNC Enforcement Letter"
}

# Generate all plots
# Two PNG files per task are written by create_density_plots().
for task_num, task_name in task_names.items():
    print(f"Generating plots for Task {task_num}: {task_name}")
    create_density_plots(task_num, task_name)
    print(f"Saved plots for Task {task_num}")

Generating plots for Task 1: Draft Client Email
Saved plots for Task 1
Generating plots for Task 2: Draft Legal Memo
Saved plots for Task 2
Generating plots for Task 3: Analysis of Complaint
Saved plots for Task 3
Generating plots for Task 4: Draft NDA
Saved plots for Task 4
Generating plots for Task 5: Draft Motion to Consolidate
Saved plots for Task 5
Generating plots for Task 6: Draft CNC Enforcement Letter
Saved plots for Task 6


# Running regression with controls 

First, merge in the enrollment survey to get the control variables.

In [18]:
# Load the enrollment survey. skiprows=[1] drops the second spreadsheet row
# (presumably the Qualtrics question-text row -- TODO confirm against the file).
qualtrics_folder = os.path.join(master_folder, "Qualtrics surveys")
vincent_survey = pd.read_excel(f"{qualtrics_folder}/Enrollment survey/Vincent AI RCT_November 27, 2024_04.01.xlsx", skiprows=[1])

# REVISED: First identify actual duplicates based on original Q3 (name) OR Q5 (email)
# duplicated() treats missing values as equal to each other, hence the notna() filters.
name_duplicates = vincent_survey[vincent_survey.duplicated(subset=['Q3'], keep=False) &
                                 vincent_survey['Q3'].notna()]
email_duplicates = vincent_survey[vincent_survey.duplicated(subset=['Q5'], keep=False) &
                                  vincent_survey['Q5'].notna()]

# Combine all identified duplicates
all_duplicates_index = pd.concat([name_duplicates, email_duplicates]).index.unique()
duplicate_records = vincent_survey.loc[all_duplicates_index].copy()

# For the actual duplicates, sort by completeness and keep most complete
if not duplicate_records.empty:
    # Completeness = number of answered (non-missing) fields in the response.
    duplicate_records['completeness'] = duplicate_records.notna().sum(axis=1)

    deduplicated = []
    # Index labels of every row that belongs to an already-resolved name group.
    rows_resolved_by_name = set()

    # Handle name duplicates
    for name in duplicate_records['Q3'].dropna().unique():
        name_group = duplicate_records[duplicate_records['Q3'] == name]
        if len(name_group) > 1:  # Only process actual duplicates
            deduplicated.append(name_group.sort_values('completeness', ascending=False).iloc[0])
            rows_resolved_by_name.update(name_group.index)

    # Handle email duplicates that weren't already handled by name.
    # The previous check compared the group against the *kept* rows only, so a
    # respondent duplicated on both name and email failed it (the discarded copy
    # is never among the kept rows) and the same best record was appended twice.
    # Comparing against every row of the resolved name groups skips those groups.
    for email in duplicate_records['Q5'].dropna().unique():
        email_group = duplicate_records[duplicate_records['Q5'] == email]
        if len(email_group) > 1 and not rows_resolved_by_name.issuperset(email_group.index):
            deduplicated.append(email_group.sort_values('completeness', ascending=False).iloc[0])

    # Create dataframe of deduplicated records
    deduplicated_df = pd.DataFrame(deduplicated)
    # Safety net for partially overlapping name/email groups: the same survey row
    # may still have been chosen by both passes, so keep each row label once.
    deduplicated_df = deduplicated_df[~deduplicated_df.index.duplicated(keep='first')]

    # Get non-duplicate records
    non_duplicates = vincent_survey.drop(all_duplicates_index)

    # Combine non-duplicates with deduplicated records
    vincent_survey = pd.concat([non_duplicates, deduplicated_df])
else:
    # No duplicates found, keep all records
    print("No duplicates found in the survey data.")

# Drop the temporary completeness column if it exists
if 'completeness' in vincent_survey.columns:
    vincent_survey = vincent_survey.drop('completeness', axis=1)

# Merge datasets using original names
# Left join: every row of final_df is kept; survey columns are attached where
# 'Student name' equals the survey's Q3. indicator=True adds the '_merge' column
# used below to tell matched rows ('both') from unmatched ones ('left_only').
merged_df = final_df.merge(
    vincent_survey[['Q3', 'Q5', 'Q6', 'Q7', 'Q8']],
    left_on='Student name',
    right_on='Q3',
    how='left',
    indicator=True
)

# Check for any unmatched records
unmatched = merged_df[merged_df['_merge'] == 'left_only']
print(f"\nUnmatched students: {len(unmatched)}")
if not unmatched.empty:
    # NOTE: rebinds the notebook-level `timestamp` defined in the setup cell.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    unmatched[['Student Number', 'Student name']].to_excel(
        os.path.join(debug_path, f'unmatched_{timestamp}.xlsx'), index=False)

# Check for any duplicates
# A student appears more than once here when several survey rows share that Q3.
duplicates = merged_df[merged_df.duplicated(subset=['Student Number'], keep=False)]
if len(duplicates) > 0:
    print("\nWarning: Found duplicates for these students:")
    for _, row in duplicates.drop_duplicates(subset=['Student Number']).iterrows():
        print(f"Student Number: {row['Student Number']}, Name: {row['Student name']}")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    duplicates.to_excel(os.path.join(debug_path, f'duplicates_{timestamp}.xlsx'), index=False)

# Rename columns
merged_df = merged_df.rename(columns={
    'Q5': 'Email',
    'Q6': 'Student_Type',
    'Q7': 'GPA',
    'Q8': 'AI_Use'
})

# If any duplicates remain, keep the most complete row
# `duplicates` was computed before the rename; it is only used as the trigger.
if len(duplicates) > 0:
    merged_df['completeness'] = merged_df.notna().sum(axis=1)
    # After sorting, drop_duplicates keeps the first (most complete) row per student.
    merged_df = merged_df.sort_values('completeness', ascending=False).drop_duplicates('Student Number')
    merged_df = merged_df.drop('completeness', axis=1)

# Save merge results with detailed statistics
# 'Metric' and 'Count' are parallel lists: the n-th count belongs to the n-th metric.
merge_stats = pd.DataFrame({
    'Metric': [
        'Total Students', 
        'Matched Students', 
        'Missing Survey Data',
        'Students with Email',
        'Students with Student Type',
        'Students with GPA',
        'Students with AI Use'
    ],
    'Count': [
        len(merged_df),
        (merged_df['_merge'] == 'both').sum(),
        (merged_df['_merge'] == 'left_only').sum(),
        merged_df['Email'].notna().sum(),
        merged_df['Student_Type'].notna().sum(),
        merged_df['GPA'].notna().sum(),
        merged_df['AI_Use'].notna().sum()
    ]
})

# NOTE: rebinds the notebook-level `timestamp`; the same value is reused in the
# final print so the reported path matches the file actually written.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
with pd.ExcelWriter(os.path.join(debug_path, f'direct_merge_results_{timestamp}.xlsx')) as writer:
    merged_df.to_excel(writer, sheet_name='Merged_Data', index=False)
    merge_stats.to_excel(writer, sheet_name='Merge_Statistics', index=False)

# Print summary
print("\nMerge Summary:")
print(f"Total students: {len(merged_df)}")
print(f"Successfully matched: {(merged_df['_merge'] == 'both').sum()}")
print(f"Missing survey data: {(merged_df['_merge'] == 'left_only').sum()}")

# Update final_df with the new data
# The survey's name column (Q3) and the merge indicator are no longer needed.
final_df_with_controls = merged_df.drop(['Q3', '_merge'], axis=1)

print(f"\nDirect merge results saved to: {os.path.join(debug_path, f'direct_merge_results_{timestamp}.xlsx')}")


Unmatched students: 0

Student Number: af90ee0cf16ffd33, Name: af90ee0cf16ffd33
Student Number: 191cebe1180b87c5, Name: 191cebe1180b87c5
Student Number: 7dd7e803832f1b53, Name: 7dd7e803832f1b53
Student Number: f3e93859c85d9caf, Name: f3e93859c85d9caf
Student Number: b066caa360c869a0, Name: b066caa360c869a0
Student Number: e0a9f0ffff131843, Name: e0a9f0ffff131843

Merge Summary:
Total students: 153
Successfully matched: 153
Missing survey data: 0

Direct merge results saved to: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\debug\direct_merge_results_20250306_143817.xlsx


Correcting inconsistent GPA values

In [19]:
# Define the corrections dictionary (normalized keys)
# Keys are free-text GPA answers in normalized form (trimmed, lower-cased, line
# breaks replaced by spaces); values are the numeric GPA to store instead.
corrections = {
    # Two semester GPAs reported: use their simple average.
    "i received a 3.267 1l fall semester and a 3.333 1l spring semester. (not sure gpa for overall 1l year)": (3.267 + 3.333) / 2,
    "3.461/4.333": (3.461 / 4.333) * 4,  # Scale to 4.0 system
    "2.1 (british grading system)": 3.5  # Approximate WES conversion
}

# Canonical form used to look answers up in the `corrections` dictionary
def normalize_text(value):
    """Return a trimmed, lower-cased, single-line version of a string.

    Leading/trailing whitespace is stripped first, then every remaining
    newline or carriage return is replaced by one space each. Anything that
    is not a ``str`` (numbers, NaN, None) is handed back untouched.
    """
    if not isinstance(value, str):
        return value

    flattened = value.strip().lower()
    for line_break in ("\n", "\r"):
        flattened = flattened.replace(line_break, " ")
    return flattened

# Snapshot of the raw answers, kept for tracking what was changed
original_gpa = final_df_with_controls["GPA"].copy()

# Normalized view of the GPA answers, used only for matching against the
# correction keys (held in a standalone Series, so no helper column has to be
# added to and then removed from the frame)
normalized_gpa = final_df_with_controls["GPA"].map(normalize_text)

# Overwrite GPA **only in rows whose normalized answer is a key in corrections**
mask = normalized_gpa.isin(corrections.keys())
final_df_with_controls.loc[mask, "GPA"] = normalized_gpa.map(corrections)

In [20]:
# Controls are written next to the other working data for this run
controls_path = analysis_path
os.makedirs(controls_path, exist_ok=True)

# De-duplicate students: rank rows by how many fields are filled in and keep
# the most complete row per 'Student name'
final_df_with_controls = (
    final_df_with_controls
    .assign(completeness=final_df_with_controls.notna().sum(axis=1))
    .sort_values('completeness', ascending=False)
    .drop_duplicates('Student name')
    .drop('completeness', axis=1)
)

# Keep only the identifier and control columns, with underscore-style names
controls_df = (
    final_df_with_controls[['Student Number', 'Student name', 'GPA', 'Student_Type', 'AI_Use']]
    .rename(columns={'Student Number': 'Student_Number', 'Student name': 'Student_Name'})
)

# Controls exactly as recorded (GPA may still contain free text)
controls_file = os.path.join(controls_path, "controls.csv")
controls_df.to_csv(controls_file, index=False)
print(f"\nControls file saved successfully at: {controls_file}")

# Numeric variant: GPA and AI_Use coerced to numbers, unparseable values -> NaN
controls_numeric_df = controls_df.assign(
    GPA=pd.to_numeric(controls_df['GPA'], errors='coerce'),
    AI_Use=pd.to_numeric(controls_df['AI_Use'], errors='coerce'),
)

controls_numeric_file = os.path.join(controls_path, "controls_numeric.csv")
controls_numeric_df.to_csv(controls_numeric_file, index=False)
print(f"\nNumeric controls file saved successfully at: {controls_numeric_file}")


Controls file saved successfully at: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\controls.csv

Numeric controls file saved successfully at: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\controls_numeric.csv


In [21]:
# Load numeric controls (GPA, AI Use, and Student Type) written by the previous cell
controls_numeric_file = os.path.join(analysis_path, "controls_numeric.csv")
controls_numeric_df = pd.read_csv(controls_numeric_file)

print(f"\nLoaded controls from: {controls_numeric_file}")
print(controls_numeric_df.head())

# Ensure correct data types for GPA (anything non-numeric becomes NaN)
controls_numeric_df['GPA'] = pd.to_numeric(controls_numeric_df['GPA'], errors='coerce')

# Convert Student_Type and AI_Use to integer codes; missing answers are recoded as 0.
# NOTE(review): 0 is not a key of student_type_mapping / ai_use_mapping. If any
# answers are missing, 0 becomes the lowest category and is the one that
# drop_first=True removes below, so the omitted baseline would be "missing"
# rather than code 1 -- confirm whether missing values occur in these columns.
controls_numeric_df['Student_Type'] = controls_numeric_df['Student_Type'].fillna(0).astype(int)
controls_numeric_df['AI_Use'] = controls_numeric_df['AI_Use'].fillna(0).astype(int)

# Convert Student_Type and AI_Use to categorical dummies, dropping the first category to avoid multicollinearity.
# The resulting columns are named Student_Type_<code> and AI_Use_<code>.
controls_numeric_df = pd.get_dummies(controls_numeric_df, columns=['Student_Type', 'AI_Use'], drop_first=True)

# Loader for one task's cleaned data, enriched with the control variables
def load_and_merge_task(task_num):
    """Read ``task{task_num}_data_cleaned.csv`` and attach the controls.

    The file is looked up in ``analysis_path``. Controls from
    ``controls_numeric_df`` are left-merged on ``Student_Number``, so every
    task row is kept even when no controls exist for that student.

    Returns the merged DataFrame, or ``None`` (after printing a warning) when
    the task file does not exist.
    """
    task_file = os.path.join(analysis_path, f"task{task_num}_data_cleaned.csv")

    if os.path.exists(task_file):
        return pd.read_csv(task_file).merge(controls_numeric_df, on='Student_Number', how='left')

    print(f"Warning: {task_file} not found!")
    return None

# One merged dataframe per task (1-6); the value is None when the task file is missing
task_dfs = {task_num: load_and_merge_task(task_num) for task_num in range(1, 7)}


# Regression of one outcome on treatment arms plus GPA, Student Type and AI Use controls
def analyze_outcome_with_controls(df, outcome_var, task_num):
    """Fit and print an OLS regression of ``outcome_var`` with controls.

    Regressors are the two treatment dummies (Vincent, GPT 01), GPA and every
    column whose name contains ``Student_Type_`` or ``AI_Use_``. Standard
    errors are heteroskedasticity-robust (HC1). Rows with a missing outcome or
    regressor are dropped before fitting.

    Side effects: ``df`` is modified in place (the outcome is coerced to
    numeric and the two dummy columns are added), and a summary is printed.

    Returns the fitted statsmodels results object, or ``None`` when ``df`` is
    ``None`` or lacks ``outcome_var``.
    """
    if df is None or outcome_var not in df.columns:
        print(f"Skipping analysis for {outcome_var} - Task {task_num} (data missing)")
        return None

    # Outcome as a number; unparseable entries become NaN and are dropped below
    df[outcome_var] = pd.to_numeric(df[outcome_var], errors='coerce')

    # Treatment indicators, with "No AI" as the omitted arm
    for dummy_name, arm in (('Vincent_dummy', 'Vincent'), ('GPT01_dummy', 'GPT 01')):
        df[dummy_name] = (df['AI_Condition'] == arm).astype(float)

    # Regressors: treatment dummies, GPA, then whatever category dummies the frame carries
    category_dummies = [name for name in df.columns
                        if 'Student_Type_' in name or 'AI_Use_' in name]
    control_vars = ['Vincent_dummy', 'GPT01_dummy', 'GPA'] + category_dummies

    # Complete cases only
    df_clean = df.dropna(subset=[outcome_var] + control_vars)

    print(f"\nShape before cleaning: {df.shape}")
    print(f"Shape after cleaning: {df_clean.shape}")

    # Design matrix (with intercept) and response, then the robust fit
    design = sm.add_constant(df_clean[control_vars].astype(float))
    response = df_clean[outcome_var].astype(float)
    results = sm.OLS(response, design).fit(cov_type='HC1')

    # (column-name marker, printed prefix, code -> label lookup)
    label_rules = (
        ("AI_Use_", "AI Use", ai_use_mapping),
        ("Student_Type_", "Student Type", student_type_mapping),
    )

    def map_variable_names(var):
        """Turn a dummy column name into a readable label; other names pass through."""
        for marker, prefix, lookup in label_rules:
            if marker in var:
                num_value = int(var.split("_")[-1])
                return f"{prefix}: {lookup.get(num_value, 'Unknown')}"
        return var

    coef_labels = list(map(map_variable_names, results.params.index))

    # Human-readable report
    print(f"\nAnalysis for {outcome_var} - Task {task_num} with Controls")
    print(f"Number of observations: {len(df_clean)}")
    print("\nSample breakdown:")
    print(df_clean['AI_Condition'].value_counts())
    print("\nOutcome variable summary:")
    print(df_clean[outcome_var].describe())
    print("\nRegression Results:")

    summary_table = pd.DataFrame({
        "Variable": coef_labels,
        "Coefficient": results.params.values,
        "Std. Error": results.bse.values,
        "p-value": results.pvalues.values
    })
    print(summary_table)

    return results

# Run the controlled regression for every outcome of every task that has data
for task_num, df in task_dfs.items():
    if df is not None:
        # Five rubric criteria, then total score, time spent and productivity
        outcomes = [
            f'P{task_num}_Criteria_{position}_{criterion}'
            for position, criterion in enumerate(
                ('Accuracy', 'Analysis', 'Organization', 'Clarity', 'Professionalism'), start=1
            )
        ]
        outcomes += [
            f'P{task_num}_Total_Score',
            time_cols[task_num],
            f'P{task_num}_Productivity'
        ]

        banner = '=' * 50
        print(f"\n{banner}")
        print(f"Processing Task {task_num}")
        print(f"{banner}")

        for outcome in outcomes:
            # A failure on one outcome is reported and the loop moves on to the next
            try:
                results = analyze_outcome_with_controls(df, outcome, task_num)
            except Exception as e:
                print(f"Error processing {outcome}")
                print(f"Error details: {str(e)}")


Loaded controls from: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\controls_numeric.csv
     Student_Number      Student_Name    GPA  Student_Type  AI_Use
0  9e2e00fa7c285ff5  9e2e00fa7c285ff5  2.920           2.0     1.0
1  b0eea11d8aa104e7  b0eea11d8aa104e7  3.366           2.0     2.0
2  ad4e0a23d55a9bc2  ad4e0a23d55a9bc2  3.330           2.0     4.0
3  22357cb4b713c556  22357cb4b713c556  3.400           2.0     1.0
4  eab55a7284651fe8  eab55a7284651fe8  3.680           1.0     5.0

Processing Task 1

Shape before cleaning: (135, 25)
Shape after cleaning: (119, 25)

Analysis for P1_Criteria_1_Accuracy - Task 1 with Controls
Number of observations: 119

Sample breakdown:
AI_Condition
GPT 01     41
Vincent    40
No AI      38
Name: count, dtype: int64

Outcome variable summary:
count    119.000000
mean       3.386555
std        1.712908
min        1.000000
25%        2.000000
50%        3.000000
75%   

In [22]:
def _attach_missing_controls(df, controls_numeric_df):
    """Return ``df`` with the control columns it lacks, matched per student.

    When both frames carry ``Student_Number`` the controls are left-merged on
    that key, so a student only ever receives their own controls. Without a
    shared key the frames are joined on their index, as a last resort.
    """
    new_cols = [col for col in controls_numeric_df.columns if col not in df.columns]
    key = 'Student_Number'
    if key in df.columns and key in controls_numeric_df.columns:
        # One controls row per student, otherwise the merge would duplicate task rows
        lookup = controls_numeric_df[[key] + new_cols].drop_duplicates(subset=key)
        return df.merge(lookup, on=key, how='left')
    return df.join(controls_numeric_df[new_cols], how='left')


def _format_effect_and_se(coef, se, pval):
    """Return ``(effect_str, se_str)`` for either shape ``format_coefficient`` returns.

    The ``format_coefficient`` defined later in this notebook returns a single
    string, while this cell was written against a two-value return. Both are
    accepted; for the single-string form the standard error is rendered here
    as ``($x.xx$)``.
    """
    formatted = format_coefficient(coef, se, pval)
    if isinstance(formatted, str):
        return formatted, f"(${se:.2f}$)"
    effect_str, se_str = formatted
    return effect_str, se_str


def _readable_control_name(var, student_type_mapping, ai_use_mapping):
    """Map a dummy column name to a table label; unknown shapes are returned as-is."""
    if var.startswith('Student_Type_') and student_type_mapping:
        lookup, baseline = student_type_mapping, "2L"
    elif var.startswith('AI_Use_') and ai_use_mapping:
        lookup, baseline = ai_use_mapping, "0 Times"
    else:
        return var
    try:
        code = int(float(var.rsplit('_', 1)[-1]))
    except ValueError:
        # Suffix is not a category code: keep the raw name rather than fail the regression
        return var
    return f"{lookup.get(code, 'Unknown')} (vs. {baseline})"


def extract_treatment_effects_with_controls(df, outcome_var, task_num, controls_numeric_df=None, 
                                            student_type_mapping=None, ai_use_mapping=None):
    """Extract treatment effects and control variable coefficients with robust handling.

    Fits an OLS regression (HC1 robust standard errors) of ``outcome_var`` on
    the two treatment dummies, GPA and every ``Student_Type_*`` / ``AI_Use_*``
    column found in ``df``.

    Parameters
    ----------
    df : DataFrame or None
        Task data containing ``AI_Condition`` and ``outcome_var``. When no
        controls have to be attached, the frame is modified in place (numeric
        coercion, dummy columns).
    outcome_var : str
        Name of the outcome column.
    task_num : int
        Task number, used only in messages.
    controls_numeric_df : DataFrame, optional
        Controls to attach when ``df`` has no ``GPA`` column. They are matched
        on ``Student_Number`` when both frames have it.
    student_type_mapping, ai_use_mapping : dict, optional
        Code -> label lookups for readable control names. When omitted, the
        raw column names are reported.

    Returns
    -------
    dict or None
        ``{'effects', 'controls', 'control_mean', 'n_obs'}``, or ``None`` when
        data is missing, fewer than 10 complete rows remain, or the regression
        fails (a message is printed in each case).
    """
    
    if df is None:
        print(f"⚠️ No data for Task {task_num}")
        return None

    if outcome_var not in df.columns:
        print(f"⚠️ Column {outcome_var} not found in task dataframe")
        return None

    # Merge missing control variables (if needed)
    if controls_numeric_df is not None and 'GPA' not in df.columns:
        try:
            df = _attach_missing_controls(df, controls_numeric_df)
        except Exception as e:
            print(f"⚠️ Error joining control_numeric_df: {e}")
            return None

    # Convert outcome and control variables to numeric
    df[outcome_var] = pd.to_numeric(df[outcome_var], errors='coerce')
    df['GPA'] = pd.to_numeric(df.get('GPA'), errors='coerce')

    for col in df.columns:
        if col.startswith(('Student_Type_', 'AI_Use_')):
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Create treatment dummies ("No AI" is the omitted arm)
    df['Vincent_dummy'] = (df['AI_Condition'] == 'Vincent').astype(float)
    df['GPT01_dummy'] = (df['AI_Condition'] == 'GPT 01').astype(float)

    # Define control variables
    control_vars = ['Vincent_dummy', 'GPT01_dummy', 'GPA']
    student_type_vars = sorted(col for col in df.columns if col.startswith('Student_Type_'))
    ai_use_vars = sorted(col for col in df.columns if col.startswith('AI_Use_'))

    # Ensure unique control variables (order preserved)
    all_vars = list(dict.fromkeys(control_vars + student_type_vars + ai_use_vars))

    # Drop missing values
    df_clean = df.dropna(subset=[outcome_var] + all_vars)
    if len(df_clean) < 10:
        print(f"⚠️ Too few observations ({len(df_clean)}) for Task {task_num}, Outcome {outcome_var}")
        return None

    X_clean = df_clean[all_vars].astype(float)
    y_clean = df_clean[outcome_var].astype(float)

    # Raw mean of the outcome in the No AI group, the base for percentage changes
    control_mean = df_clean[df_clean['AI_Condition'] == 'No AI'][outcome_var].mean()

    # Run regression
    X = sm.add_constant(X_clean)
    y = y_clean

    try:
        model = sm.OLS(y, X)
        results = model.fit(cov_type='HC1')

        # Extract treatment effects
        effects = []
        for label, dummy in [('Vincent', 'Vincent_dummy'), ('o1-preview', 'GPT01_dummy')]:
            coef = results.params[dummy]
            se = results.bse[dummy]
            pval = results.pvalues[dummy]
            pct_change = (coef / control_mean) * 100 if control_mean else None

            effect_str, se_str = _format_effect_and_se(coef, se, pval)
            pct_str = format_pct_change(pct_change)

            effects.append({
                'Model': label,
                'Effect': effect_str,
                'SE': se_str,
                'Pct Change': pct_str,
                'N': len(df_clean)
            })

        # Extract control variable coefficients: GPA first, then the category dummies
        controls = []
        if 'GPA' in results.params:
            effect_str, se_str = _format_effect_and_se(
                results.params['GPA'], results.bse['GPA'], results.pvalues['GPA']
            )
            controls.append({'name': 'GPA', 'coef': effect_str, 'se': se_str})

        for var in results.params.index:
            if var in ('const', 'GPA', 'Vincent_dummy', 'GPT01_dummy'):
                continue
            effect_str, se_str = _format_effect_and_se(
                results.params[var], results.bse[var], results.pvalues[var]
            )
            var_name = _readable_control_name(var, student_type_mapping, ai_use_mapping)
            controls.append({'name': var_name, 'coef': effect_str, 'se': se_str})

        return {
            'effects': effects,
            'controls': controls,
            'control_mean': control_mean,
            'n_obs': len(df_clean)
        }

    except Exception as e:
        print(f"⚠️ Error in regression for Task {task_num}, Outcome {outcome_var}: {e}")
        return None


def create_task_table_with_controls(task_num, task_name, df, tables_path):
    """Generate LaTeX table for a specific task including all controls.

    Panel A lists, for each outcome, the No AI control mean and the estimated
    effect of each AI tool. Panel B lists the control-variable coefficients;
    these are taken from the first outcome whose regression succeeds, not
    from every outcome.

    Parameters
    ----------
    task_num : int
        Task number, used to build the outcome column names and the file name.
    task_name : str
        Task description used in the caption.
    df : DataFrame or None
        Task data merged with controls; nothing is written when it is None.
    tables_path : str
        Directory in which ``task{task_num}_effects_with_controls.tex`` is saved
        (created if missing).
    """
    print(f"\nProcessing Task {task_num}: {task_name}")
    
    if df is None:
        print(f"⚠️ No data for Task {task_num}")
        return
    
    # Define outcomes
    outcomes = {
        'Accuracy': f'P{task_num}_Criteria_1_Accuracy',
        'Analysis': f'P{task_num}_Criteria_2_Analysis',
        'Organization': f'P{task_num}_Criteria_3_Organization',
        'Clarity': f'P{task_num}_Criteria_4_Clarity',
        'Professionalism': f'P{task_num}_Criteria_5_Professionalism',
        'Total Score': f'P{task_num}_Total_Score',
        'Time Spent': f'Time_Spent_Assignment_{task_num}',
        'Productivity': f'P{task_num}_Productivity'
    }
    
    # Collect results
    results_data = []
    control_data = None
    
    for outcome_name, outcome_var in outcomes.items():
        # Pass the label lookups so Panel B shows readable names instead of raw
        # dummy column names such as Student_Type_2
        result = extract_treatment_effects_with_controls(
            df, outcome_var, task_num,
            student_type_mapping=student_type_mapping,
            ai_use_mapping=ai_use_mapping,
        )
        if result is None:
            continue
            
        # Store treatment effects
        for effect in result['effects']:
            results_data.append({
                'Outcome': outcome_name,
                'Control Mean': f"${result['control_mean']:.2f}$",
                **effect
            })
        
        # Store control effects (taken once, from the first successful outcome)
        if control_data is None:
            control_data = result['controls']
    
    if not results_data:
        print(f"⚠️ No results to display for Task {task_num}")
        return
    
    # Generate LaTeX table
    latex_table = "\\begin{table}[!htbp]\n\\centering\n"
    latex_table += f"\\caption{{Treatment Effects for {task_name} (With Controls)}}\n"
    latex_table += f"\\label{{tab:task{task_num}_effects_controls}}\n"
    latex_table += "\\vspace{0.3cm}\n"  # vertical space after title
    
    # Panel A: Treatment Effects
    latex_table += "\\begin{tabular}{lcccccc}\n"
    latex_table += "\\multicolumn{7}{l}{\\textbf{Panel A: Treatment Effects}} \\\\\n"
    latex_table += "\\hline\\hline\n"
    latex_table += "Outcome & Control Mean & Model & Effect & SE & \\% Change & N \\\\\n"
    latex_table += "\\hline\n"
    
    current_outcome = None
    for row in results_data:
        if current_outcome != row['Outcome']:
            current_outcome = row['Outcome']
            # First row: include multirow cells and then treatment effect
            latex_table += (
                f"\\multirow{{2}}{{*}}{{{row['Outcome']}}} & "
                f"\\multirow{{2}}{{*}}{{{row['Control Mean']}}} & "
                f"{row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
            )
        else:
            # Second row: leave outcome and control mean cells empty
            latex_table += (
                f"& & {row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
            )
            latex_table += "\\hline\n"
    
    latex_table += "\\end{tabular}\n\n"
    latex_table += "\\vspace{0.3cm}\n\n"  # vertical space between Panel A and Panel B
    
    # Panel B: Control Variables
    if control_data:
        latex_table += "\\begin{tabular}{lcc}\n"
        latex_table += "\\multicolumn{3}{l}{\\textbf{Panel B: Control Variables}} \\\\\n"
        latex_table += "\\hline\\hline\n"
        latex_table += "Variable & Coefficient & SE \\\\\n"
        latex_table += "\\hline\n"
        
        for control in control_data:
            # A bare underscore is an error in LaTeX text mode, so escape any that remain
            safe_name = str(control['name']).replace('_', '\\_')
            latex_table += f"{safe_name} & {control['coef']} & {control['se']} \\\\\n"
        
        latex_table += "\\hline\n"
        latex_table += "\\end{tabular}\n"
    
    # Add notes
    latex_table += "\\begin{tablenotes}\n\\small\n"
    latex_table += (
        "\\item \\textit{Notes:} Effects shown relative to No AI control group. "
        "For quality criteria (Accuracy through Professionalism), scoring scale is 1-7. "
        "Total Score ranges from 5-35. Time Spent shows minutes. "
        "Productivity measures points earned per minute. "
        "AI use frequency refers to the last three months. "
        "Robust standard errors in parentheses. "
        "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$\n"
    )
    latex_table += "\\end{tablenotes}\n"
    
    latex_table += "\\end{table}"
    
    # Save the table
    os.makedirs(tables_path, exist_ok=True)
    table_file_path = os.path.join(tables_path, f"task{task_num}_effects_with_controls.tex")
    with open(table_file_path, "w") as f:
        f.write(latex_table)
    
    print(f"✅ Saved LaTeX table: {table_file_path}")

In [23]:
def generate_all_tables(task_dfs, master_folder):
    """Write the with-controls LaTeX table for each of the six tasks.

    ``task_dfs`` maps task number -> task dataframe. Tasks absent from it are
    reported and skipped. ``master_folder`` is accepted but not used: tables
    are written to the notebook-level ``tables_path``.
    """
    task_names = dict(enumerate(
        [
            "Draft Client Email",
            "Draft Legal Memo",
            "Analysis of Complaint",
            "Draft NDA",
            "Draft Motion to Consolidate",
            "Draft CNC Enforcement Letter",
        ],
        start=1,
    ))

    for task_num, task_name in task_names.items():
        if task_num not in task_dfs:
            print(f"⚠️ No data for Task {task_num}")
            continue
        create_task_table_with_controls(task_num, task_name, task_dfs[task_num], tables_path)

# Build the per-task tables. generate_all_tables ignores its second argument
# and writes to the notebook-level `tables_path`.
generate_all_tables(task_dfs, master_folder)


Processing Task 1: Draft Client Email
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task1_effects_with_controls.tex

Processing Task 2: Draft Legal Memo
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task2_effects_with_controls.tex

Processing Task 3: Analysis of Complaint
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task3_effects_with_controls.tex

Processing Task 4: Draft NDA
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task4_effects_with_controls.tex

Processing Task 5: Draft Motion to Consolidate
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawye

In [24]:
# --- TABLE GENERATION FOR OUTCOMES ---
def create_outcome_table_with_controls(outcome, outcome_map, task_dfs, tables_path):
    """Generate LaTeX table for a specific outcome across all tasks.

    Parameters
    ----------
    outcome : str
        Display name of the outcome; also used (lower-cased, spaces replaced
        by underscores) in the label and file name.
    outcome_map : dict
        Task number -> name of the outcome column in that task's dataframe.
    task_dfs : dict
        Task number -> task dataframe merged with controls.
    tables_path : str
        Directory in which the ``.tex`` file is saved (created if missing).

    The main panel has two rows per task (one per AI tool). Panel B lists the
    control-variable coefficients from the first task whose regression
    succeeds, not from every task.
    """
    rows = []
    control_data = None
    for task in sorted(task_descriptions.keys()):
        if task not in task_dfs:
            print(f"⚠️ No data for Task {task}")
            continue
        df = task_dfs[task]
        outcome_var = outcome_map.get(task)
        if outcome_var is None:
            print(f"⚠️ Outcome mapping missing for Task {task} in {outcome}")
            continue
        # Pass the label lookups so Panel B shows readable names instead of raw
        # dummy column names such as AI_Use_3
        result = extract_treatment_effects_with_controls(
            df, outcome_var, task,
            student_type_mapping=student_type_mapping,
            ai_use_mapping=ai_use_mapping,
        )
        if result is None:
            continue
        control_mean_str = f"${result['control_mean']:.2f}$"
        for eff in result['effects']:
            rows.append({
                'Task': task_descriptions[task],
                'Control Mean': control_mean_str,
                'Model': eff['Model'],
                'Effect': eff['Effect'],
                'SE': eff['SE'],
                'Pct Change': eff['Pct Change'],
                'N': eff['N']
            })
        # Control coefficients are taken once, from the first successful task
        if control_data is None:
            control_data = result['controls']

    if not rows:
        print(f"⚠️ No results for outcome {outcome}")
        return

    # Build LaTeX table
    latex_table = "\\begin{table}[!htbp]\n\\centering\n"
    latex_table += f"\\caption{{Treatment Effects on {outcome} Across Tasks}}\n"
    latex_table += f"\\label{{tab:{outcome.lower().replace(' ', '_')}_effects_controls}}\n"
    latex_table += "\\vspace{0.3cm}\n"
    latex_table += "\\begin{tabular}{lcccccc}\n"
    latex_table += "\\hline\\hline\n"
    latex_table += "Task & Control Mean & Model & Effect & SE & \\% Change & N \\\\\n"
    latex_table += "\\hline\n"

    current_task = None
    for row in rows:
        if current_task != row['Task']:
            # First row of a task: task name and control mean span both tool rows
            current_task = row['Task']
            latex_table += (
                f"\\multirow{{2}}{{*}}{{{row['Task']}}} & "
                f"\\multirow{{2}}{{*}}{{{row['Control Mean']}}} & "
                f"{row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
            )
        else:
            latex_table += (
                f"& & {row['Model']} & {row['Effect']} & {row['SE']} & {row['Pct Change']} & {row['N']} \\\\\n"
                f"\\hline\n"
            )
    latex_table += "\\hline\\hline\n"
    latex_table += "\\end{tabular}\n\n"
    latex_table += "\\vspace{0.5cm}\n\n"

    # Panel B: Control Variables
    if control_data:
        latex_table += "\\begin{tabular}{lcc}\n"
        latex_table += "\\multicolumn{3}{l}{\\textbf{Panel B: Control Variables}} \\\\\n"
        latex_table += "\\hline\\hline\n"
        latex_table += "Variable & Coefficient & SE \\\\\n"
        latex_table += "\\hline\n"
        for ctrl in control_data:
            # A bare underscore is an error in LaTeX text mode, so escape any that remain
            safe_name = str(ctrl['name']).replace('_', '\\_')
            latex_table += f"{safe_name} & {ctrl['coef']} & {ctrl['se']} \\\\\n"
        latex_table += "\\hline\n"
        latex_table += "\\end{tabular}\n"

    latex_table += "\\begin{tablenotes}\n\\small\n"
    latex_table += ("\\item \\textit{Notes:} Effects are shown relative to the No AI control group. "
                    "Robust standard errors (in parentheses) are reported. "
                    "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$.")
    latex_table += "\n\\end{tablenotes}\n"
    latex_table += "\\end{table}"

    # Save the table
    os.makedirs(tables_path, exist_ok=True)
    table_file_path = os.path.join(tables_path, f"{outcome.lower().replace(' ', '_')}_effects_controls.tex")
    with open(table_file_path, "w") as f:
        f.write(latex_table)
    print(f"✅ Saved LaTeX table: {table_file_path}")

def generate_all_outcome_tables(task_dfs, master_folder):
    """Write one cross-task LaTeX table per entry of ``outcome_mappings``.

    ``master_folder`` is accepted but not used: tables are written to the
    notebook-level ``tables_path``.
    """
    for outcome in outcome_mappings:
        create_outcome_table_with_controls(outcome, outcome_mappings[outcome], task_dfs, tables_path)

In [25]:
# Build one cross-task table per outcome (files are written to `tables_path`)
generate_all_outcome_tables(task_dfs, master_folder)

✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\accuracy_effects_controls.tex
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\analysis_effects_controls.tex
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\organization_effects_controls.tex
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\clarity_effects_controls.tex
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\professionalism_effects_controls.tex
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - A

# Subgroup analysis by GPA quartile and student type

In [26]:
def extract_subgroup_effects(df, outcome, task_num, subgroup_name, subgroup_value):
    """Estimate treatment effects within one subgroup, without controls.

    Keeps the rows where ``df[subgroup_name] == subgroup_value`` and regresses
    ``outcome`` on the Vincent and GPT 01 dummies (HC1 robust standard
    errors). ``df`` itself is not modified.

    Returns a dict with ``control_mean``, the coefficient / standard error /
    p-value for each tool (``vincent_*``, ``gpt_*``) and ``n_obs``. Returns
    ``None`` when the data or column is missing, when fewer than 10 usable
    rows remain, or when the regression raises.
    """
    if df is None or outcome not in df.columns:
        print(f"Skipping {outcome} - Task {task_num} - {subgroup_name} {subgroup_value}")
        return None

    # Work on a private copy of the subgroup's rows
    in_subgroup = df[subgroup_name] == subgroup_value
    df_subset = df.loc[in_subgroup].copy()

    # Treatment indicators, with "No AI" as the omitted arm
    for dummy_col, arm in (('Vincent_dummy', 'Vincent'), ('GPT01_dummy', 'GPT 01')):
        df_subset[dummy_col] = (df_subset['AI_Condition'] == arm).astype(int)

    df_clean = df_subset.dropna(subset=[outcome, 'Vincent_dummy', 'GPT01_dummy'])
    if len(df_clean) < 10:
        return None

    # Raw outcome mean of the No AI rows in this subgroup
    is_control = df_clean['AI_Condition'] == 'No AI'
    control_mean = df_clean.loc[is_control, outcome].mean()

    X = sm.add_constant(df_clean[['Vincent_dummy', 'GPT01_dummy']])
    y = df_clean[outcome]

    try:
        fit = sm.OLS(y, X).fit(cov_type='HC1')

        estimates = {'control_mean': control_mean}
        for key, dummy_col in (('vincent', 'Vincent_dummy'), ('gpt', 'GPT01_dummy')):
            estimates[f'{key}_coef'] = fit.params[dummy_col]
            estimates[f'{key}_se'] = fit.bse[dummy_col]
            estimates[f'{key}_p'] = fit.pvalues[dummy_col]
        estimates['n_obs'] = len(df_clean)
        return estimates
    except Exception as e:
        print(f"Error in regression for {outcome} - Task {task_num}: {e}")
        return None

def format_coefficient(coef, se, p):
    """Render a coefficient as LaTeX math with significance stars.

    The coefficient is shown with two decimals, followed by ``^{***}``,
    ``^{**}`` or ``^{*}`` for p below 0.01, 0.05 or 0.1 respectively, all
    wrapped in ``$...$``. ``se`` is accepted but not used here; a single
    string is returned, and callers format the standard error themselves.
    """
    thresholds = ((0.01, "^{***}"), (0.05, "^{**}"), (0.1, "^{*}"))
    # First (strictest) threshold that p falls under; no stars otherwise
    stars = next((marker for cutoff, marker in thresholds if p < cutoff), "")
    return f"${coef:.2f}{stars}$"

def format_pct_change(pct):
    """Format percentage change with an explicit sign, as LaTeX math.

    Finite values are rendered with one decimal and a leading ``+`` or ``-``,
    e.g. ``$+12.3\\%$``. When no percentage can be computed (``None``, NaN or
    infinite, for instance because the control mean is zero or missing) the
    placeholder ``--`` is returned instead of raising or printing ``nan``.
    """
    if pct is None or pd.isna(pct) or np.isinf(pct):
        return "--"
    # The '+' format flag always emits the sign, so no separate branch is needed
    return f"${pct:+.1f}\\%$"

def create_gpa_quartile_tables(task_dfs, analysis_path, controls_numeric_df):
    """Create LaTeX tables of treatment effects by GPA quartile for each task.

    For every task in ``task_descriptions`` one file
    ``task{n}_gpa_quartiles.tex`` is written to the notebook-level
    ``tables_path``. ``analysis_path`` is accepted but not used.

    Parameters
    ----------
    task_dfs : dict
        Task number -> task dataframe; tasks whose value is None are skipped.
    analysis_path : str
        Unused.
    controls_numeric_df : DataFrame
        Must provide ``Student_Number`` and ``GPA``. Quartiles are computed
        among the students that appear in the task at hand.
    """
    
    outcome_names = [
        'Accuracy', 'Analysis', 'Organization', 'Clarity', 
        'Professionalism', 'Total Score', 'Time Spent', 'Productivity'
    ]
    
    # Define quartiles consistently (labels are already LaTeX-escaped)
    quartiles = ['Bottom 25\\%', '25-50\\%', '50-75\\%', 'Top 25\\%']
    
    for task_num, task_name in task_descriptions.items():
        print(f"\nProcessing Task {task_num}: {task_name}")
        
        df = task_dfs.get(task_num)
        if df is None:
            continue
            
        # Compute GPA quartiles among this task's students
        df = df.copy()
        student_numbers = df['Student_Number'].unique()
        gpa_df = controls_numeric_df[controls_numeric_df['Student_Number'].isin(student_numbers)].copy()
        gpa_df['GPA_quartile'] = pd.qcut(gpa_df['GPA'], q=4, labels=quartiles)
        
        # Merge GPA quartiles back to main dataframe
        df = df.merge(gpa_df[['Student_Number', 'GPA_quartile']], 
                     on='Student_Number', 
                     how='left')
        
        # Generate LaTeX table with 10 columns
        latex_table = "\\begin{table}[!htbp]\n\\centering\n"
        latex_table += f"\\caption{{Treatment Effects for {task_name} by GPA Quartile}}\n"
        latex_table += f"\\label{{tab:task{task_num}_gpa}}\n"
        latex_table += "\\begin{tabular}{lccccccccc}\n"
        latex_table += "\\hline\\hline\n"
        latex_table += "& & \\multicolumn{4}{c}{Vincent} & \\multicolumn{4}{c}{o1-preview} \\\\\n"
        latex_table += "\\cline{3-10}\n"
        latex_table += "Outcome & GPA Quartile & Effect & SE & \\% Change & N & Effect & SE & \\% Change & N \\\\\n"
        latex_table += "\\hline\n"
        
        # Process each outcome
        for outcome_name in outcome_names:
            if outcome_name == 'Productivity':
                outcome_var = f'P{task_num}_Productivity'
            else:
                outcome_var = outcome_mappings[outcome_name][task_num]
            
            if outcome_var not in df.columns:
                print(f"Skipping {outcome_name} - column {outcome_var} not found")
                continue
                
            # Build the quartile rows first: a quartile may be skipped, and the
            # outcome cell must span exactly the rows that are actually emitted
            quartile_rows = []
            for quartile in quartiles:
                results = extract_subgroup_effects(df, outcome_var, task_num, 
                                                    'GPA_quartile', quartile)
                if results is None:
                    continue
                
                # Calculate percentage changes relative to the subgroup's control mean
                vincent_pct = (results['vincent_coef']/results['control_mean']*100)
                gpt_pct = (results['gpt_coef']/results['control_mean']*100)
                
                vincent_effect = format_coefficient(
                    results['vincent_coef'], results['vincent_se'], results['vincent_p'])
                gpt_effect = format_coefficient(
                    results['gpt_coef'], results['gpt_se'], results['gpt_p'])
                
                # Both tool columns report the same N: the subgroup's total sample
                quartile_rows.append(
                    f"{quartile} & {vincent_effect} & "
                    f"(${results['vincent_se']:.2f}$) & "
                    f"{format_pct_change(vincent_pct)} & {results['n_obs']} & "
                    f"{gpt_effect} & "
                    f"(${results['gpt_se']:.2f}$) & "
                    f"{format_pct_change(gpt_pct)} & {results['n_obs']} \\\\\n"
                )
            
            # First row carries the multirow outcome cell; later rows an empty placeholder
            for row_position, row_body in enumerate(quartile_rows):
                if row_position == 0:
                    prefix = f"\\multirow{{{len(quartile_rows)}}}{{*}}{{{outcome_name}}} & "
                else:
                    prefix = "& "
                latex_table += prefix + row_body
            
            # Add a horizontal line after the outcome rows (if any rows were added)
            if quartile_rows:
                latex_table += "\\hline\n"
        
        latex_table += "\\multicolumn{10}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} "
        latex_table += "Effects shown relative to No AI control group within each GPA quartile. "
        latex_table += "GPA quartiles divide students into four equal groups based on their cumulative GPA. "
        latex_table += "Robust standard errors in parentheses. "
        latex_table += "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$}\n"
        latex_table += "\\end{tabular}\n"
        latex_table += "\\end{table}\n"
        
        # Save the table
        output_path = os.path.join(tables_path, f"task{task_num}_gpa_quartiles.tex")
        with open(output_path, "w") as f:
            f.write(latex_table)
        
        print(f"✅ Saved LaTeX table: {output_path}")

# Execute the analysis: one GPA-quartile table per task, using the task data
# and numeric controls prepared in the earlier cells
create_gpa_quartile_tables(task_dfs, analysis_path, controls_numeric_df)


Processing Task 1: Draft Client Email
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task1_gpa_quartiles.tex

Processing Task 2: Draft Legal Memo
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task2_gpa_quartiles.tex

Processing Task 3: Analysis of Complaint
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task3_gpa_quartiles.tex

Processing Task 4: Draft NDA
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\task4_gpa_quartiles.tex

Processing Task 5: Draft Motion to Consolidate
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Pa

In [27]:
def create_gpa_quartile_tables_by_criteria(task_dfs, analysis_path, controls_numeric_df):
    """Write one LaTeX table per outcome with treatment effects by task and GPA quartile.

    For every outcome name, the function loops over ``task_descriptions``,
    assigns each student of the task to a GPA quartile (``pd.qcut`` over the
    students present in that task), and calls ``extract_subgroup_effects`` for
    each quartile. Each non-``None`` result becomes one table row holding the
    Vincent and o1-preview coefficient, standard error, percentage change
    relative to the control mean, and N.

    Args:
        task_dfs: Mapping of task number -> task DataFrame. Tasks missing from
            the mapping are skipped. Each frame needs a ``Student_Number``
            column and the outcome column for the task.
        analysis_path: Accepted for signature compatibility; not used here.
            Tables are written to the module-level ``tables_path``.
        controls_numeric_df: DataFrame with ``Student_Number`` and ``GPA``.

    Side effects:
        Writes ``<outcome>_gpa_quartiles.tex`` into ``tables_path`` for every
        outcome and prints progress messages.

    Notes:
        The generated LaTeX uses ``\\multirow``. The row span is the number of
        quartile rows actually emitted for the task, so a skipped quartile no
        longer leaves the task label spanning into the next task's rows.
    """
    outcome_names = [
        'Accuracy', 'Analysis', 'Organization', 'Clarity',
        'Professionalism', 'Total Score', 'Time Spent', 'Productivity'
    ]

    quartiles = ['Bottom 25\\%', '25-50\\%', '50-75\\%', 'Top 25\\%']

    for outcome_name in outcome_names:
        print(f"\nProcessing Outcome: {outcome_name}")
        outcome_slug = outcome_name.lower().replace(' ', '_')

        # Begin the LaTeX table for the current criterion
        latex_table = "\\begin{table}[!htbp]\n\\centering\n"
        latex_table += f"\\caption{{Treatment Effects for {outcome_name} by Task and GPA Quartile}}\n"
        latex_table += f"\\label{{tab:{outcome_slug}_gpa}}\n"
        latex_table += "\\begin{tabular}{lccccccccc}\n"
        latex_table += "\\hline\\hline\n"
        latex_table += "& & \\multicolumn{4}{c}{Vincent} & \\multicolumn{4}{c}{o1-preview} \\\\\n"
        latex_table += "\\cline{3-10}\n"
        latex_table += "Task & GPA Quartile & Effect & SE & \\% Change & N & Effect & SE & \\% Change & N \\\\\n"
        latex_table += "\\hline\n"

        for task_num, task_name in task_descriptions.items():
            df = task_dfs.get(task_num)
            if df is None:
                continue

            # The outcome column depends only on (outcome, task), so resolve it
            # once per task instead of once per quartile.
            if outcome_name == 'Productivity':
                outcome_var = f'P{task_num}_Productivity'
            else:
                outcome_var = outcome_mappings[outcome_name][task_num]

            if outcome_var not in df.columns:
                print(f"Skipping {outcome_name} for task {task_num} - column {outcome_var} not found")
                continue

            # Merge in GPA quartile info (quartiles are computed among the
            # students that appear in this task).
            df = df.copy()
            student_numbers = df['Student_Number'].unique()
            gpa_df = controls_numeric_df[controls_numeric_df['Student_Number'].isin(student_numbers)].copy()
            gpa_df['GPA_quartile'] = pd.qcut(gpa_df['GPA'], q=4, labels=quartiles)
            df = df.merge(gpa_df[['Student_Number', 'GPA_quartile']], on='Student_Number', how='left')

            # Collect the rows first so the multirow span can match the number
            # of rows that were actually produced.
            task_rows = []
            for quartile in quartiles:
                results = extract_subgroup_effects(df, outcome_var, task_num, 'GPA_quartile', quartile)
                if results is None:
                    continue

                # Percentage change relative to the control-group mean.
                # NOTE(review): a zero control mean is not guarded here; confirm
                # how format_pct_change treats inf/NaN.
                vincent_pct = (results['vincent_coef'] / results['control_mean'] * 100)
                gpt_pct = (results['gpt_coef'] / results['control_mean'] * 100)

                cells = [
                    f"{quartile}",
                    f"{format_coefficient(results['vincent_coef'], results['vincent_se'], results['vincent_p'])}",
                    f"(${results['vincent_se']:.2f}$)",
                    f"{format_pct_change(vincent_pct)}",
                    f"{results['n_obs']}",
                    f"{format_coefficient(results['gpt_coef'], results['gpt_se'], results['gpt_p'])}",
                    f"(${results['gpt_se']:.2f}$)",
                    f"{format_pct_change(gpt_pct)}",
                    f"{results['n_obs']}",
                ]
                task_rows.append(" & ".join(cells) + " \\\\\n")

            for row_idx, row in enumerate(task_rows):
                if row_idx == 0:
                    # First row of the task carries the task name spanning all
                    # rows emitted for this task.
                    prefix = f"\\multirow{{{len(task_rows)}}}{{*}}{{{task_name}}} & "
                else:
                    prefix = "& "
                latex_table += prefix + row

            # Add a horizontal line after each task (if any rows were added)
            if task_rows:
                latex_table += "\\hline\n"

        latex_table += ("\\multicolumn{10}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} "
                        "Effects shown relative to No AI control group within each GPA quartile. "
                        "GPA quartiles divide students into four equal groups based on their cumulative GPA. "
                        "Robust standard errors in parentheses. "
                        "$^{***}p<0.01$, $^{**}p<0.05$, $^{*}p<0.1$}\n")
        latex_table += "\\end{tabular}\n"
        latex_table += "\\end{table}\n"

        # Save the table to a file
        output_path = os.path.join(tables_path, f"{outcome_slug}_gpa_quartiles.tex")
        with open(output_path, "w") as f:
            f.write(latex_table)

        print(f"✅ Saved LaTeX table: {output_path}")

# Build the per-outcome tables (one table per outcome, with tasks and GPA quartiles
# as rows). task_dfs, analysis_path, and controls_numeric_df must already be defined.
create_gpa_quartile_tables_by_criteria(task_dfs, analysis_path, controls_numeric_df)



Processing Outcome: Accuracy
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\accuracy_gpa_quartiles.tex

Processing Outcome: Analysis
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\analysis_gpa_quartiles.tex

Processing Outcome: Organization
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\organization_gpa_quartiles.tex

Processing Outcome: Clarity
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables\clarity_gpa_quartiles.tex

Processing Outcome: Professionalism
✅ Saved LaTeX table: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th

# Summary stats and balance table across randomization groups

Summary statistics

In [28]:
def load_task_data(master_folder):
    """Read every ``task*_data_numeric.csv`` file found in the analysis folder.

    The ``master_folder`` argument is accepted but not used: files are listed
    in and read from the module-level ``analysis_path``.

    Returns:
        dict mapping each matching file name to the DataFrame read from it.

    Note:
        A later cell defines another ``load_task_data`` with a different
        parameter and return shape; whichever cell ran last wins.
    """
    loaded = {}
    for file_name in os.listdir(analysis_path):
        is_task_file = file_name.startswith("task") and file_name.endswith("_data_numeric.csv")
        if is_task_file:
            loaded[file_name] = pd.read_csv(os.path.join(analysis_path, file_name))
    return loaded

def compute_summary_stats(data, var, total_count):
    """Compute descriptive statistics for one column, ignoring missing values.

    Args:
        data: DataFrame containing the column ``var``.
        var: Name of the column to summarise.
        total_count: Number of rows the missing share is measured against
            (callers pass ``len(data)``).

    Returns:
        dict with keys, in this order: ``Mean``, ``SD``, ``Min``,
        ``25th Percentile``, ``Median``, ``75th Percentile``, ``Max``, ``N``
        (count of non-missing values) and ``Missing %``. Callers rely on the
        key order when turning the values into a table row.

        ``Missing %`` is 0 when ``total_count`` is not positive, instead of
        raising ``ZeroDivisionError`` for an empty frame.
    """
    var_data = data[var].dropna()
    n_valid = len(var_data)

    # Guard the empty-frame case the same way the balance-table helpers do.
    if total_count > 0:
        missing_pct = (1 - n_valid / total_count) * 100
    else:
        missing_pct = 0

    stats_dict = {
        'Mean': var_data.mean(),
        'SD': var_data.std(),
        'Min': var_data.min(),
        '25th Percentile': var_data.quantile(0.25),
        'Median': var_data.median(),
        '75th Percentile': var_data.quantile(0.75),
        'Max': var_data.max(),
        'N': n_valid,
        'Missing %': missing_pct
    }

    return stats_dict

def create_summary_stats_table(task_dfs):
    """Render the summary-statistics LaTeX table for all tasks.

    Args:
        task_dfs: Mapping of file name -> DataFrame. The task number is taken
            from the digits contained in the file name, and entries are
            processed in sorted key order.

    Returns:
        The LaTeX source as a single newline-joined string. For each task, a
        row is emitted for the total score, productivity and time-spent
        columns when the column exists in the frame.
    """
    header = [
        "\\begin{table}[!htbp]",
        "\\centering",
        "\\caption{Summary Statistics for All Tasks}",
        "\\label{tab:summary_stats}",
        "\\begin{tabular}{lccccccccc}",
        "\\hline\\hline",
        "Variable & Mean & SD & Min & 25\\% & Median & 75\\% & Max & N & Missing \\% \\\\",
        "\\hline",
    ]
    footer = [
        "\\hline",
        "\\multicolumn{10}{p{0.95\\linewidth}}{\\footnotesize \\textit{Notes:} ",
        "SD: Standard Deviation. Missing \\% represents percentage of missing values in each variable.} \\\\",
        "\\end{tabular}",
        "\\end{table}",
    ]

    body = []
    for file_name, frame in sorted(task_dfs.items()):
        # Task number = all digit characters of the file name, concatenated.
        task_id = ''.join(ch for ch in file_name if ch.isdigit())
        n_rows = len(frame)

        labelled_columns = (
            (f'P{task_id}_Total_Score', f'Total Score (Task {task_id})'),
            (f'P{task_id}_Productivity', f'Productivity (Task {task_id})'),
            (f'Time_Spent_Assignment_{task_id}', f'Time Spent (Task {task_id})'),
        )

        for column, label in labelled_columns:
            if column not in frame.columns:
                continue
            summary = compute_summary_stats(frame, column, n_rows)
            cells = [
                label,
                f"{summary['Mean']:.2f}",
                f"{summary['SD']:.2f}",
                f"{summary['Min']:.2f}",
                f"{summary['25th Percentile']:.2f}",
                f"{summary['Median']:.2f}",
                f"{summary['75th Percentile']:.2f}",
                f"{summary['Max']:.2f}",
                f"{summary['N']}",
                f"{summary['Missing %']:.1f}\\%",
            ]
            body.append(" & ".join(cells) + " \\\\")

    return "\n".join(header + body + footer)



# Load task data
# NOTE: this rebinds the module-level task_dfs to a dict keyed by file name
# (the GPA-quartile cells above index task_dfs by task number).
task_dfs = load_task_data(master_folder)

# Create summary statistics table
latex_table = create_summary_stats_table(task_dfs)

# Save table
os.makedirs(tables_path, exist_ok=True)

with open(os.path.join(tables_path, "summary_stats_table.tex"), "w") as f:
    f.write(latex_table)

# Collect the same statistics as rows of a DataFrame (summary_df) for inspection
summary_data = []
for task_file, df in sorted(task_dfs.items()):
    task_num = ''.join(filter(str.isdigit, task_file))
    total_count = len(df)

    variables = {
        f'P{task_num}_Total_Score': f'Total Score (Task {task_num})',
        f'P{task_num}_Productivity': f'Productivity (Task {task_num})',
        f'Time_Spent_Assignment_{task_num}': f'Time Spent (Task {task_num})'
    }

    for var, var_label in variables.items():
        if var in df.columns:
            # Named var_stats (not `stats`) so the module-level `stats` imported
            # from scipy in the first cell is not overwritten by a dict.
            var_stats = compute_summary_stats(df, var, total_count)
            summary_data.append([var_label] + list(var_stats.values()))

# Convert to DataFrame
columns = ["Variable", "Mean", "SD", "Min", "25th Percentile", "Median", "75th Percentile", "Max", "N", "Missing %"]
summary_df = pd.DataFrame(summary_data, columns=columns)


In [29]:
from scipy import stats as scipy_stats  # Import with a different name to avoid conflicts

def load_data(master_folder):
    """Build the balance-analysis frame from task 1 and the controls file.

    ``master_folder`` is accepted but not used; both CSVs are read from the
    module-level ``analysis_path``.

    Returns:
        The task 1 DataFrame with an added ``Group`` column (derived from
        ``AI_Condition``) and the ``GPA``, ``AI_Use`` and ``Student_Type``
        columns left-joined from the controls on ``Student_Number``.
    """
    controls_file = os.path.join(analysis_path, "controls_numeric.csv")
    task1_file = os.path.join(analysis_path, "task1_data_numeric.csv")
    controls = pd.read_csv(controls_file)
    task1 = pd.read_csv(task1_file)

    # Randomization groups are labelled by the condition they had in task 1.
    condition_to_group = {
        'No AI': 'Group A',
        'GPT 01': 'Group B',
        'Vincent': 'Group C',
    }
    task1['Group'] = task1['AI_Condition'].map(condition_to_group)

    control_columns = ['Student_Number', 'GPA', 'AI_Use', 'Student_Type']
    return task1.merge(controls[control_columns], on='Student_Number', how='left')

def compute_group_stats_continuous(data, var):
    """Summarise a continuous column separately for Group A, B and C.

    Returns:
        dict keyed by group name; each value holds ``mean`` and ``sd`` of the
        non-missing values, ``n`` (non-missing count), ``total_n`` (rows in the
        group) and ``missing_rate`` in percent (0 for a group with no rows).
    """
    def _summarise(group_name):
        rows = data[data['Group'] == group_name]
        values = rows[var].dropna()
        n_rows = len(rows)
        if n_rows > 0:
            missing_rate = (1 - len(values) / n_rows) * 100
        else:
            missing_rate = 0
        return {
            'mean': values.mean(),
            'sd': values.std(),
            'n': len(values),
            'total_n': n_rows,
            'missing_rate': missing_rate,
        }

    return {name: _summarise(name) for name in ('Group A', 'Group B', 'Group C')}

def compute_group_stats_categorical(data, var, categories):
    """Tabulate category counts and shares of a column for Group A, B and C.

    Args:
        data: DataFrame with a ``Group`` column and the column ``var``.
        var: Name of the categorical column.
        categories: Iterable of category codes to report.

    Returns:
        dict keyed by group name; each value holds ``categories`` (code ->
        ``count`` and ``proportion`` of non-missing values), ``missing_rate``
        in percent, ``total_n`` (rows in the group) and ``valid_n``
        (non-missing values). Proportions and missing rate are 0 when their
        denominator is 0.
    """
    result = {}
    for group_name in ('Group A', 'Group B', 'Group C'):
        subset = data.loc[data['Group'] == group_name]
        column = subset[var]
        n_valid = column.notna().sum()
        n_rows = len(subset)

        if n_rows > 0:
            missing_rate = (1 - n_valid / n_rows) * 100
        else:
            missing_rate = 0

        per_category = {}
        for code in categories:
            hits = (column == code).sum()
            share = hits / n_valid if n_valid > 0 else 0
            per_category[code] = {'count': hits, 'proportion': share}

        result[group_name] = {
            'categories': per_category,
            'missing_rate': missing_rate,
            'total_n': n_rows,
            'valid_n': n_valid,
        }
    return result

def compute_chi_square_test(data, var):
    """Return the chi-square p-value for ``Group`` versus the column ``var``.

    The test is run on the ``Group`` x ``var`` cross-tabulation of ``data``.
    """
    observed = pd.crosstab(index=data['Group'], columns=data[var])
    test_result = scipy_stats.chi2_contingency(observed)
    # Element 0 is the statistic, element 1 the p-value.
    return test_result[1]

def compute_f_test(data, var):
    """Return the one-way ANOVA p-value for ``var`` across Group A, B and C.

    Groups with no non-missing values are left out of the test. If fewer than
    two groups remain, NaN is returned.
    """
    samples = [
        data.loc[data['Group'] == group_name, var].dropna()
        for group_name in ('Group A', 'Group B', 'Group C')
    ]
    non_empty = [sample for sample in samples if len(sample) > 0]

    if len(non_empty) < 2:
        return float('nan')

    # Element 0 is the F statistic, element 1 the p-value.
    return scipy_stats.f_oneway(*non_empty)[1]

def _balance_categorical_panel(data, var, mapping, panel_title):
    """Return the LaTeX lines of one categorical panel of the balance table.

    Emits a rule, the bold panel heading, one row per (code, label) in
    ``mapping`` with the per-group percentages and the total count, and a
    final "Missing (%)" row. The chi-square p-value is printed on the first
    category row only.
    """
    groups = ['Group A', 'Group B', 'Group C']
    panel_stats = compute_group_stats_categorical(data, var, mapping.keys())
    chi2_p = compute_chi_square_test(data, var)

    lines = [
        "\\hline",
        "\\multicolumn{6}{l}{\\textbf{" + panel_title + "}} \\\\",
    ]

    for row_idx, (code, label) in enumerate(mapping.items()):
        props = [panel_stats[group]['categories'][code]['proportion'] * 100 for group in groups]
        total_count = sum(panel_stats[group]['categories'][code]['count'] for group in groups)

        # One test per panel: show its p-value once, on the first row.
        p_value = f"{chi2_p:.3f}" if row_idx == 0 else ""
        lines.append(
            f"{label} & {props[0]:.1f}\\% & {props[1]:.1f}\\% & {props[2]:.1f}\\% & {total_count} & {p_value} \\\\"
        )

    lines.append(
        f"Missing (\\%) & {panel_stats['Group A']['missing_rate']:.1f}\\% & "
        f"{panel_stats['Group B']['missing_rate']:.1f}\\% & "
        f"{panel_stats['Group C']['missing_rate']:.1f}\\% & & \\\\"
    )
    return lines


def create_balance_table(data):
    """Create the LaTeX balance table across the three randomized groups.

    Args:
        data: DataFrame with a ``Group`` column (values ``Group A``/``B``/``C``)
            plus ``GPA``, ``Student_Type`` and ``AI_Use``.

    Returns:
        The LaTeX source as one newline-joined string. It contains the group
        sizes, Panel A (GPA mean, SD, missing share and F-test p-value),
        Panel B (student type shares) and Panel C (prior AI use shares), the
        latter two with a chi-square p-value.

    A group with no rows is reported with size 0 rather than raising
    ``KeyError``.
    """
    groups = ['Group A', 'Group B', 'Group C']

    latex_lines = []
    latex_lines.append("\\begin{table}[!htbp]")
    latex_lines.append("\\setlength{\\tabcolsep}{6pt}")
    latex_lines.append("\\renewcommand{\\arraystretch}{1.0}")
    latex_lines.append("\\centering")
    latex_lines.append("\\caption{Balance Across Randomized Groups}")
    latex_lines.append("\\label{tab:balance}")
    latex_lines.append("\\begin{tabular*}{0.85\\textwidth}{@{\\extracolsep{\\fill}}lccccr@{}}")
    latex_lines.append("\\hline\\hline")
    latex_lines.append("Variable & Group A & Group B & Group C & N & p-value \\\\")
    latex_lines.append("\\hline")

    # Group sizes; reindex so an absent group shows up as 0.
    group_sizes = data.groupby('Group').size().reindex(groups, fill_value=0)
    size_line = f"Group Size & {group_sizes['Group A']} & {group_sizes['Group B']} & {group_sizes['Group C']} & {len(data)} & \\\\"
    latex_lines.append(size_line)

    # Panel A: Continuous Variables
    latex_lines.append("\\hline")
    latex_lines.append("\\multicolumn{6}{l}{\\textbf{Panel A: Continuous Variables}} \\\\")

    gpa_stats = compute_group_stats_continuous(data, 'GPA')
    f_test_p = compute_f_test(data, 'GPA')
    total_n = sum(group_stat['n'] for group_stat in gpa_stats.values())

    gpa_line = f"GPA & {gpa_stats['Group A']['mean']:.3f} & {gpa_stats['Group B']['mean']:.3f} & {gpa_stats['Group C']['mean']:.3f} & {total_n} & {f_test_p:.3f} \\\\"
    latex_lines.append(gpa_line)

    sd_line = f"& ({gpa_stats['Group A']['sd']:.3f}) & ({gpa_stats['Group B']['sd']:.3f}) & ({gpa_stats['Group C']['sd']:.3f}) & & \\\\"
    latex_lines.append(sd_line)

    missing_line = f"Missing (\\%) & {gpa_stats['Group A']['missing_rate']:.1f}\\% & {gpa_stats['Group B']['missing_rate']:.1f}\\% & {gpa_stats['Group C']['missing_rate']:.1f}\\% & & \\\\"
    latex_lines.append(missing_line)

    # Panels B and C share one layout; only the column, labels and title differ.
    latex_lines.extend(
        _balance_categorical_panel(data, 'Student_Type', student_type_mapping, "Panel B: Student Type")
    )
    latex_lines.extend(
        _balance_categorical_panel(data, 'AI_Use', ai_use_mapping, "Panel C: Prior AI Use")
    )

    # Add table footer
    latex_lines.append("\\hline")
    latex_lines.append("\\multicolumn{6}{p{0.85\\textwidth}}{\\footnotesize \\textit{Notes:} ")
    latex_lines.append("Standard deviations shown in parentheses for continuous variables. ")
    latex_lines.append("Missing (\\%) shows percentage of students with missing data in each group. ")
    latex_lines.append("P-values from F-test of equality of means for continuous variables and chi-square test of independence for categorical variables. ")
    latex_lines.append("N shows number of non-missing observations. ")
    latex_lines.append("Groups are rotated through different treatment conditions (No AI, o1-preview, Vincent) across tasks.} \\\\")
    latex_lines.append("\\end{tabular*}")
    latex_lines.append("\\end{table}")

    return "\n".join(latex_lines)


# Build the balance-analysis frame (task 1 rows joined with the controls).
data = load_data(master_folder)
    
# Create balance table
latex_table = create_balance_table(data)
    
# Write the LaTeX source next to the other generated tables.
with open(os.path.join(tables_path, "balance_table.tex"), "w") as f:
    f.write(latex_table)
    
print(f"Balance table saved successfully at {tables_path}!")


Balance table saved successfully at C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\tables!


# Differential effects

In [30]:
# Load all task files
def load_task_data(analysis_path):
    """Read ``task1_data_cleaned.csv`` ... ``task6_data_cleaned.csv`` from ``analysis_path``.

    Returns:
        dict mapping task number (1-6) to the DataFrame read for that task.

    Note:
        This redefines the ``load_task_data`` from the summary-statistics cell,
        which takes ``master_folder`` and returns a dict keyed by file name.
    """
    return {
        task_num: pd.read_csv(os.path.join(analysis_path, f"task{task_num}_data_cleaned.csv"))
        for task_num in range(1, 7)
    }

# Create dataframe with only students who completed all tasks
def get_complete_students_df(task_dfs):
    """Build a wide frame of the students that appear in every task.

    Args:
        task_dfs: Mapping of task number -> DataFrame. Each frame needs
            ``Student_Number``, ``P<task>_Total_Score``,
            ``P<task>_Productivity`` and ``AI_Condition``.

    Returns:
        DataFrame with one row per student present (non-missing
        ``Student_Number``) in all tasks, and the columns ``Student_Number``
        followed by ``Task<n>_Score``, ``Task<n>_Productivity`` and
        ``Task<n>_AI_Condition`` for each task. When a student has several
        rows in a task, the first one is used. Rows are ordered by
        ``Student_Number`` so the output is reproducible. The columns are
        present even when no student qualifies; an empty ``task_dfs`` yields
        an empty frame with only ``Student_Number``.
    """
    if not task_dfs:
        # set.intersection() needs at least one set to intersect.
        return pd.DataFrame(columns=['Student_Number'])

    id_sets = [set(df['Student_Number'].dropna().unique()) for df in task_dfs.values()]
    common_students = set.intersection(*id_sets)

    # Set iteration order is arbitrary; sort for a stable row order.
    try:
        ordered_students = sorted(common_students)
    except TypeError:
        # Identifiers of mixed, non-comparable types: order by string form.
        ordered_students = sorted(common_students, key=str)

    # Index each task once (first row per student) instead of filtering the
    # whole frame again for every student.
    first_rows = {
        task_num: df.drop_duplicates(subset='Student_Number', keep='first').set_index('Student_Number')
        for task_num, df in task_dfs.items()
    }

    columns = ['Student_Number']
    for task_num in task_dfs:
        columns.extend([
            f'Task{task_num}_Score',
            f'Task{task_num}_Productivity',
            f'Task{task_num}_AI_Condition',
        ])

    records = []
    for student in ordered_students:
        record = {'Student_Number': student}
        for task_num, indexed in first_rows.items():
            record[f'Task{task_num}_Score'] = indexed.at[student, f'P{task_num}_Total_Score']
            record[f'Task{task_num}_Productivity'] = indexed.at[student, f'P{task_num}_Productivity']
            record[f'Task{task_num}_AI_Condition'] = indexed.at[student, 'AI_Condition']
        records.append(record)

    return pd.DataFrame(records, columns=columns)

# Load data
# Rebinds task_dfs to a dict keyed by task number (1-6), read from the
# task<N>_data_cleaned.csv files in analysis_path.
task_dfs = load_task_data(analysis_path)
complete_students_df = get_complete_students_df(task_dfs)

# Save dataframe (read back by the plotting cells as complete_students_data.csv)
output_path = os.path.join(analysis_path, "complete_students_data.csv")
complete_students_df.to_csv(output_path, index=False)

print(f"Dataframe with complete students saved to {output_path}")

Dataframe with complete students saved to C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\complete_students_data.csv


In [31]:
def regression_line(x: np.ndarray, y: np.ndarray):
    """Fit y ~ const + x by OLS and evaluate the fit over a grid of x values.

    Pairs where either value is not finite are dropped before fitting.

    Args:
        x: Predictor values.
        y: Outcome values, same length as ``x``.

    Returns:
        Tuple ``(x_range, y_pred, ci_lower, ci_upper)``: 100 evenly spaced x
        values between the smallest and largest finite x, the predicted mean,
        and the bounds of the 95% confidence interval of the mean.

        Four empty arrays are returned when no line can be fitted: fewer than
        two finite pairs, or all finite x values identical. In the latter case
        ``sm.add_constant`` would treat x itself as the constant and skip the
        intercept, giving a degenerate fit over a zero-width range.
    """
    mask = np.isfinite(x) & np.isfinite(y)
    x_clean = x[mask]
    y_clean = y[mask]
    if len(x_clean) < 2 or x_clean.min() == x_clean.max():
        return np.array([]), np.array([]), np.array([]), np.array([])

    X = sm.add_constant(x_clean)
    model = sm.OLS(y_clean, X).fit()
    x_range = np.linspace(x_clean.min(), x_clean.max(), 100)
    X_range = sm.add_constant(x_range)
    pred = model.get_prediction(X_range)
    pred_summary = pred.summary_frame(alpha=0.05)  # 95% CI
    y_pred = pred_summary['mean'].values
    ci_lower = pred_summary['mean_ci_lower'].values
    ci_upper = pred_summary['mean_ci_upper'].values
    return x_range, y_pred, ci_lower, ci_upper

def plot_gpa_graph(df: pd.DataFrame, y1_col: str, y2_col: str, title: str,
                   y1_label: str, y2_label: str, output_path: str, y_label: str = "Mean Value"):
    """
    Plot GPA against an outcome for two conditions and save the figure.

    Args:
        df: DataFrame with a ``GPA`` column and the two outcome columns.
        y1_col: Outcome column drawn in blue.
        y2_col: Outcome column drawn in red.
        title: Figure title.
        y1_label: Legend label for the blue series.
        y2_label: Legend label for the red series.
        output_path: File the figure is saved to (300 dpi).
        y_label: Y-axis label.

    Behaviour:
    - Only rows where GPA and both outcomes are present and both outcomes are
      non-zero are plotted.
    - Scatter points are drawn for both outcomes, each with an OLS regression
      line and 95% confidence band (skipped when ``regression_line`` returns
      empty arrays).
    - In legend labels, "GPT 01" and "GPT" are replaced with "o1-preview".
      The longer token is replaced first so that "GPT 01" does not become
      "o1-preview 01".
    - The x-axis is fixed from 2.5 to 4; the y-axis starts at 0 and extends
      10% above the largest plotted outcome.
    - A gray explanatory note is placed at figure coordinates (0.65, 0).
    """
    fig = plt.figure(figsize=(10, 8))

    # Filter valid data and drop rows with zero outcomes
    df_plot = df[['GPA', y1_col, y2_col]].dropna()
    df_plot = df_plot[(df_plot[y1_col] != 0) & (df_plot[y2_col] != 0)]

    x = df_plot['GPA'].values
    y1 = df_plot[y1_col].values
    y2 = df_plot[y2_col].values

    # Plot scatter points (each dot represents a student)
    plt.scatter(x, y1, color='blue', alpha=0.6, s=50)
    plt.scatter(x, y2, color='red', alpha=0.6, s=50)

    # Regression and CI for y1 (blue)
    x_range, y1_line, y1_ci_lower, y1_ci_upper = regression_line(x, y1)
    if x_range.size > 0:
        plt.plot(x_range, y1_line, color='blue', linewidth=2)
        plt.fill_between(x_range, y1_ci_lower, y1_ci_upper, color='blue', alpha=0.2)

    # Regression and CI for y2 (red)
    x_range, y2_line, y2_ci_lower, y2_ci_upper = regression_line(x, y2)
    if x_range.size > 0:
        plt.plot(x_range, y2_line, color='red', linewidth=2)
        plt.fill_between(x_range, y2_ci_lower, y2_ci_upper, color='red', alpha=0.2)

    plt.xlabel("GPA")
    plt.ylabel(y_label)
    plt.title(title)

    # Clean legend labels: replace "GPT 01" before the shorter "GPT", otherwise
    # the "GPT" pass rewrites "GPT 01" to "o1-preview 01" first.
    legend_label1 = y1_label.replace("GPT 01", "o1-preview").replace("GPT", "o1-preview")
    legend_label2 = y2_label.replace("GPT 01", "o1-preview").replace("GPT", "o1-preview")

    legend_handles = [
        Line2D([0], [0], color='blue', lw=2, label=legend_label1),
        Line2D([0], [0], color='red', lw=2, label=legend_label2)
    ]
    plt.legend(handles=legend_handles, loc='upper left')

    # Set x-axis limits fixed from 2.5 to 4
    plt.xlim(2.5, 4)

    # Set y-axis starting at 0 and add some slack on top
    if not df_plot.empty:
        ymax = max(df_plot[y1_col].max(), df_plot[y2_col].max())
        plt.ylim(0, ymax * 1.1)
    else:
        plt.ylim(0, 1)

    # Adjust layout to leave space on the right and add a note there
    plt.subplots_adjust(right=0.75)
    plt.figtext(0.65, 0,
                "Note:\nEach dot represents a student\nShaded areas denote 95% confidence intervals",
                ha="left", va="center", fontsize=10, color="gray")

    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    # Close this figure explicitly so repeated calls do not accumulate figures.
    plt.close(fig)

def main():
    """Plot per-student mean score and productivity against GPA, AI vs. No AI.

    Reads ``controls_numeric.csv`` and ``complete_students_data.csv`` from the
    module-level ``analysis_path``, reshapes the task columns to long format,
    averages Score and Productivity per student and AI condition, and saves
    four figures (productivity and score, each for o1-preview and for Vincent
    against No AI) into ``figures_path``.
    """
    # Inputs
    gpa_file = os.path.join(analysis_path, "controls_numeric.csv")
    tasks_file = os.path.join(analysis_path, "complete_students_data.csv")
    df_gpa = pd.read_csv(gpa_file)      # needs Student_Number and GPA
    df_tasks = pd.read_csv(tasks_file)  # wide: Task<i>_Score / _Productivity / _AI_Condition

    # Wide -> long: one row per (student, task)
    long_parts = []
    for task_idx in range(1, 7):
        score_col = f"Task{task_idx}_Score"
        if score_col not in df_tasks.columns:
            continue
        wide_to_long = {
            score_col: "Score",
            f"Task{task_idx}_Productivity": "Productivity",
            f"Task{task_idx}_AI_Condition": "AI_Condition",
        }
        part = df_tasks[["Student_Number", *wide_to_long]].rename(columns=wide_to_long)
        part["Task"] = task_idx
        long_parts.append(part)
    df_long = pd.concat(long_parts, ignore_index=True)

    # Attach GPA (inner join keeps only students present in the controls)
    df_long = df_long.merge(df_gpa[['Student_Number', 'GPA']], on="Student_Number", how="inner")

    # Mean Score and Productivity per student and AI condition
    df_means = df_long.groupby(['Student_Number', 'AI_Condition'], as_index=False).agg({
        'Score': 'mean',
        'Productivity': 'mean',
        'GPA': 'first'  # GPA is the same for each student
    })

    def _one_column_per_condition(metric, new_names):
        """Pivot ``metric`` so every AI condition gets its own renamed column."""
        wide = df_means.pivot(index="Student_Number", columns="AI_Condition", values=metric).reset_index()
        return wide.rename(columns=new_names)

    df_score = _one_column_per_condition("Score", {
        "No AI": "Mean_Score_no_AI",
        "Vincent": "Mean_Score_Vincent",
        "GPT 01": "Mean_Score_GPT_01"
    })
    df_prod = _one_column_per_condition("Productivity", {
        "No AI": "Mean_Productivity_no_AI",
        "Vincent": "Mean_Productivity_Vincent",
        "GPT 01": "Mean_Productivity_GPT_01"
    })

    # Combine score and productivity, then bring GPA back in
    df_final = pd.merge(df_score, df_prod, on="Student_Number", how="outer")
    df_final = pd.merge(df_gpa[['Student_Number', 'GPA']], df_final, on="Student_Number", how="inner")

    # Output directory for graphs
    output_dir = figures_path
    os.makedirs(output_dir, exist_ok=True)

    # (y1_col, y2_col, title, y1_label, y2_label, file name, y-axis label)
    graph_specs = [
        # Graph 1: Productivity comparison (No AI vs o1-preview)
        ("Mean_Productivity_no_AI", "Mean_Productivity_GPT_01",
         "Mean Task Productivity Comparison for o1-preview Tasks and No AI Tasks, by Participant GPA",
         "No AI Productivity", "o1-preview Productivity",
         "graph1_productivity_noAI_vs_o1-preview.png",
         "Mean Value (Mean Total Productivity Across o1-preview Tasks)"),
        # Graph 2: Score comparison (No AI vs o1-preview)
        ("Mean_Score_no_AI", "Mean_Score_GPT_01",
         "Mean Task Score Comparison for o1-preview Tasks and No AI Tasks, by Participant GPA",
         "No AI Score", "o1-preview Score",
         "graph2_score_noAI_vs_o1-preview.png",
         "Mean Value (Mean Total Score Across o1-preview Tasks)"),
        # Graph 3: Productivity comparison (No AI vs Vincent)
        ("Mean_Productivity_no_AI", "Mean_Productivity_Vincent",
         "Mean Task Productivity Comparison for Vincent AI Tasks and No AI Tasks, by Participant GPA",
         "No AI Productivity", "Vincent Productivity",
         "graph3_productivity_noAI_vs_Vincent.png",
         "Mean Value (Mean Total Productivity Across Vincent AI Tasks)"),
        # Graph 4: Score comparison (No AI vs Vincent)
        ("Mean_Score_no_AI", "Mean_Score_Vincent",
         "Mean Task Score Comparison for Vincent AI Tasks and No AI Tasks, by Participant GPA",
         "No AI Score", "Vincent Score",
         "graph4_score_noAI_vs_Vincent.png",
         "Mean Value (Mean Total Score Across Vincent AI Tasks)"),
    ]
    for y1_col, y2_col, title, y1_label, y2_label, file_name, y_axis_label in graph_specs:
        plot_gpa_graph(
            df_final,
            y1_col=y1_col,
            y2_col=y2_col,
            title=title,
            y1_label=y1_label,
            y2_label=y2_label,
            output_path=os.path.join(output_dir, file_name),
            y_label=y_axis_label
        )

    print("Graphs have been saved in the directory:", output_dir)

# Apply the shared plot style, then produce the GPA comparison figures.
# NOTE(review): in this part of the notebook setup_plot_style is defined in the
# cell that follows this one. Confirm an earlier cell also defines it; otherwise
# a fresh "Restart & Run All" raises NameError here.
if __name__ == "__main__":
    setup_plot_style()
    main()


Graphs have been saved in the directory: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\figures


In [32]:
def setup_plot_style():
    """Apply the notebook's matplotlib style.

    Activates the ``seaborn-v0_8-whitegrid`` style sheet and then overrides
    font, figure-size, grid and axes settings in ``plt.rcParams``. The change
    is global for the running kernel.
    """
    rc_overrides = {
        'font.family': 'sans-serif',
        'font.size': 12,
        'figure.figsize': (12, 8),
        'axes.grid': True,
        'grid.color': '#E5E5E5',
        'grid.linestyle': '--',
        'grid.alpha': 0.7,
        'axes.axisbelow': True,
        'axes.labelsize': 14,
        'axes.titlesize': 16,
        'axes.titleweight': 'bold',
        'figure.facecolor': 'white',
    }
    # The style sheet goes first so the overrides below take precedence.
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.rcParams.update(rc_overrides)

def add_trend_line(ax, x, y):
    """Draw an OLS trend line with its 95% confidence band on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        x: Predictor values (array or Series).
        y: Outcome values, aligned with ``x``.

    Pairs where either value is not finite are ignored. Nothing is drawn when
    fewer than two finite pairs remain or when all finite x values are
    identical: with a constant x, ``sm.add_constant`` would treat x itself as
    the constant and skip the intercept, and the line would span a zero-width
    range.

    The line is labelled 'Trend Line' and the band '95% CI' for use in a
    legend.
    """
    mask = np.isfinite(x) & np.isfinite(y)
    x_clean = x[mask]
    y_clean = y[mask]

    if len(x_clean) < 2 or x_clean.min() == x_clean.max():
        return

    X = sm.add_constant(x_clean)
    model = sm.OLS(y_clean, X).fit()

    x_range = np.linspace(x_clean.min(), x_clean.max(), 100)
    X_range = sm.add_constant(x_range)

    # Get predictions and confidence intervals
    predictions = model.get_prediction(X_range)
    y_pred = predictions.predicted_mean
    ci = predictions.conf_int(alpha=0.05)

    # Plot trend line and confidence interval
    ax.plot(x_range, y_pred, color='#FF6B6B', linestyle='-', linewidth=2.5, alpha=0.8,
            label='Trend Line')
    ax.fill_between(x_range, ci[:, 0], ci[:, 1], color='#FF6B6B', alpha=0.1,
                    label='95% CI')

def create_scatter_plot(df, x_col, y_col, title, xlabel, ylabel, output_path):
    """Save a scatter plot of ``y_col`` against ``x_col`` with an OLS trend line.

    Args:
        df: DataFrame holding both columns.
        x_col: Column plotted on the x-axis.
        y_col: Column plotted on the y-axis.
        title: Figure title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        output_path: File the figure is saved to (300 dpi, white background).

    The legend is built from the labels attached to the artists ('Data Points'
    here, 'Trend Line' and '95% CI' set by ``add_trend_line``). Each entry is
    therefore tied to its own artist, and the trend-line entries are simply
    absent when ``add_trend_line`` draws nothing.
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Create scatter plot with improved aesthetics
    ax.scatter(df[x_col], df[y_col],
               color='#4A90E2',  # Nice blue color
               alpha=0.6,
               s=100,            # Larger points
               edgecolor='white',
               linewidth=0.5,
               label='Data Points')

    # Add trend line with confidence interval
    add_trend_line(ax, df[x_col], df[y_col])

    # Customize plot
    ax.set_title(title, pad=20)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # Enhance grid
    ax.grid(True, linestyle='--', alpha=0.4, color='gray')

    # Keep only thin left/bottom spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    # Add legend from the artists' own labels rather than a positional list,
    # whose mapping to artists depends on drawing order.
    ax.legend(loc='upper right',
              frameon=True,
              framealpha=0.9)

    # Adjust layout and save
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
    plt.close(fig)

def _mean_scores_by_condition(df_tasks, conditions=('No AI', 'Vincent', 'GPT 01'), n_tasks=6):
    """Return one row per student with the mean task score under each AI condition.

    For every student and condition, the scores of the tasks completed under
    that condition are averaged into ``mean_<condition>_score`` (condition
    lower-cased, spaces replaced by underscores). Only the first matching row
    per student/task is used, and missing scores are ignored. All mean columns
    are always present in the result (NaN where a student has no score under a
    condition), so downstream arithmetic never hits a missing column.
    """
    mean_columns = {
        condition: f'mean_{condition.lower().replace(" ", "_")}_score'
        for condition in conditions
    }

    # Only tasks for which both the score and the condition column exist
    task_pairs = [
        (f'Task{i}_Score', f'Task{i}_AI_Condition')
        for i in range(1, n_tasks + 1)
        if f'Task{i}_Score' in df_tasks.columns
        and f'Task{i}_AI_Condition' in df_tasks.columns
    ]

    records = []
    for student in df_tasks['Student_Number'].unique():
        # Filter once per student instead of once per student/condition/task
        student_rows = df_tasks[df_tasks['Student_Number'] == student]
        record = {'Student_Number': student}

        for condition, mean_col in mean_columns.items():
            scores = []
            for score_col, cond_col in task_pairs:
                matching = student_rows.loc[student_rows[cond_col] == condition, score_col]
                if not matching.empty and pd.notna(matching.iloc[0]):
                    scores.append(matching.iloc[0])

            if scores:
                record[mean_col] = np.mean(scores)

        records.append(record)

    ordered_columns = ['Student_Number', *mean_columns.values()]
    means = pd.DataFrame(records, columns=ordered_columns)
    # Columns that received no value at all would otherwise not be numeric
    score_columns = list(mean_columns.values())
    means[score_columns] = means[score_columns].astype(float)
    return means


def main():
    """Build per-student AI "boost" measures and save four scatter plots.

    Reads ``complete_students_data.csv`` and ``controls_numeric.csv`` from
    ``analysis_path``, computes each student's mean score per AI condition,
    merges in GPA, derives the o1 and Vincent boosts (mean score with the tool
    minus mean score without AI) and writes the plots to ``figures_path``.
    """
    # File paths
    tasks_file = os.path.join(analysis_path, "complete_students_data.csv")
    gpa_file = os.path.join(analysis_path, "controls_numeric.csv")
    output_dir = figures_path

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load and prepare data
    df_tasks = pd.read_csv(tasks_file)
    df_gpa = pd.read_csv(gpa_file)

    # Per-student mean score under each condition
    df_means = _mean_scores_by_condition(df_tasks)

    # Merge with GPA data (inner join: students missing from either file drop out)
    df_final = pd.merge(df_means, df_gpa[['Student_Number', 'GPA']], on='Student_Number')

    # Calculate boosts
    df_final['mean_o1_boost'] = df_final['mean_gpt_01_score'] - df_final['mean_no_ai_score']
    df_final['mean_vincent_boost'] = df_final['mean_vincent_score'] - df_final['mean_no_ai_score']

    # Define plots with enhanced titles
    plots = [
        {
            'x_col': 'GPA',
            'y_col': 'mean_o1_boost',
            'title': 'o1-preview Performance Boost vs. Student GPA',
            'xlabel': 'Student GPA',
            'ylabel': 'o1-preview Score Boost',
            'filename': 'plot1_o1_boost_vs_gpa.png'
        },
        {
            'x_col': 'GPA',
            'y_col': 'mean_vincent_boost',
            'title': 'Vincent Performance Boost vs. Student GPA',
            'xlabel': 'Student GPA',
            'ylabel': 'Vincent Score Boost',
            'filename': 'plot2_vincent_boost_vs_gpa.png'
        },
        {
            'x_col': 'mean_no_ai_score',
            'y_col': 'mean_o1_boost',
            'title': 'o1-preview Performance Boost vs. Baseline Score',
            'xlabel': 'Mean No AI Score',
            'ylabel': 'o1-preview Score Boost',
            'filename': 'plot3_o1_boost_vs_noai.png'
        },
        {
            'x_col': 'mean_no_ai_score',
            'y_col': 'mean_vincent_boost',
            'title': 'Vincent Performance Boost vs. Baseline Score',
            'xlabel': 'Mean No AI Score',
            'ylabel': 'Vincent Score Boost',
            'filename': 'plot4_vincent_boost_vs_noai.png'
        }
    ]

    # Generate all plots
    for plot in plots:
        output_path = os.path.join(output_dir, plot['filename'])
        create_scatter_plot(
            df_final,
            plot['x_col'],
            plot['y_col'],
            plot['title'],
            plot['xlabel'],
            plot['ylabel'],
            output_path
        )

    print("Plots have been saved in:", output_dir)

# Run the boost-vs-GPA / boost-vs-baseline plotting pipeline defined in main().
# setup_plot_style() is defined earlier in the notebook (not shown in this
# section); presumably it configures the shared matplotlib styling - confirm there.
if __name__ == "__main__":
    setup_plot_style()
    main()

Plots have been saved in: C:\Users\tomas\AI-Powered Lawyering - Anonymized Replication Package as of March 6th 2025\analysis\data\working_20250306_143808\figures


# Post Experiment Survey visualizations 

In [33]:
# Define file path and load the Excel file.
# The folder names are passed to os.path.join as separate components: the
# previous single literal embedded backslashes ("\P", "\G"), which are invalid
# escape sequences (SyntaxWarning) and only act as path separators on Windows.
file_path = os.path.join(
    master_folder,
    'Qualtrics surveys',
    'Post completion survey',
    'GPT o1 Post Experiment Survey_November 27, 2024_03.58.xlsx',
)

df = pd.read_excel(file_path)

# Create a clean dataset by keeping only the latest entry for each name
def create_deduplicated_dataset(df):
    """Keep only the latest survey response per respondent.

    Row 0 of ``df`` is treated as a descriptor/header row and is carried over
    unchanged as the first row of the result. The remaining rows are ordered by
    ``'StartDate'`` and, for each value of the name column ``'1'``, only the
    last (latest) row is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw survey export with columns ``'StartDate'`` and ``'1'``.

    Returns
    -------
    pandas.DataFrame
        Descriptor row followed by one row per respondent, with a fresh
        0..n-1 index. ``df`` itself is not modified.

    Notes
    -----
    NOTE(review): ``drop_duplicates`` compares missing values as equal, so
    responses with an empty name would collapse into a single row - confirm
    the export contains none.
    """
    header_row = df.iloc[0:1]
    responses = df.iloc[1:]

    # Stable sort: responses sharing a StartDate keep their file order, so the
    # row kept by keep='last' is deterministic.
    latest_per_name = (
        responses
        .sort_values('StartDate', kind='mergesort')
        .drop_duplicates(subset='1', keep='last')
    )

    # ignore_index gives unique labels; without it both the descriptor row and
    # the first response would carry index label 0.
    return pd.concat([header_row, latest_per_name], ignore_index=True)

# Create deduplicated dataset
deduplicated_df = create_deduplicated_dataset(df)

# Directory of the input file, only needed for the optional export below.
# Kept under its own name so the notebook-wide `analysis_path` (the timestamped
# working folder) is not overwritten by this cell.
survey_dir = os.path.dirname(file_path)
#output_filename = 'deduplicated_survey_responses.xlsx'
#output_path = os.path.join(survey_dir, output_filename)

# Save the deduplicated dataset
#deduplicated_df.to_excel(output_path, index=False)

# Print summary
# NOTE(review): both counts include the descriptor row (row 0), so the number
# of actual responses is one less than printed.
print(f"\nOriginal number of responses: {len(df)}")
print(f"Number of responses after deduplication: {len(deduplicated_df)}")
#print(f"Deduplicated dataset saved to: {output_path}")

# Optional: Print the names whose earlier submissions were removed.
# Every name still appears once after deduplication, so a set difference of
# names before/after is always empty; look for repeated names in the raw
# responses instead.
response_names = df['1'].iloc[1:]  # Skip header row
removed_names = (
    response_names[response_names.duplicated(keep=False)]
    .dropna()
    .unique()
    .tolist()
)

if removed_names:
    print("\nRemoved duplicate entries for:")
    for name in removed_names:
        print(f"- {name}")


Original number of responses: 119
Number of responses after deduplication: 114


  file_path = os.path.join(master_folder, 'Qualtrics surveys\Post completion survey\GPT o1 Post Experiment Survey_November 27, 2024_03.58.xlsx')


In [34]:
from scipy import stats  # re-imported here; also imported at the top of the notebook

# Work on the response rows only (row 0 of the deduplicated frame is the header row)
numeric_data = deduplicated_df.iloc[1:].copy()

# Each metric maps to its (GPT o1 question, Vincent AI question) column pair
question_pairs = {
    'Perceived Quality Impact': ('Q1', 'Q7'),
    'Perceived Speed Impact': ('Q2', 'Q8'),
    'Self-Reported Satisfaction': ('Q3', 'Q9'),
    'Self-Assessed Improvement': ('Q5', 'Q10'),
    'Intended Future Use': ('Q6', 'Q11'),
}

# Columns that feed the comparison
columns_to_analyze = ['Q1', 'Q2', 'Q3', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11']

# Coerce answers to numbers; unparseable values and zeros both become NaN
for col in columns_to_analyze:
    numeric_data[col] = pd.to_numeric(numeric_data[col], errors='coerce').replace(0, np.nan)

# Per-metric mean and standard error for each tool, plus valid-response counts
gpt_means, vincent_means = [], []
gpt_sems, vincent_sems = [], []
n_responses = []

for gpt_col, vincent_col in question_pairs.values():
    # Valid (non-missing) answers for each tool
    gpt_valid = numeric_data[gpt_col].dropna()
    vincent_valid = numeric_data[vincent_col].dropna()

    gpt_means.append(gpt_valid.mean())
    vincent_means.append(vincent_valid.mean())

    gpt_sems.append(stats.sem(gpt_valid, nan_policy='omit'))
    vincent_sems.append(stats.sem(vincent_valid, nan_policy='omit'))

    # (GPT o1 count, Vincent AI count)
    n_responses.append((len(gpt_valid), len(vincent_valid)))

# Figure for the horizontal grouped bar chart
fig, ax = plt.subplots(figsize=(14, 7))

# One slot per metric; the two tools sit on either side of the slot centre
x = np.arange(len(question_pairs))
width = 0.35

# Horizontal bars with standard-error whiskers
rects1 = ax.barh(
    y=x - width/2, width=gpt_means, height=width,
    label='GPT o1',
    color='#87CEEB',  # Lighter, more muted blue
    xerr=gpt_sems, capsize=3,
    error_kw={'elinewidth': 1.5, 'capthick': 1.5},
)
rects2 = ax.barh(
    y=x + width/2, width=vincent_means, height=width,
    label='Vincent AI',
    color='#90EE90',  # Lighter, more muted green
    xerr=vincent_sems, capsize=3,
    error_kw={'elinewidth': 1.5, 'capthick': 1.5},
)

def add_labels(rects):
    """Write each bar's value, to two decimals, just right of the bar end."""
    for bar in rects:
        value = bar.get_width()
        y_centre = bar.get_y() + bar.get_height()/2
        ax.text(value + 0.15, y_centre,
                f'{value:.2f}', ha='left', va='center',
                fontsize=10, fontweight='bold', color='#444444')

add_labels(rects1)
add_labels(rects2)

# Axis labels, title and metric names
ax.set_xlabel('Mean Self-Reported Score (1-5 scale)', fontsize=11)
ax.set_title('Self-Reported Impact of GPT o1 vs Vincent AI', fontsize=13, pad=15)
ax.set_yticks(x)
ax.set_yticklabels(list(question_pairs), fontsize=10)

# x-axis: 20% headroom beyond the largest mean, integer ticks 0..5
ax.set_xlim(0, max(max(gpt_means), max(vincent_means)) * 1.2)
ax.set_xticks(np.arange(0, 5.1, 1.0))

# Faint vertical grid lines
ax.grid(axis='x', linestyle='--', alpha=0.2, color='gray')

# Hide the top/right spines and thin the remaining two
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(0.5)

# Legend
ax.legend(loc='lower right', framealpha=0.9, fontsize=10)

# Sample-size footnote: a range when counts differ, a single number otherwise
min_n = min(min(n) for n in n_responses)
max_n = max(max(n) for n in n_responses)
sample_note = f'n = {min_n}-{max_n}' if min_n != max_n else f'n = {min_n}'
fig.text(0.01, 0.02, sample_note, fontsize=9, color='#666666')

# Tighten layout, then save into the figures folder (created if missing)
fig.tight_layout()

os.makedirs(figures_path, exist_ok=True)
fig.savefig(os.path.join(figures_path, "gpt_vs_vincent_self_reported_impact_comparison.png"), dpi=300, bbox_inches='tight')
plt.close(fig)

# Print summary statistics: count, mean and standard error per metric and tool.
# (Iterates the question pairs directly; the zip with n_responses was unused.)
print("\nSummary Statistics:")
for metric, (gpt_col, vincent_col) in question_pairs.items():
    print(f"\n{metric}:")
    gpt_valid = numeric_data[gpt_col].dropna()
    vincent_valid = numeric_data[vincent_col].dropna()

    print(f"GPT o1: n={len(gpt_valid)}, Mean = {gpt_valid.mean():.2f}, SEM = {stats.sem(gpt_valid, nan_policy='omit'):.2f}")
    print(f"Vincent AI: n={len(vincent_valid)}, Mean = {vincent_valid.mean():.2f}, SEM = {stats.sem(vincent_valid, nan_policy='omit'):.2f}")


Summary Statistics:

Perceived Quality Impact:
GPT o1: n=113, Mean = 3.65, SEM = 0.09
Vincent AI: n=113, Mean = 3.84, SEM = 0.10

Perceived Speed Impact:
GPT o1: n=113, Mean = 4.20, SEM = 0.07
Vincent AI: n=113, Mean = 3.92, SEM = 0.11

Self-Reported Satisfaction:
GPT o1: n=113, Mean = 3.19, SEM = 0.12
Vincent AI: n=112, Mean = 3.45, SEM = 0.12

Self-Assessed Improvement:
GPT o1: n=113, Mean = 3.13, SEM = 0.10
Vincent AI: n=113, Mean = 3.32, SEM = 0.11

Intended Future Use:
GPT o1: n=113, Mean = 3.78, SEM = 0.10
Vincent AI: n=113, Mean = 3.81, SEM = 0.11


In [35]:
# Each task maps to its (Vincent AI question, GPT o1 question) column pair
task_columns = {
    'Legal Memo': ('Q16', 'Q26'),
    'Client Email': ('Q25', 'Q21'),
    'Complaint Analysis': ('Q22', 'Q18'),
    'NDA': ('Q27', 'Q23'),
    'Motion to Consolidate': ('Q19', 'Q28'),
    'Persuasive Letter': ('Q24', 'Q20'),
}

# Response rows only (row 0 of the deduplicated frame is the header row)
numeric_data = deduplicated_df.iloc[1:].copy()
columns_to_analyze = [col for pair in task_columns.values() for col in pair]

# Coerce answers to numbers; unparseable values and zeros both become NaN
for col in columns_to_analyze:
    numeric_data[col] = pd.to_numeric(numeric_data[col], errors='coerce').replace(0, np.nan)

# Per-task mean and standard error for each tool, plus valid-response counts
vincent_means, gpt_means = [], []
vincent_sems, gpt_sems = [], []
n_responses = []

for vincent_col, gpt_col in task_columns.values():
    # Valid (non-missing) answers for each tool
    vincent_valid = numeric_data[vincent_col].dropna()
    gpt_valid = numeric_data[gpt_col].dropna()

    vincent_means.append(vincent_valid.mean())
    gpt_means.append(gpt_valid.mean())

    vincent_sems.append(stats.sem(vincent_valid, nan_policy='omit'))
    gpt_sems.append(stats.sem(gpt_valid, nan_policy='omit'))

    # (Vincent AI count, GPT o1 count)
    n_responses.append((len(vincent_valid), len(gpt_valid)))

# Figure for the vertical grouped bar chart
fig, ax = plt.subplots(figsize=(14, 7))

# One slot per task; the two tools sit on either side of the slot centre
x = np.arange(len(task_columns))
width = 0.35

# Vertical bars with standard-error whiskers
rects1 = ax.bar(
    x=x - width/2, height=vincent_means, width=width,
    label='Vincent AI',
    color='#90EE90',  # Lighter, more muted green
    yerr=vincent_sems, capsize=3,
    error_kw={'elinewidth': 1.5, 'capthick': 1.5},
)
rects2 = ax.bar(
    x=x + width/2, height=gpt_means, width=width,
    label='GPT o1',
    color='#87CEEB',  # Lighter, more muted blue
    yerr=gpt_sems, capsize=3,
    error_kw={'elinewidth': 1.5, 'capthick': 1.5},
)

def add_labels(rects):
    """Write each bar's value, to two decimals, 0.25 above the bar top."""
    for bar in rects:
        value = bar.get_height()
        x_centre = bar.get_x() + bar.get_width()/2
        ax.text(x_centre, value + 0.25,
                f'{value:.2f}', ha='center', va='bottom',
                fontsize=10, fontweight='bold', color='#444444')

add_labels(rects1)
add_labels(rects2)

# Axis label, title and task names
ax.set_ylabel('Self-reported Helpfulness of AI Tools (1-5 scale)', fontsize=11)
ax.set_title('Perceived Helpfulness by Task and AI Tool', fontsize=13, pad=15)
ax.set_xticks(x)
ax.set_xticklabels(list(task_columns), rotation=30, ha='right', fontsize=10)

# y-axis: fixed range with room above 5 for the value labels, integer ticks 0..5
ax.set_ylim(0, 5.2)
ax.set_yticks(np.arange(0, 5.1, 1.0))

# Faint horizontal grid lines
ax.grid(axis='y', linestyle='--', alpha=0.2, color='gray')

# Hide the top/right spines and thin the remaining two
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(0.5)

# Legend
ax.legend(loc='upper right', framealpha=0.9, fontsize=10)

# Sample-size footnote: a range when counts differ, a single number otherwise
min_n = min(min(n) for n in n_responses)
max_n = max(max(n) for n in n_responses)
sample_note = f'n = {min_n}-{max_n}' if min_n != max_n else f'n = {min_n}'
fig.text(0.01, 0.02, sample_note, fontsize=9, color='#666666')

fig.tight_layout()

# Save into the figures folder (created if missing)
os.makedirs(figures_path, exist_ok=True)
fig.savefig(os.path.join(figures_path, "gpt_vs_vincent_perceived_helpfulness.png"), dpi=300, bbox_inches='tight')
plt.close(fig)

# Print detailed summary statistics: count, mean and standard error per task and tool.
# (Iterates the task columns directly; the zip with n_responses was unused.)
print("\nSummary Statistics:")
for task, (vincent_col, gpt_col) in task_columns.items():
    print(f"\n{task}:")
    vincent_valid = numeric_data[vincent_col].dropna()
    gpt_valid = numeric_data[gpt_col].dropna()

    print(f"Vincent AI: n={len(vincent_valid)}, Mean = {vincent_valid.mean():.2f}, SEM = {stats.sem(vincent_valid, nan_policy='omit'):.2f}")
    print(f"GPT o1: n={len(gpt_valid)}, Mean = {gpt_valid.mean():.2f}, SEM = {stats.sem(gpt_valid, nan_policy='omit'):.2f}")


Summary Statistics:

Legal Memo:
Vincent AI: n=41, Mean = 3.41, SEM = 0.15
GPT o1: n=37, Mean = 3.41, SEM = 0.18

Client Email:
Vincent AI: n=37, Mean = 3.51, SEM = 0.19
GPT o1: n=35, Mean = 3.49, SEM = 0.18

Complaint Analysis:
Vincent AI: n=35, Mean = 3.91, SEM = 0.18
GPT o1: n=41, Mean = 3.37, SEM = 0.15

NDA:
Vincent AI: n=37, Mean = 3.30, SEM = 0.23
GPT o1: n=35, Mean = 3.20, SEM = 0.18

Motion to Consolidate:
Vincent AI: n=41, Mean = 3.22, SEM = 0.18
GPT o1: n=37, Mean = 3.38, SEM = 0.23

Persuasive Letter:
Vincent AI: n=35, Mean = 3.66, SEM = 0.19
GPT o1: n=41, Mean = 3.34, SEM = 0.20
