In [None]:
import pandas as pd
import numpy as np
from google.colab import drive
# Mount Google Drive at /content/drive; the data and model-output files read
# by later cells are addressed through this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
from scipy import stats


In [None]:
def save_to_file(results, filename):
    """
    Flatten analysis results into one row per group and write them to disk.

    Parameters
    ----------
    results : dict
        Must contain an ``'overall'`` entry holding ``'mean_difference'``,
        ``'std_difference'`` and ``'ttest'`` (an object exposing
        ``.statistic`` and ``.pvalue``). Every other key is treated as a
        demographic variable whose value is a dict with:

        * ``'group_analysis'``: DataFrame indexed by demographic value and
          addressed with the column keys ``('grade_diff', 'mean')``,
          ``('grade_diff', 'std')``, ``('grade_diff', 'count')``,
          ``('pred_regular', 'mean')``, ``('pred_demo', 'mean')`` and
          ``'effect_size'``.
        * ``'anova'``: an object exposing ``.pvalue``, or ``None`` when no
          ANOVA result is available.
    filename : str
        Destination path. Names ending in ``.xlsx`` are written with
        ``DataFrame.to_excel``; anything else is written as CSV.

    Returns
    -------
    pandas.DataFrame
        The table that was written: the overall row first, followed by one
        row per (demographic variable, demographic value) pair.

    Raises
    ------
    KeyError
        If ``results`` lacks ``'overall'`` or a per-variable entry lacks
        ``'group_analysis'`` / ``'anova'``.
    """
    overall = results['overall']
    rows = [{
        'demographic_variable': 'overall',
        'demographic_value': 'all',
        'mean_difference': overall['mean_difference'],
        'std_difference': overall['std_difference'],
        'ttest_statistic': overall['ttest'].statistic,
        'ttest_pvalue': overall['ttest'].pvalue,
    }]

    for demo_var, analysis in results.items():
        if demo_var == 'overall':
            continue

        group_data = analysis['group_analysis']
        anova = analysis['anova']
        # A real float NaN (not the string "NaN") keeps the column numeric,
        # so it can be filtered and compared without a cast.
        anova_pvalue = anova.pvalue if anova is not None else float('nan')

        for demo_val in group_data.index:
            effect_size = group_data.loc[demo_val, 'effect_size']
            # With two-level columns the lookup returns a one-element Series;
            # take the value by position explicitly, because integer-key
            # positional fallback on a label-indexed Series is deprecated.
            if isinstance(effect_size, pd.Series):
                effect_size = effect_size.iloc[0]

            rows.append({
                'demographic_variable': demo_var,
                'demographic_value': demo_val,
                'mean_difference': group_data.loc[demo_val, ('grade_diff', 'mean')],
                'std_difference': group_data.loc[demo_val, ('grade_diff', 'std')],
                'count': group_data.loc[demo_val, ('grade_diff', 'count')],
                'regular_mean': group_data.loc[demo_val, ('pred_regular', 'mean')],
                'demo_mean': group_data.loc[demo_val, ('pred_demo', 'mean')],
                'effect_size': effect_size,
                'anova_pvalue': anova_pvalue,
            })

    df = pd.DataFrame(rows)

    if filename.endswith('.xlsx'):
        df.to_excel(filename, index=False)
    else:
        df.to_csv(filename, index=False)

    return df

In [None]:
def get_results(demo, regular, metadata=None):
  """
  Pair the two prediction tables per essay and compute overall statistics.

  Parameters
  ----------
  demo : pandas.DataFrame
      Predictions for the demographic condition. Needs an 'essay_id' column
      and a prediction column that ends up as 'pred_demo' after the merge
      (i.e. both inputs are expected to share a 'pred' column).
  regular : pandas.DataFrame
      Predictions for the regular condition, same layout as `demo`; its
      prediction column ends up as 'pred_regular'.
  metadata : pandas.DataFrame, optional
      Frame joined onto the paired predictions by 'essay_id'. When omitted,
      the module-level `training_data` frame is used, which preserves the
      original two-argument call.

  Returns
  -------
  dict
      ``{'overall': {...}}`` with:

      * 'mean_difference' / 'std_difference': mean and standard deviation of
        ``pred_demo - pred_regular``;
      * 'ttest': result of ``scipy.stats.ttest_rel(pred_regular, pred_demo)``
        with NaN pairs omitted.

  Notes
  -----
  The t-test is computed on ``regular - demo`` while the mean difference is
  ``demo - regular``, so the sign of the t statistic is the opposite of the
  sign of 'mean_difference'.
  """
  if metadata is None:
    # Backward-compatible fallback to the notebook-level frame; raises
    # NameError if the cell that defines `training_data` has not been run.
    metadata = training_data

  paired = regular.merge(demo, on='essay_id', suffixes=('_regular', '_demo'))
  df = (
      paired
      .merge(metadata, on='essay_id')
      # Presumably undoes the '_x' suffix pandas adds when both sides carry a
      # 'demographic_value' column; a no-op when that column is absent.
      .rename(columns={'demographic_value_x': 'demographic_value'})
  )
  df['grade_diff'] = df['pred_demo'] - df['pred_regular']

  results = {}

  # Overall analysis across all paired essays.
  results['overall'] = {
      'mean_difference': df['grade_diff'].mean(),
      'std_difference': df['grade_diff'].std(),
      'ttest': stats.ttest_rel(df['pred_regular'], df['pred_demo'], nan_policy='omit'),
  }

  return results

In [None]:
# Load the training data from a JSON Lines file on the mounted drive.
# get_results() looks this frame up by its global name `training_data` and
# merges it on 'essay_id', so this cell must run before any analysis cell.
training_data = pd.read_json('/content/drive/My Drive/COS597H/data/train_indiv_demo_shuffled2.json', lines = True)

In [None]:
# Reads the same JSON Lines file that is loaded into `training_data`, under a
# separate name used for the inspection in the next cell.
# NOTE(review): this parses the file a second time; reusing `training_data`
# would probably suffice - confirm nothing relies on an independent copy.
data = pd.read_json('/content/drive/My Drive/COS597H/data/train_indiv_demo_shuffled2.json', lines = True)

In [None]:
# Show the distinct values of the 'demographic_variable' column.
data['demographic_variable'].unique()

array(['gender', 'grade_level', 'ell_status', 'race_ethnicity',
       'economically_disadvantaged', 'student_disability_status'],
      dtype=object)

In [None]:
# Llama: load both prediction files, compute the overall statistics, save them
# to CSV, and display the saved table.
demo = pd.read_csv('/content/drive/My Drive/COS597H/outputs/llama_demo.csv')
regular = pd.read_csv('/content/drive/My Drive/COS597H/outputs/llama_regular.csv')
results = get_results(demo=demo, regular=regular)
save_to_file(results, filename='llama_results.csv')

Unnamed: 0,demographic_variable,demographic_value,mean_difference,std_difference,ttest_statistic,ttest_pvalue
0,overall,all,-0.350534,1.478683,21.027466,1.508542e-95


In [None]:
# Llama-instruct: load both prediction files, compute the overall statistics,
# save them to CSV, and display the saved table.
demo = pd.read_csv('/content/drive/My Drive/COS597H/outputs/llama-instruct_demo_verbose.csv')
regular = pd.read_csv('/content/drive/My Drive/COS597H/outputs/llama-instruct_regular.csv')
results = get_results(demo=demo, regular=regular)
save_to_file(results, filename='llama_instruct_results.csv')

Unnamed: 0,demographic_variable,demographic_value,mean_difference,std_difference,ttest_statistic,ttest_pvalue
0,overall,all,0.036477,0.772323,-4.189389,2.8e-05


In [None]:
# Qwen: load both prediction files, compute the overall statistics, save them
# to CSV, and display the saved table.
regular = pd.read_csv('/content/drive/My Drive/COS597H/outputs/qwen_regular.csv')
demo = pd.read_csv('/content/drive/My Drive/COS597H/outputs/qwen_demo.csv')
results = get_results(demo=demo, regular=regular)
save_to_file(results, filename='qwen_results.csv')

Unnamed: 0,demographic_variable,demographic_value,mean_difference,std_difference,ttest_statistic,ttest_pvalue
0,overall,all,0.478769,1.424022,-29.818555,3.386067e-185


In [None]:
# Gemma: load both prediction files, compute the overall statistics, save them
# to CSV, and display the saved table.
regular = pd.read_csv('/content/drive/My Drive/COS597H/outputs/gemma_regular_verbose.csv')
demo = pd.read_csv('/content/drive/My Drive/COS597H/outputs/gemma_demo.csv')
results = get_results(demo=demo, regular=regular)
save_to_file(results, filename='gemma_results.csv')

Unnamed: 0,demographic_variable,demographic_value,mean_difference,std_difference,ttest_statistic,ttest_pvalue
0,overall,all,1.901881,1.186352,-142.200825,0.0


In [None]:
# Flan-T5: load both prediction files, compute the overall statistics, save
# them to CSV, and display the saved table.
regular = pd.read_csv('/content/drive/My Drive/COS597H/outputs/flan-t5_regular_verbose.csv')
demo = pd.read_csv('/content/drive/My Drive/COS597H/outputs/flan-t5_demo.csv')
results = get_results(demo=demo, regular=regular)
save_to_file(results, filename='flan_results.csv')

Unnamed: 0,demographic_variable,demographic_value,mean_difference,std_difference,ttest_statistic,ttest_pvalue
0,overall,all,0.226108,23.764229,-0.799625,0.423955
