# Many Labs re-analysis

In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns
import math
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import GroupKFold, cross_validate

In [2]:
pd.set_option('display.float_format', lambda x: '%.5f' % x)

### Read in the dataset 

In [3]:
df = pd.read_csv('clean_dataset.txt', sep='\t')

### Define useful functions

1)  Define function to compute Cohen's d and standardized mean difference

In [4]:
def compute_metrics(out_name, group_name, levels):
    ''' Returns cohen\'s d and standardized mean difference for anchoring studies'''
    # Subset relevant data 
    cond_1 = df[out_name][df[group_name] == levels[0]]
    cond_2= df[out_name][df[group_name] == levels[1]]
    # Compute metrics 
    m1 = cond_1.mean()
    m2 = cond_2.mean()
    stdev = df[out_name].std()
    n1 = cond_1.notnull().sum()
    n2 = cond_2.notnull().sum()
    # Cohen's d
    pool_std1 = (n1 - 1) * cond_1.std()**2
    pool_std2 = (n2 - 1) * cond_2.std()**2
    d = (m2 - m1) / math.sqrt((pool_std1 + pool_std2)/(n1 + n2 - 2))
    # Standardized difference
    stdmdiff = m1 - m2 / stdev
    return d, stdmdiff

2) Define function to split data into X and y based on IV and DV column names

In [None]:
def split_data(group_col, dv_col):
    X = df_stats[group_col][df_stats[dv_col].isnull() == False].dropna().values.reshape(-1,1)
    y = df_stats[dv_col][df_stats[group_col].isnull() == False].dropna().values
    grp = df_stats['sample'][(df_stats[group_col].isnull() == False) & (df_stats[dv_col].isnull() == False)]
    return X, y, grp

3) Define function to fit OLS or logistic regression models

In [None]:

def fit_model(group_col, dv_col, type='reg'):
    X, y, grp = split_data(group_col, dv_col)
    X2 = sm.add_constant(X)
    if type =='reg':
        est = sm.OLS(y, X2)
    elif type == 'class':
        est = sm.Logit(y, X2)
    m = est.fit()
    if type == 'reg':
        outs = (np.nan, m.rsquared, m.params[1], m.pvalues[1])
    else:
        acc = np.diag(m.pred_table()).sum() / m.pred_table().sum()
        outs = (acc, np.nan, m.params[1], m.pvalues[1])
    return outs

### Preprocess data

Recode categorical variables

In [5]:
df['allowedforbidden_bin'] = df['allowedforbidden'].map({'YES':1, 'NO':0})
df['gainlossgroup'] = df['gainlossgroup'].map({'People will die':'die', 'People will be saved':'saved'})
df['gainlossDV_bin'] = df['gainlossDV'].map({'chose program with exact outcome':0, 'chose program with probability outcome':1})
df['scalesgroup'] = df['scalesgroup'].map({'high category scale':'high', 'low category scale':'low'})
df['scales_bin'] = df['scales'].map({'less than 2 1/2 hrs':0, 'more than 2 1/2 hrs':1})
df['reciprocityus_bin'] = df['reciprocityus'].map({'yes':1, 'no':0})

Make lists with names of DV columns in dataframe and name of IV in dataframe

In [6]:
dvs = ['anchoring' + str(i) for i in range(1,5)] + ['allowedforbidden_bin', 'gambfalDV', 'gainlossDV_bin','d_art', 'scales_bin', 'quote', 'reciprocityus_bin','sunkDV', 'Imagineddv', 'flagdv', 'Sysjust']
grps = ['anch' + str(i) + 'group' for i in range(1,5)] + ['allowedforbiddenGroup', 'gambfalgroup', 'gainlossgroup', 'sex', 'scalesgroup', 'quoteGroup', 'reciprocitygroup', 'sunkgroup', 'ContactGroup', 'flagGroup', 'MoneyGroup']


Add list coding for levels for each IV (all IVs are categorical)

In [None]:
levels = [['lowanchor', 'highanchor']] * 4 + [['allowed', 'forbidden'], ['two6', 'three6'], ['die', 'saved'], ['m', 'f'], ['high', 'low'], ['liked source', 'disliked source'], ['Asked first', 'Asked second'], ['paid', 'free'], ['Control group', 'Contact group'], ['no prime', 'flag prime'], ['Control group', 'Money priming group']]

Add coding for regression vs. classification problem

In [None]:
reg_vs_class = ['reg','reg','reg','reg','class','reg','class','reg','class','reg','class','reg','reg','reg','reg']

Add some interpretable information on studies and DVs

In [7]:
dvs_explain = ['SF-NYC distance', 'Pop. Chicago', 'Height Mt. Everest', 'Babies born per day', 'Binary', 'Numer of times dice rolled', 'Binary', 'IAT score', 'Binary', 'Agreement with quote (1-9)', 'Binary', 'Likelihood to attend game (1-9)', 'Aggregate willingness to interact (1-9)', 'Aggregate political conservatism (1-7)', 'Aggregate system justification (1-7)']
effect_names = ['anchoringNY', 'anchoringChicago', 'anchoringMtEverest', 'anchoringBabies', 'allowedforbidden', 'gamblersfallacy', 'gainlossframing', 'mathattitudes', 'low/high scales', 'quoteattribution', 'reciprocity', 'sunkcost', 'imaginedcontact', 'flagpriming', 'currencypriming']

### Compute Cohen's d and standardized mean difference (to make sure we can reproduce estimates from Table 2 in the paper)
Note that we are not using rank-transformed DVs for anchoring (so to keep interpretability of MSE values later on, so those values will not be identical)

In [8]:
names, cohensds, stdmeandiffs = [], [], []
for idx in range(len(dvs)):
    names.append(dvs[idx])
    d, smdiff = compute_metrics(dvs[idx], grps[idx], levels[idx])
    cohensds.append(abs(d))
    stdmeandiffs.append(abs(smdiff))

### Also compute correlations

In [9]:
cors = []
df_stats = df.copy()
for idx in range(len(dvs)):
    df_stats[grps[idx]] = df_stats[grps[idx]].map({levels[idx][0]: 0,
                                                   levels[idx][1]: 1})
    cval = df_stats[[dvs[idx], grps[idx]]].corr().iloc[0,1]
    cors.append(cval)

Add info to dataset and visualize

In [10]:
summary = pd.DataFrame(list(zip(effect_names, cohensds, stdmeandiffs, cors, reg_vs_class)), 
                       columns=['effect', 'cohensd', 'std_mean_diff', 'pearsons_r', 'problem_type']) 
display(summary)

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type
0,anchoringNY,1.15905,2847.56614,0.50131,reg
1,anchoringChicago,1.77999,1033554.90135,0.66481,reg
2,anchoringMtEverest,2.29785,11846.76164,0.75431,reg
3,anchoringBabies,2.18582,3202.9508,0.73748,reg
4,allowedforbidden,1.96363,0.61604,-0.70064,class
5,gamblersfallacy,0.6259,0.74034,0.2974,reg
6,gainlossframing,0.59908,0.09183,-0.28697,class
7,mathattitudes,0.53339,0.78257,0.24199,reg
8,low/high scales,0.49686,0.04829,-0.24092,class
9,quoteattribution,0.32167,3.5373,-0.15877,reg


### Run OLS or Logistic Regression (IVs / DVs only) for all effects

In [14]:
r2_scores = []
perc_acc = []
coefs = []
pvals = []

# Fit for anchoring effects
for idx in range(len(grps)):
    acc, r2, par, pval = fit_model(grps[idx], dvs[idx], type=reg_vs_class[idx])
    perc_acc.append(acc)
    r2_scores.append(r2)
    coefs.append(par)
    pvals.append(pval)

Optimization terminated successfully.
         Current function value: 0.402598
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.650599
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.401718
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.595776
         Iterations 5


Add to summary df and display

In [15]:
summary['reg_coef'] = coefs
summary['reg_perc_acc'] = perc_acc
summary['reg_r2'] = r2_scores
summary['reg_pval'] = pvals
display(summary)

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type,reg_coef,reg_perc_acc,reg_r2,reg_pval
0,anchoringNY,1.15905,2847.56614,0.50131,reg,1143.20716,,0.25131,0.0
1,anchoringChicago,1.77999,1033554.90135,0.66481,reg,1964484.41985,,0.44198,0.0
2,anchoringMtEverest,2.29785,11846.76164,0.75431,reg,22652.0418,,0.56898,0.0
3,anchoringBabies,2.18582,3202.9508,0.73748,reg,23497.38255,,0.54387,0.0
4,allowedforbidden,1.96363,0.61604,-0.70064,class,-3.72006,0.84584,,0.0
5,gamblersfallacy,0.6259,0.74034,0.2974,reg,1.69587,,0.08845,0.0
6,gainlossframing,0.59908,0.09183,-0.28697,class,-1.18267,0.64312,,0.0
7,mathattitudes,0.53339,0.78257,0.24199,reg,0.25912,,0.05856,0.0
8,low/high scales,0.49686,0.04829,-0.24092,class,-1.4421,0.84472,,0.0
9,quoteattribution,0.32167,3.5373,-0.15877,reg,-0.69527,,0.02521,0.0


### Now try regression with a bunch of random variables
(First recode the variables, then run regression)

In [16]:
df_stats['us_or_international'] = df_stats['us_or_international'].map({'US':0, 'International':1})
df_stats['lab_or_online'] = df_stats['lab_or_online'].map({'In-lab':0, 'Online':1})

In [28]:
random_cols = ['age', 'sex', 'us_or_international', 'lab_or_online']
random_r2, random_par, random_pval, random_perc_acc = [], [], [], []
for c in random_cols:
    for idx in range(len(grps)):
        perc_acc, r2, par, pval = fit_model(c, dvs[idx], reg_vs_class[idx])
        random_perc_acc.append(perc_acc)
        random_r2.append(r2)
        random_par.append(par)
        random_pval.append(pval)
random_idx = list(np.repeat(random_cols, len(grps)))

Optimization terminated successfully.
         Current function value: 0.678547
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691332
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.423837
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.595173
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.677783
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692039
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.431521
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.600329
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.678530
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691825
  

Visualize results

In [29]:
random_df = pd.DataFrame(zip(random_idx, dvs * 7, random_perc_acc, random_r2, random_par, random_pval), columns=['random_IV', 'DV', 'perc_acc', 'r2', 'coef', 'pval'])
display(random_df)

Unnamed: 0,random_IV,DV,perc_acc,r2,coef,pval
0,age,anchoring1,,0.00089,-2.91655,0.02933
1,age,anchoring2,,0.01022,13289.81245,0.0
2,age,anchoring3,,0.00019,17.96978,0.30449
3,age,anchoring4,,7e-05,11.68457,0.53049
4,age,allowedforbidden_bin,0.58503,,0.00209,0.35195
5,age,gambfalDV,,0.00013,-0.00288,0.38234
6,age,gainlossDV_bin,0.51965,,0.00815,0.0003
7,age,d_art,,0.01321,0.00504,0.0
8,age,scales_bin,0.84489,,0.02691,0.0
9,age,quote,,0.00517,0.01388,0.0


Find best random predictor (we will use this to compare predictive accuracy based on actual IVs vs. best random IV)

In [30]:
random_best, random_best_score = [], []

for idx, c in enumerate(random_df.DV.unique()):
    sub_df = random_df[random_df['DV'] == c]
    if reg_vs_class[idx] == 'reg':
        best = sub_df['random_IV'][sub_df['r2'] == sub_df['r2'].max()].values[0]
        score = sub_df['r2'].max()
    else:
        best = sub_df['random_IV'][sub_df['perc_acc'] == sub_df['perc_acc'].max()].values[0]
        score = sub_df['perc_acc'].max()
    random_best.append(best)
    random_best_score.append(score)

### Cross-validated prediction
How well can we predict, if we run cross-validated linear regression or logistic regression?

In [41]:
K = 10
cv_mean_r2, cv_mean_mse, cv_mean_acc = [], [], []
est_lr = LinearRegression()
est_log = LogisticRegression()

for idx in range(len(grps)):
    gkfolds = GroupKFold(n_splits=K)
    X, y, grp = split_data(grps[idx], dvs[idx])
    if reg_vs_class[idx] == 'class':
        cv_scores = cross_validate(est_log, X, y, grp, cv=gkfolds)
        mean_r2 = np.nan
        mean_mse = np.nan
        perc_acc = cv_scores['test_score'].mean()
    else:
        cv_scores = cross_validate(est_lr, X, y, grp, cv=gkfolds, 
                                   scoring=('r2', 'neg_mean_absolute_error'))
        mean_r2 = cv_scores['test_r2'].mean()
        mean_mse = abs(cv_scores['test_neg_mean_absolute_error']).mean()
        perc_acc = np.nan
    cv_mean_acc.append(perc_acc)
    cv_mean_r2.append(mean_r2)
    cv_mean_mse.append(mean_mse)

summary['cv_mean_acc'] = cv_mean_acc
summary['cv_mean_r2'] = cv_mean_r2
summary['cv_mean_mse'] = cv_mean_mse
summary['dv_meaning'] = dvs_explain

In [42]:
summary

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type,reg_coef,reg_perc_acc,reg_r2,reg_pval,cv_mean_acc,cv_mean_r2,cv_mean_mse,dv_meaning,inv_cv_mean_acc,random_cv_mean_acc,random_cv_mean_r2,random_cv_mean_mse,random_best_score,random_best
0,anchoringNY,1.15905,2847.56614,0.50131,reg,1143.20716,,0.25131,0.0,,0.25381,818.03605,SF-NYC distance,0.51791,,0.00398,952.50331,0.00674,us_or_international
1,anchoringChicago,1.77999,1033554.90135,0.66481,reg,1964484.41985,,0.44198,0.0,,0.45383,855862.23748,Pop. Chicago,0.51245,,0.00306,1300676.43769,0.01364,sex
2,anchoringMtEverest,2.29785,11846.76164,0.75431,reg,22652.0418,,0.56898,0.0,,0.56915,8114.5744,Height Mt. Everest,0.5092,,-0.00124,13519.15789,0.00283,lab_or_online
3,anchoringBabies,2.18582,3202.9508,0.73748,reg,23497.38255,,0.54387,0.0,,0.54892,7813.4839,Babies born per day,0.527,,0.0009,14153.50319,0.00205,us_or_international
4,allowedforbidden,1.96363,0.61604,-0.70064,class,-3.72006,0.84584,,0.0,0.83265,,,Binary,0.84523,0.58516,,,0.58519,us_or_international
5,gamblersfallacy,0.6259,0.74034,0.2974,reg,1.69587,,0.08845,0.0,,0.07555,1.69756,Numer of times dice rolled,0.53373,,-0.00129,1.79426,0.00159,sex
6,gainlossframing,0.59908,0.09183,-0.28697,class,-1.18267,0.64312,,0.0,0.64389,,,Binary,0.64325,0.49435,,,0.51969,us_or_international
7,mathattitudes,0.53339,0.78257,0.24199,reg,0.25912,,0.05856,0.0,,0.04399,0.3962,IAT score,0.67789,,0.03929,0.39508,0.05856,sex
8,low/high scales,0.49686,0.04829,-0.24092,class,-1.4421,0.84472,,0.0,0.86555,,,Binary,0.60232,0.84486,,,0.84489,age
9,quoteattribution,0.32167,3.5373,-0.15877,reg,-0.69527,,0.02521,0.0,,0.0181,1.73949,Agreement with quote (1-9),0.53078,,-0.00366,1.79595,0.00517,age


### Reverse prediction
How well can we predict the condition/group, based on values in the dependent variable?

In [43]:
inv_cv_mean_acc = []

for idx in range(len(grps)):
    X, y, grp = split_data(grps[idx], dvs[idx])
    X = (X.squeeze() == np.unique(X)[0]) * 1
    y = (y == np.unique(y)[0]) * 1
    cv_scores = cross_validate(estimator=est_log, X=y.reshape(-1,1), y=X, groups=grp, cv=gkfolds)
    perc_acc = cv_scores['test_score'].mean()
    inv_cv_mean_acc.append(perc_acc)

summary['inv_cv_mean_acc'] = inv_cv_mean_acc

### Random CV prediction
How well can we predict DVs using cross-validation, if we use the best performing random variable?

In [44]:
cv_mean_r2, cv_mean_mse, cv_mean_acc = [], [], []

for idx in range(len(grps)):
    X, y, grp = split_data(random_best[idx], dvs[idx])
    if reg_vs_class[idx] == 'class':
        cv_scores = cross_validate(est_log, X, y, grp, cv=gkfolds)
        mean_r2 = np.nan
        mean_mse = np.nan
        perc_acc = cv_scores['test_score'].mean()
    else:
        cv_scores = cross_validate(est_lr, X, y, grp, cv=gkfolds, 
                                   scoring=('r2', 'neg_mean_absolute_error'))
        mean_r2 = cv_scores['test_r2'].mean()
        mean_mse = abs(cv_scores['test_neg_mean_absolute_error']).mean()
        perc_acc = np.nan
    cv_mean_acc.append(perc_acc)
    cv_mean_r2.append(mean_r2)
    cv_mean_mse.append(mean_mse)

summary['random_cv_mean_acc'] = cv_mean_acc
summary['random_cv_mean_r2'] = cv_mean_r2
summary['random_cv_mean_mse'] = cv_mean_mse
summary['random_best_score'] = random_best_score
summary['random_best'] = random_best

In [45]:
summary

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type,reg_coef,reg_perc_acc,reg_r2,reg_pval,cv_mean_acc,cv_mean_r2,cv_mean_mse,dv_meaning,inv_cv_mean_acc,random_cv_mean_acc,random_cv_mean_r2,random_cv_mean_mse,random_best_score,random_best
0,anchoringNY,1.15905,2847.56614,0.50131,reg,1143.20716,,0.25131,0.0,,0.25381,818.03605,SF-NYC distance,0.51916,,0.00115,978.73558,0.00674,us_or_international
1,anchoringChicago,1.77999,1033554.90135,0.66481,reg,1964484.41985,,0.44198,0.0,,0.45383,855862.23748,Pop. Chicago,0.5083,,-0.00104,1321509.75255,0.01364,sex
2,anchoringMtEverest,2.29785,11846.76164,0.75431,reg,22652.0418,,0.56898,0.0,,0.56915,8114.5744,Height Mt. Everest,0.51016,,-0.00353,13688.27567,0.00283,lab_or_online
3,anchoringBabies,2.18582,3202.9508,0.73748,reg,23497.38255,,0.54387,0.0,,0.54892,7813.4839,Babies born per day,0.52372,,-0.00139,14242.49668,0.00205,us_or_international
4,allowedforbidden,1.96363,0.61604,-0.70064,class,-3.72006,0.84584,,0.0,0.83265,,,Binary,0.83265,0.58751,,,0.58519,us_or_international
5,gamblersfallacy,0.6259,0.74034,0.2974,reg,1.69587,,0.08845,0.0,,0.07555,1.69756,Numer of times dice rolled,0.54361,,-0.00281,1.78726,0.00159,sex
6,gainlossframing,0.59908,0.09183,-0.28697,class,-1.18267,0.64312,,0.0,0.64389,,,Binary,0.64389,0.50203,,,0.51969,us_or_international
7,mathattitudes,0.53339,0.78257,0.24199,reg,0.25912,,0.05856,0.0,,0.04399,0.3962,IAT score,0.6956,,0.04399,0.3962,0.05856,sex
8,low/high scales,0.49686,0.04829,-0.24092,class,-1.4421,0.84472,,0.0,0.86555,,,Binary,0.59871,0.86522,,,0.84489,age
9,quoteattribution,0.32167,3.5373,-0.15877,reg,-0.69527,,0.02521,0.0,,0.0181,1.73949,Agreement with quote (1-9),0.53023,,-0.00635,1.78108,0.00517,age


### Tidy up summary
(Split regression and classification problems)

In [53]:
summary_reg = summary[summary['problem_type'] == 'reg'].dropna(axis=1)
summary_reg = summary_reg.rename({'random_best_score': 'random_reg_r2'}, axis=1)
summary_class = summary[summary['problem_type'] == 'class'].dropna(axis=1).drop('dv_meaning', axis=1)
summary_class = summary_class.rename({'random_best_score': 'random_reg_perc_acc'}, axis=1)

### Regression problems summary

In [54]:
summary_reg

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type,reg_coef,reg_r2,reg_pval,cv_mean_r2,cv_mean_mse,dv_meaning,inv_cv_mean_acc,random_cv_mean_r2,random_cv_mean_mse,random_reg_r2,random_best
0,anchoringNY,1.15905,2847.56614,0.50131,reg,1143.20716,0.25131,0.0,0.25381,818.03605,SF-NYC distance,0.51916,0.00115,978.73558,0.00674,us_or_international
1,anchoringChicago,1.77999,1033554.90135,0.66481,reg,1964484.41985,0.44198,0.0,0.45383,855862.23748,Pop. Chicago,0.5083,-0.00104,1321509.75255,0.01364,sex
2,anchoringMtEverest,2.29785,11846.76164,0.75431,reg,22652.0418,0.56898,0.0,0.56915,8114.5744,Height Mt. Everest,0.51016,-0.00353,13688.27567,0.00283,lab_or_online
3,anchoringBabies,2.18582,3202.9508,0.73748,reg,23497.38255,0.54387,0.0,0.54892,7813.4839,Babies born per day,0.52372,-0.00139,14242.49668,0.00205,us_or_international
5,gamblersfallacy,0.6259,0.74034,0.2974,reg,1.69587,0.08845,0.0,0.07555,1.69756,Numer of times dice rolled,0.54361,-0.00281,1.78726,0.00159,sex
7,mathattitudes,0.53339,0.78257,0.24199,reg,0.25912,0.05856,0.0,0.04399,0.3962,IAT score,0.6956,0.04399,0.3962,0.05856,sex
9,quoteattribution,0.32167,3.5373,-0.15877,reg,-0.69527,0.02521,0.0,0.0181,1.73949,Agreement with quote (1-9),0.53023,-0.00635,1.78108,0.00517,age
11,sunkcost,0.27229,4.62568,-0.13488,reg,-0.60626,0.01819,0.0,-0.01631,1.61474,Likelihood to attend game (1-9),0.52241,0.00444,1.58308,0.0411,age
12,imaginedcontact,0.12698,2.09677,0.06337,reg,0.24608,0.00402,0.0,-0.03647,1.59187,Aggregate willingness to interact (1-9),0.51056,-0.03897,1.58788,0.00229,sex
13,flagpriming,0.02818,0.00356,0.01409,reg,0.02846,0.0002,0.26526,-0.06036,0.76784,Aggregate political conservatism (1-7),0.50261,-0.03506,0.75536,0.01864,us_or_international


### Classification problems summary

In [55]:
summary_class

Unnamed: 0,effect,cohensd,std_mean_diff,pearsons_r,problem_type,reg_coef,reg_perc_acc,reg_pval,cv_mean_acc,inv_cv_mean_acc,random_cv_mean_acc,random_reg_perc_acc,random_best
4,allowedforbidden,1.96363,0.61604,-0.70064,class,-3.72006,0.84584,0.0,0.83265,0.83265,0.58751,0.58519,us_or_international
6,gainlossframing,0.59908,0.09183,-0.28697,class,-1.18267,0.64312,0.0,0.64389,0.64389,0.50203,0.51969,us_or_international
8,low/high scales,0.49686,0.04829,-0.24092,class,-1.4421,0.84472,0.0,0.86555,0.59871,0.86522,0.84489,age
10,reciprocity,0.29691,1.05601,0.14684,class,0.65453,0.70491,0.0,0.69077,0.56358,0.69051,0.70506,sex


### To dos
- Get all error
- Opt: anchoring back to Rank?; point-wise prediction accuracy for LogReg?