## Generate SPSS Sav

*Last update: 2021-10-29*

In [2]:
import pandas as pd
import numpy as np
import os
import json
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import importlib
from scipy.stats import circvar, circstd, pearsonr
import pyreadstat

from trial import Trial
from util import dist
from constants import COND_LABELS, FS
import plots
import analysis_helper


# Re-import the local helper modules so that edits made to them are picked up
# without restarting the kernel.
for _module in (plots, analysis_helper):
    importlib.reload(_module)

# Apply the matplotlib style sheet that sits next to this notebook.
plt.style.use('./jg.mplstyle')

### Load data

In [3]:
# Read the pickled trials table and derive second_mover_delay as
# second_mover_time minus first_mover_time.
df_trials = pd.read_pickle('../data/df_trials_v4_spatial.pickle').assign(
    second_mover_delay=lambda trials: trials['second_mover_time'] - trials['first_mover_time']
)

In [4]:
# Dependent variables: the df_trials columns that are averaged for the SPSS export.
DVs = [
    'fairness',
    'goal_color_split',
    'total_moves',
    'successful',
    'score',
]

### Drop practice trials

In [5]:
# Drop practice trials. The explicit .copy() makes df_trials an independent frame
# rather than a slice of the loaded table, so adding columns to it later does not
# trigger pandas' SettingWithCopyWarning.
df_trials = df_trials.loc[df_trials.practice == 0].copy()

### Prepare data in subject observation format for SPSS ANOVAs

In [7]:
# We want 1 row per subject (one row per match_nick), with each DV aggregated across the within-subject factors,
# so e.g. fairness_count_C_bal_E | fairness_count_C_bal_FB | fairness_count_G_bal_E | fairness_count_G_bal_FB
# We'll then use these as two 2-level within-subjects factors in SPSS's repeated-measures general linear model.

In [6]:
# Boolean mask for the analysis of "steady state" behavior:
# trials with 20 < trial <= 40, or trial >= 62.
in_second_block_half = (
    (df_trials['trial'].gt(20) & df_trials['trial'].le(40))
    | df_trials['trial'].ge(62)
)

In [9]:
# Build the wide table for SPSS: one row per match_nick and, for every DV, one column per
# (cond_count, cond_bal) cell plus an "_h2" twin computed only from the trials selected
# by in_second_block_half.
match_nicks = df_trials.match_nick.unique()
count_levels = ["C", "G"]
bal_levels = ['E', 'FB']
cell_keys = ['match_nick', 'cond_count', 'cond_bal']

# Flag the trials selected by in_second_block_half (h2 == 1), everything else is 0.
df_trials['h2'] = 0
df_trials.loc[in_second_block_half, 'h2'] = 1

# Mean of every DV per match_nick and condition cell: over all trials, and over h2 trials only.
dv_grouped = df_trials.groupby(cell_keys)[DVs].mean()
dv_grouped_h2 = df_trials[df_trials.h2 == 1].groupby(cell_keys)[DVs].mean()

# match_nicks in the order in which groupby emits them; this fixes the output row order.
grouped_nicks = dv_grouped.index.get_level_values('match_nick').unique()

# Reindex both tables onto the full match_nick x count x bal grid so that a cell without
# any trials becomes NaN instead of raising KeyError during the lookup below.
full_grid = pd.MultiIndex.from_product(
    [grouped_nicks, count_levels, bal_levels], names=cell_keys
)
cell_means = dv_grouped.reindex(full_grid).to_dict('index')
cell_means_h2 = dv_grouped_h2.reindex(full_grid).to_dict('index')

# first_counter_block of the first trial row of each match_nick, found in a single pass
# (drop_duplicates keeps the first occurrence).
first_block_by_nick = (
    df_trials.drop_duplicates('match_nick')
    .set_index('match_nick')['first_counter_block']
    .to_dict()
)

data = {}  # match_nick -> record (one output row)
for nick in grouped_nicks:
    # "group" goes first so that it stays the leading column of the exported table.
    record = {"group": 1 if first_block_by_nick[nick] == "C" else 2}
    for dv in DVs:
        for count in count_levels:
            for bal in bal_levels:
                column = "%s_count_%s_bal_%s" % (dv, count, bal)
                record[column] = cell_means[(nick, count, bal)][dv]
                record[column + "_h2"] = cell_means_h2[(nick, count, bal)][dv]
    record["match_nick"] = nick
    data[nick] = record

# Build the frame once from all records instead of concatenating row by row; this is
# linear in the number of rows and lets pandas infer numeric dtypes for the DV columns.
if data:
    df_subj_spss = pd.DataFrame(list(data.values()))
else:
    df_subj_spss = pd.DataFrame(columns=["group"])

df_subj_spss["group"] = df_subj_spss["group"].astype(int)

In [17]:
# Write the wide table to an SPSS .sav file.
pyreadstat.write_sav(
    df_subj_spss,
    dst_path='../data/df_subjs_v4_spss.sav',
)