In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from scipy.stats import sem
from scipy.stats import pearsonr
from scipy.stats import shapiro
from scipy.stats import normaltest
from scipy.stats import ttest_rel, ttest_ind

In [2]:
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector

# Grab a handle to R's "utils" package (it provides the CRAN helpers used below)
# and point R at the first CRAN mirror in its list.
utils = importr('utils')
utils.chooseCRANmirror(ind=1)

# R packages this notebook wants to have available.
packnames = [
    'lme4',
    'optimx',
    'pbkrtest',
    'lmerTest',
    'ggplot2',
    'dplyr',
    'sjPlot',
    'car',
]

# Install only the packages that R does not already have.
names_to_install = [pkg for pkg in packnames if not rpackages.isinstalled(pkg)]
if names_to_install:
    utils.install_packages(StrVector(names_to_install))

In [3]:
# Load the rpy2 IPython extension for this kernel.
%load_ext rpy2.ipython

# Load each R package through rpy2 and keep a Python handle to it.
# NOTE(review): 'sjmisc' and 'lattice' are loaded here but are not listed in
# the `packnames` install list of the setup cell -- presumably they are already
# present (as a dependency / shipped with R); confirm on a fresh R library.
# NOTE(review): the load order may matter for name masking on the R side;
# confirm before reordering these lines.
lme4 = importr('lme4')
optimx = importr('optimx')
pbkrtest = importr('pbkrtest')
lmerTest = importr('lmerTest')
ggplot = importr('ggplot2')
dplyr = importr('dplyr')
sjp = importr('sjPlot')
sjmisc = importr('sjmisc')
car = importr('car')
lattice = importr('lattice')

In [4]:
# Project locations.
# NOTE(review): home_dir is an absolute path on one specific machine; it has to
# be edited before the notebook can run anywhere else.
home_dir = (
    '/Users/dbao/google_drive_db'
    '/road_construction/data/2022_online/'
)
map_dir = 'active_map/'
data_dir = 'data/preprocessed'

# Output folders, both located underneath home_dir.
out_dir = f'{home_dir}figures/figures_all/'
R_out_dir = f'{home_dir}R_analysis_data/'

In [5]:
# Puzzle-level table, plus a copy of it ordered by subject, puzzle and condition.
data_puzzle_level = pd.read_csv(f'{R_out_dir}data.csv')
puzzleID_order_data = data_puzzle_level.sort_values(
    by=["subjects", "puzzleID", "condition"]
)

# Choice-level table (one row per choice).
data_choice_level = pd.read_csv(f'{R_out_dir}choice_level/choicelevel_data.csv')

# Independent copy of the condition == 1 choices. reset_index() keeps the old
# row labels in a new 'index' column.
sc_condition_data = (
    data_choice_level.loc[data_choice_level['condition'] == 1]
    .copy()
    .reset_index()
)

In [None]:
# Keep choices that are either not an undo (undo == 0) or are flagged as the
# first undo (firstUndo == 1). reset_index() keeps the old row labels in a new
# 'index' column.
data_choice_level_df = data_choice_level[
    data_choice_level['undo'].eq(0) | data_choice_level['firstUndo'].eq(1)
].reset_index()

In [39]:
# Build `dat`, the puzzle-level frame used by the analyses below.
#
# `condition` was previously used without ever being defined, so this cell
# raised NameError on a fresh kernel; it is now taken from the puzzle-level
# table like every other column.
condition = np.array(data_puzzle_level.condition)
subject = np.array(data_puzzle_level.subjects)
puzzleID = np.array(data_puzzle_level.puzzleID)

mas = np.array(data_puzzle_level.mas)
num_loss = np.array(data_puzzle_level.mas) - np.array(data_puzzle_level.numCities)

numUndo = np.array(data_puzzle_level.numUNDO)
numFullUndo = np.array(data_puzzle_level.numFullUndo)

# The *_factor* and *_center* columns start out as plain copies of the undo
# counts; later cells overwrite them with recoded / centered values.
dat = pd.DataFrame({'condition':condition,'subject':subject, 'puzzleID':puzzleID,
                    'mas':mas, 'num_loss':num_loss,
                    'numUndo':numUndo, 'numFullUndo':numFullUndo,
                    'numFullUndo_factor':numFullUndo,'numFullUndo_factor1':numFullUndo,'numFullUndo_factor2':numFullUndo,'numFullUndo_factor3':numFullUndo,
                    'numUndo_center':numUndo,'numFullUndo_center':numFullUndo,
                    'numUndo_center0':numUndo,'numFullUndo_center0':numFullUndo})

# Data fix: rows whose num_loss is -1 get mas overwritten with 9, then num_loss
# is recomputed from the corrected mas.
# NOTE(review): presumably the stored `mas` is wrong for those puzzles --
# confirm why the replacement value is 9.
dat.loc[dat.num_loss == -1, 'mas'] = 9
num_loss = np.array(dat.mas) - np.array(data_puzzle_level.numCities)
dat.num_loss = num_loss

In [40]:
# Center the undo counts within subject, using condition == 1 (undo) rows only.
# Rows from other conditions keep the raw counts they were initialised with.
#
# The target columns are cast to float first: they presumably hold integer
# counts, while the centered values are fractional. Writing floats into an
# integer column relies on silent upcasting, which pandas has deprecated.
dat['numFullUndo_center'] = dat['numFullUndo_center'].astype(float)
dat['numUndo_center'] = dat['numUndo_center'].astype(float)

for si in np.unique(dat.subject):
    # Row labels of this subject's undo-condition puzzles.
    index = dat.index[(dat['condition'] == 1) & (dat['subject'] == si)]
    dat.loc[index, 'numFullUndo_center'] = dat.loc[index, 'numFullUndo'] - dat.loc[index, 'numFullUndo'].mean()
    dat.loc[index, 'numUndo_center'] = dat.loc[index, 'numUndo'] - dat.loc[index, 'numUndo'].mean()


In [41]:
# Assumption here: there is no effect of not undoing, so puzzles without a full
# undo are left untouched and only puzzles with numFullUndo != 0 are centered
# (within subject, condition == 1 rows only).
#
# NOTE(review): rows with numFullUndo == 0 keep whatever numUndo_center0 already
# holds (the raw numUndo count), which can be non-zero -- confirm this is the
# intended coding for numUndo_center0.
#
# The target columns are cast to float first: they presumably hold integer
# counts, while the centered values are fractional. Writing floats into an
# integer column relies on silent upcasting, which pandas has deprecated.
dat['numFullUndo_center0'] = dat['numFullUndo_center0'].astype(float)
dat['numUndo_center0'] = dat['numUndo_center0'].astype(float)

for si in np.unique(dat.subject):
    # Row labels of this subject's undo-condition puzzles with at least one full undo.
    index = dat.index[
        (dat['condition'] == 1) & (dat['subject'] == si) & (dat['numFullUndo'] != 0)
    ]
    dat.loc[index, 'numFullUndo_center0'] = dat.loc[index, 'numFullUndo'] - dat.loc[index, 'numFullUndo'].mean()
    dat.loc[index, 'numUndo_center0'] = dat.loc[index, 'numUndo'] - dat.loc[index, 'numUndo'].mean()

In [42]:
# Cap numFullUndo_factor at 2: every value of 2 or more becomes 2, smaller
# values (including 0 and 1) are left as they are.
dat['numFullUndo_factor'] = dat['numFullUndo_factor'].clip(upper=2)

In [43]:
# numFullUndo_factor1: -0.5 where numFullUndo_factor is 1, +0.5 where it is 2
# or more; all other rows keep their current value.
#
# The column is cast to float first: it presumably holds integer counts, and
# writing +/-0.5 into an integer column relies on silent upcasting, which
# pandas has deprecated.
dat['numFullUndo_factor1'] = dat['numFullUndo_factor1'].astype(float)
dat.loc[dat['numFullUndo_factor'] == 1, 'numFullUndo_factor1'] = -0.5
dat.loc[dat['numFullUndo_factor'] >= 2, 'numFullUndo_factor1'] = 0.5

In [51]:
# Three-level codings of the number of full undos.
#
# Both codings read the raw `numFullUndo` count. The conditions used to read
# `numFullUndo_factor`, which an earlier cell caps at 2, so the "> 2" level of
# numFullUndo_factor2 could never be reached and the column never received the
# value 1.

# numFullUndo_factor2: 0 or 1 full undo -> -1, exactly 2 -> 0, more than 2 -> 1.
dat.loc[dat['numFullUndo'] <= 1, 'numFullUndo_factor2'] = -1
dat.loc[dat['numFullUndo'] == 2, 'numFullUndo_factor2'] = 0
dat.loc[dat['numFullUndo'] > 2, 'numFullUndo_factor2'] = 1

# numFullUndo_factor3: no full undo -> -1, exactly 1 -> 0, more than 1 -> 1.
# (Same result as when reading the capped numFullUndo_factor column.)
dat.loc[dat['numFullUndo'] == 0, 'numFullUndo_factor3'] = -1
dat.loc[dat['numFullUndo'] == 1, 'numFullUndo_factor3'] = 0
dat.loc[dat['numFullUndo'] > 1, 'numFullUndo_factor3'] = 1