# Behavior-trait analysis, demographic data

In [None]:
import os

import numpy as np
import scipy as sp
from scipy import stats
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

plt.rcParams.update({'font.size': 16})

In [None]:
# setting default fontsizes for plots

s=20# CHANGE FONTSIZE HERE

plt.rc('font', size=s) #controls default text size
plt.rc('axes', titlesize=s) #fontsize of the title
plt.rc('axes', labelsize=s) #fontsize of the x and y labels
plt.rc('xtick', labelsize=s) #fontsize of the x tick labels
plt.rc('ytick', labelsize=s) #fontsize of the y tick labels
plt.rc('legend', fontsize=s) #fontsize of the legend
plt.rcParams['savefig.facecolor']='white'
#import matplotlib as mpl
#mpl.rcParams['font.weight']= 'normal'



Read in restricted behavioral data:

In [None]:
res_behav_data = pd.read_csv('../data/RESTRICTED_esfinn_11_21_2021_19_19_35.csv')
res_behav_data.set_index("Subject", inplace=True)
res_behav_data.index = res_behav_data.index.map(str)
print(res_behav_data.shape)
res_behav_data.head()

Read in unrestricted behavioral data:

In [None]:
unres_behav_data = pd.read_csv('../data/unrestricted_esfinn_11_21_2021_19_19_13.csv')
unres_behav_data.set_index("Subject", inplace=True)
unres_behav_data.index = unres_behav_data.index.map(str)
print(unres_behav_data.shape)
unres_behav_data.head()

In [None]:
print(res_behav_data.columns)
print(unres_behav_data.columns)

# Figuring out how ASR_Intn_T was computed:

In [None]:
# Check whether ASR_Intn_Raw equals the sum of the three raw syndrome-scale columns below.
# Column choice is based on:
# (1) https://wiki.humanconnectome.org/display/PublicData/HCP-YA+Data+Dictionary-+Updated+for+the+1200+Subject+Release
# (2) https://aseba.org/wp-content/uploads/2019/02/asrprofile.pdf
Intn_cols = ['ASR_Anxd_Raw','ASR_Witd_Raw','ASR_Soma_Raw']
# Row-wise sum as a plain numpy array; pandas skips missing values by default, so a row with
# all three columns missing presumably sums to 0 rather than NaN.
Intn_sum = res_behav_data[['ASR_Anxd_Raw','ASR_Witd_Raw','ASR_Soma_Raw']].sum(axis = 1).values
# Subject IDs where the computed sum differs from the stored ASR_Intn_Raw value
# (a missing ASR_Intn_Raw never compares equal, so such rows are listed too).
uneq_rows = res_behav_data.index[Intn_sum != res_behav_data.loc[:,'ASR_Intn_Raw']]
uneq_rows # rows where the sum of the Intn_cols columns doesn't match the ASR_Intn_Raw score

In [None]:
res_behav_data.loc[uneq_rows,Intn_cols] # these non-matching rows are all empty

-  meaning that ASR_Intn_Raw is the sum of the raw entries in the 3 columns in Intn_cols

In [None]:
asr_cols = [i for i in res_behav_data.columns if i[:3] == 'ASR' ]
#colnum = [i for i in range(len(res_behav_data.columns)) if res_behav_data.columns[i][:3] == 'ASR' ]
asr_cols

In [None]:
# Same check as for the raw scores, now asking whether ASR_Intn_T equals the sum of the
# normed syndrome-scale columns below. Column choice is based on:
# (1) https://wiki.humanconnectome.org/display/PublicData/HCP-YA+Data+Dictionary-+Updated+for+the+1200+Subject+Release
# (2) https://aseba.org/wp-content/uploads/2019/02/asrprofile.pdf
# NOTE(review): the first column is a `_Pct` column while the other two are `_T` columns --
# confirm this mix is intended.
Intn_cols = ['ASR_Anxd_Pct','ASR_Witd_T','ASR_Soma_T']
# Row-wise sum as a plain numpy array (this overwrites the raw-score Intn_sum computed earlier).
Intn_sum = res_behav_data[['ASR_Anxd_Pct','ASR_Witd_T','ASR_Soma_T']].sum(axis = 1).values
# Subject IDs where the computed sum differs from the stored ASR_Intn_T value.
uneq_rows = res_behav_data.index[Intn_sum != res_behav_data.loc[:,'ASR_Intn_T']]
uneq_rows # rows where the sum of the Intn_cols columns doesn't match the ASR_Intn_T score

In [None]:
res_behav_data[Intn_cols].mean(axis=1)[:3], res_behav_data['ASR_Intn_T'][:3]

In [None]:
res_behav_data[['ASR_Anxd_Raw','ASR_Witd_Raw','ASR_Soma_Raw','ASR_Anxd_Pct','ASR_Witd_T','ASR_Soma_T']]

In [None]:
Intn_sum[:5]

In [None]:
res_behav_data.loc[:,'ASR_Intn_T'].values[:5]

In [None]:
res_behav_data[['Age_in_Yrs', 'ASR_Intn_Raw', 'ASR_Intn_T']]


Combine restricted and unrestricted behavioral data into a single dataframe:

In [None]:
behav_data = pd.concat([res_behav_data, unres_behav_data], axis=1)
print(behav_data.shape)
behav_data.head()

In [None]:
behav_data['ASR_Intn_T'].describe()

# Behavior-trait analysis

Read in task response data, join into single dataframe, then join this with the larger dataframe:

In [None]:
# Directory holding the per-subject task-response CSVs read in the cells below.
# The original author's local Dropbox location stays the default, but it can be overridden
# without editing the notebook by setting the HCP_BEHAV_DATA_DIR environment variable.
behavioral_data_drive = os.environ.get(
    'HCP_BEHAV_DATA_DIR',
    r'/Users/f0053cz/Dropbox (Dartmouth College)/postdoc_Dartmouth/HCP/BehaviorAnalyses/Documented scripts/data/data_for_paper',
)
behavioral_data_drive

In [None]:
task_data = pd.read_csv(os.path.join(behavioral_data_drive, '1b_S_NS_responses_per_subj.csv'))
task_data.set_index("subj_idx", inplace=True)
task_data.index.rename("Subject", inplace=True)
task_data.index = task_data.index.map(str)
print(task_data.shape)
task_data.head()

In [None]:
task_data.iloc[0,:]

In [None]:
task_data2 = pd.read_csv(os.path.join(behavioral_data_drive, '1e_S_NS_pc_uncertainResp_per_subj.csv'))
task_data2.set_index("subj_idx", inplace=True)
task_data2.index.rename("Subject", inplace=True)
task_data2.index = task_data2.index.map(str)
task_data2["pc_unc_total"] = (task_data2["pc_unc_Mental"] + task_data2["pc_unc_Rand"])/2
print(task_data2.shape)
task_data2.head()

In [None]:
task_data = task_data.join(task_data2, how='inner')
print(task_data.shape)
task_data.head()

In [None]:
data = task_data.join(behav_data, how='inner')
print(data.shape)
data.head()

In [None]:
# Demographics for the paper
data.groupby(['Gender']).count()

In [None]:
data['Age_in_Yrs'].describe()

Explore correlations between proportion of "social" responses and various trait phenotypes:

In [None]:
def explore_correlation(x, y, data=None):
    """
    Plot the relationship between two columns and report their Spearman correlation.

    Draws a seaborn joint plot (regression scatter plus the distributions of x and y) and prints
    Spearman's r and its p-value, computed over the rows where both columns are present.

    Parameters
    ----------
    x, y : str
        Names of the columns of `data` to compare.
    data : pandas.DataFrame, optional
        Frame to analyse. When omitted, the notebook-level `data` frame is looked up at call
        time (not at definition time), so columns joined onto `data` after this function was
        defined are still available.

    Returns
    -------
    The grid object returned by `sns.jointplot`, so callers can relabel axes, annotate and save.
    """
    if data is None:
        # Resolve the default lazily: a `data=data` default would freeze whatever frame existed
        # when this cell ran and silently ignore later additions to the global frame.
        data = globals()['data']

    sns.set_style("white")

    both_present = data[x].notna() & data[y].notna()  # rows where neither x nor y is NaN

    g = sns.jointplot(x=x, y=y, data=data, kind='reg', color='gray')

    # Only the Spearman correlation is reported.
    rs, ps = stats.spearmanr(data.loc[both_present, x], data.loc[both_present, y])
    print(f'Spearman r={rs}, p = {ps}')

    return g

In [None]:
# The following function was taken from here: https://github.com/psinger/CorrelationStats/blob/master/corrstats.py

from scipy.stats import t, norm
from math import atanh, pow
from numpy import tanh

def rz_ci(r, n, conf_level = 0.95):
    """
    Confidence interval for a correlation coefficient via Fisher's z-transform.

    @param r: correlation coefficient
    @param n: number of observations the coefficient is based on
    @param conf_level: confidence level of the interval
    @return: numpy array (lower, upper), back-transformed to the correlation scale
    """
    # Standard error of the z-transformed coefficient, scaled by the normal quantile.
    upper_quantile = norm.ppf(1 - (1 - conf_level)/2.0)
    half_width = upper_quantile * pow(1/(n - 3), .5)
    z_centre = atanh(r)
    return tanh((z_centre - half_width, z_centre + half_width))

def rho_rxy_rxz(rxy, rxz, ryz):
    """
    Correlation between the two dependent coefficients rxy and rxz, given ryz.

    @param rxy: correlation coefficient between x and y
    @param rxz: correlation coefficient between x and z
    @param ryz: correlation coefficient between y and z
    @return: the ratio used as the correlation term in the 'zou' interval of dependent_corr
    """
    sq_xy = pow(rxy,2)
    sq_xz = pow(rxz,2)
    sq_yz = pow(ryz,2)
    numerator = (ryz-1/2.*rxy*rxz)*(1-sq_xy-sq_xz-sq_yz)+pow(ryz,3)
    denominator = (1 - sq_xy) * (1 - sq_xz)
    return numerator/float(denominator)

def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
    """
    Calculates the statistic significance between two dependent correlation coefficients
    (two correlations that share the variable x).

    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method.
        The one-tailed p-value is computed from |t|, i.e. for the observed direction of the difference.
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'steiger' or 'zou'
    @return: (t, p-val) for 'steiger'; (lower, upper) bounds of the interval for xy - xz for 'zou'
    @raise ValueError: if `method` is neither 'steiger' nor 'zou'
    """
    if method == 'steiger':
        d = xy - xz
        determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
        av = (xy + xz)/2
        cube = (1 - yz) * (1 - yz) * (1 - yz)

        t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
        # Survival function == 1 - cdf, but without the cancellation error for very small p.
        p = t.sf(abs(t2), n - 3)

        if twotailed:
            p *= 2

        return t2, p
    elif method == 'zou':
        # One interval per coefficient; each call returns (lower, upper).
        L1, U1 = rz_ci(xy, n, conf_level=conf_level)
        L2, U2 = rz_ci(xz, n, conf_level=conf_level)
        rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
        lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
        upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
        return lower, upper
    else:
        # ValueError is a subclass of Exception, so existing `except Exception` callers still work.
        raise ValueError(f"Wrong method! Got {method!r}, expected 'steiger' or 'zou'")





In [None]:
data.shape, data.head()

## DV1: response % social - % nonsocial

In [None]:
# Correlate the social-minus-nonsocial response percentage with the internalizing T-score,
# then relabel, annotate and save the joint plot.
x = "Soc-NonSoc_pc"
y = "ASR_Intn_T"

g = explore_correlation(x, y)
g.ax_joint.set_xlabel('%"Social" - %"Non-social"')
g.ax_joint.set_ylabel('Internalizing score (T)')
#plt.text(0.1,10,r"$r_{S}=-.58$"f'\n(p='r"$.08^+)$")
g.ax_joint.set_xlim(-100,102)
# NOTE(review): the r_S and p values in this annotation are typed in by hand, not taken from the
# Spearman result printed by explore_correlation -- re-check them whenever the data change.
g.ax_joint.annotate(r"$ r_{S}=.1$"f'\n(p='r"$.003)$", xy=(.02,.85), xycoords='axes fraction',fontsize=20) # annotated values: r_S = .1, p = .003
g.savefig('../results/soc-nonsoc_vs_ASRIntnT.png')

In [None]:
# Same plot as for the internalizing score, now against the externalizing T-score.
x = "Soc-NonSoc_pc"
y = "ASR_Extn_T"
g = explore_correlation(x, y)
g.ax_joint.set_ylabel('Externalizing score (T)')
g.ax_joint.set_xlabel('%"Social" - %"Non-social"')
g.ax_joint.set_xlim(-100,102)
#plt.text(0.1,10,r"$r_{S}=-.58$"f'\n(p='r"$.08^+)$")
# NOTE(review): the r_S and p values in this annotation are typed in by hand, not taken from the
# Spearman result printed by explore_correlation -- re-check them whenever the data change.
g.ax_joint.annotate(r"$ r_{S}=.06$"f'\n(p='r"$.096)$", xy=(.02,.85), xycoords='axes fraction',fontsize=20) # annotated values: r_S = .06, p = .096
# NOTE(review): the output filename spells "Exnt" (not "Extn"); left unchanged in case other
# files refer to it.
g.savefig('../results/soc-nonsoc_vs_ASRExntT.png')

In [None]:
x = "Soc-NonSoc_pc"
y = "ASR_Intn_T"
z = "ASR_Extn_T"

inds = ~np.isnan(data[x]) & ~np.isnan(data[y]) & ~np.isnan(data[z])
print(inds.sum())

rs_xy = stats.spearmanr(data[x][inds], data[y][inds])[0]
rs_xz = stats.spearmanr(data[x][inds], data[z][inds])[0]
rs_yz = stats.spearmanr(data[y][inds], data[z][inds])[0]
print(rs_xy)
print(rs_xz)
print(rs_yz)

Determine whether the difference in correlation between `Soc-NonSoc_pc` and internalizing vs externalizing symptoms is statistically significant:

In [None]:
n = inds.sum()

xy = stats.spearmanr(data[x][inds], data[y][inds])[0]
xz = stats.spearmanr(data[x][inds], data[z][inds])[0]
yz = stats.spearmanr(data[y][inds], data[z][inds])[0]

dependent_corr(xy, xz, yz, n, twotailed=False)

## 2. RANDOM MECH

In [None]:
def box_plot(data, edge_color, fill_color, pos, v, ax=None, alpha=.2):
    """
    Draw a single colour-customised box plot at position `pos`.

    Parameters
    ----------
    data : numeric array-like (e.g. a pandas Series)
        Values to summarise; NaN entries are removed before plotting.
    edge_color : colour
        Used for the box outline, whiskers, caps, means and flier edges.
    fill_color : colour
        Used for the box face and the flier faces.
    pos : number
        Position of the box along the category axis.
    v : bool
        Passed to `boxplot` as `vert`.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes, which is the axes a caller has just
        created with `plt.subplots()`.
    alpha : float, optional
        Transparency of the box face and of the fliers.

    Returns
    -------
    The dictionary of artists returned by `Axes.boxplot`.
    """
    if ax is None:
        # Avoid depending on a notebook-level variable that happens to be called `ax`.
        ax = plt.gca()

    values = data[~np.isnan(data)]
    bp = ax.boxplot(values, positions = [pos], patch_artist=True, widths=.6, vert=v,
                    flierprops = dict(markeredgecolor=edge_color, markerfacecolor=fill_color, alpha=alpha))

    for element in ['boxes', 'whiskers', 'fliers', 'means', 'caps']:
        plt.setp(bp[element], color=edge_color)
    # Medians stand out from the coloured box: black, thick, dashed.
    plt.setp(bp['medians'], color='k', linewidth=2, ls='dashed')

    for patch in bp['boxes']:
        patch.set(facecolor=fill_color, alpha=alpha)

    return bp

In [None]:
#red_rgb = [103,0,31] # edges of RdBu
#blue_rgb = [5,48,97] # edges of RdBu
red_rgb =[188,61,62] # from Emily
blue_rgb = [54,122,177] # from Emily
red_rgb = np.array(red_rgb)/255
blue_rgb = np.array(blue_rgb)/255
alpha = .2 # transparency inside boxplots, for datapts etc.

#colors = [red_rgb,blue_rgb]
# Set your custom color palette
#myPalette = sns.set_palette(sns.color_palette(colors))

In [None]:
# Look at responses on RANDMECH: load the trial-level data and attach each subject's response
# to the "Random mechanical.AVI" movie to `data` as `rand_mech_response`.
all_trial_data = pd.read_csv(os.path.join(behavioral_data_drive, 'hcp_social_826subs.csv'))
all_trial_data = all_trial_data.set_index("subj_idx").rename_axis("Subject")
all_trial_data.index = all_trial_data.index.map(str)  # string IDs, matching the index of `data`

is_rand_mech = all_trial_data["movie"] == "Random mechanical.AVI"
rand_mech_response = all_trial_data.loc[is_rand_mech, "response"].rename("rand_mech_response")

# Drop any copy left by a previous run of this cell so re-running does not create a
# duplicate `rand_mech_response` column.
data = data.drop(columns="rand_mech_response", errors="ignore").join(rand_mech_response)
data.head()

In [None]:
# Create a new variable denoting subjects who responded either "yes" or "unsure"
# (any response code greater than 0).
# NOTE(review): a missing rand_mech_response compares as False here, so subjects without a
# recorded response end up in the same group as the "Non-social" responders -- confirm that
# no such subjects exist or exclude them explicitly.
data["rand_mech_yesorunsure"] = data["rand_mech_response"] > 0
y = "ASR_Intn_T"

In [None]:
#data1 = data.loc[data['rand_mech_response']!=9,:]
#data1.shape
fig, ax = plt.subplots(figsize=(5,6))
y = "ASR_Intn_T"
#x = "rand_mech_response"
x = "rand_mech_yesorunsure"
rows = data[x] == 0
bp2 = box_plot(data.loc[rows,y], blue_rgb, blue_rgb,0,True)
print(f'"Non-social": Mean int score: {np.nanmean(data.loc[rows,y])},SE: {stats.sem(data.loc[rows,y],nan_policy="omit")}')
rows = data[x] == 1
bp1 = box_plot(data.loc[rows,y], red_rgb, red_rgb,1,True)
print(f'"Social" or "Unsure": Mean int score: {np.nanmean(data.loc[rows,y])},SE: {stats.sem(data.loc[rows,y],nan_policy="omit")}')
plt.xticks(range(2),['"Non-social"','"Social"/\n "Unsure"'])
tscore, p = stats.ttest_ind(data[data[x]==1][y], data[data[x]==0][y], nan_policy='omit')
print(f't={tscore}, p={p}')
#plt.ylabel('ASR_Intn_T')
plt.ylim(30,104)
plt.plot([0,0,1,1],[96,98,98,96],linewidth = 1,color='k')
plt.plot([0.5],[99]*1,'*',color='k')

#ax.annotate(f't = {scoret:.2f}\n(p = {p:.2g})', xy=(.02,.85), xycoords='axes fraction')
plt.title('RANDOM MECH',fontweight='bold')
plt.ylabel('Internalizing score (T)')
#plt.ylim(0,100)
fig.savefig('../results/RANDMECHresp_vs_ASR_Intn_T.png',bbox_inches='tight')

In [None]:
# Compare the internalizing-minus-externalizing difference score between subjects who called the
# RANDOM MECH movie "Social"/"Unsure" and those who called it "Non-social".
# Copy, so the new column is added to an independent frame rather than to a slice of `data`
# (which triggers SettingWithCopyWarning and may not stick).
df_rand_mech = data[['rand_mech_yesorunsure', 'ASR_Extn_T', 'ASR_Intn_T']].copy()
df_rand_mech['int_ext'] = df_rand_mech['ASR_Intn_T'] - df_rand_mech['ASR_Extn_T']

x = 'rand_mech_yesorunsure'
y = 'int_ext'

tscore_rand_intext, p_rand_intext = stats.ttest_ind(
    df_rand_mech.loc[df_rand_mech[x]==1, y],
    df_rand_mech.loc[df_rand_mech[x]==0, y],
    nan_policy='omit',
)
tscore_rand_intext,p_rand_intext

In [None]:
df_rand_mech

In [None]:
# Reshape df_rand_mech to long format: one row per subject and trait type, with columns
# (subject index), socialness, trait_type ('Intn'/'Extn') and trait_score.
# Built with pd.concat because DataFrame.append no longer exists in pandas >= 2.0, and with
# .assign so no column is written onto a slice of another frame.
trait_columns = {'Intn': 'ASR_Intn_T', 'Extn': 'ASR_Extn_T'}
df_rand_mech_long = pd.concat([
    df_rand_mech[['rand_mech_yesorunsure']].assign(
        trait_type=trait_label,
        trait_score=df_rand_mech[score_col].astype(float),
    )
    for trait_label, score_col in trait_columns.items()
])
# Turn the subject index into an ordinary column and give the response flag its model name.
df_rand_mech_long = (
    df_rand_mech_long
    .reset_index()
    .rename(columns={'rand_mech_yesorunsure': 'socialness'})
)
df_rand_mech_long

In [None]:

df_rand_mech_long.dropna(inplace=True)
df_rand_mech_long.shape

In [None]:
# Mixed model: trait score by RANDOM MECH response ("socialness"), trait type and their
# interaction, with a random intercept per subject.
# Lmer is first imported in a later cell of this notebook, so import it here to keep this cell
# runnable on a fresh kernel.
from pymer4.models import Lmer

model = Lmer('trait_score ~  socialness + trait_type + socialness * trait_type + (1|Subject)', data=df_rand_mech_long)
model.fit(factors={"socialness": ["False", "True"]})


In [None]:

from pymer4.models import Lmer

model = Lmer('trait_score ~  socialness + trait_type + socialness * trait_type + (1|Subject)', data=df_rand_mech_long) # ff: mean response, rf:subjID, movie
#model.fit(levels)
model.fit(factors={"socialness": ["False", "True"]})


In [None]:
yes_unsure = df_rand_mech[df_rand_mech[x]==1][y]
no = df_rand_mech[df_rand_mech[x]==0][y]
yes_unsure.mean(), yes_unsure.sem()

In [None]:
no.mean(), no.sem()

In [None]:
# RANDOM MECH: externalizing T-score for subjects who answered "Non-social" versus
# "Social"/"Unsure", shown as two box plots with a two-sample t-test.
fig, ax = plt.subplots(figsize=(5,6))

y = "ASR_Extn_T"
x = "rand_mech_yesorunsure"
rows = data[x] == 0
bp2 = box_plot(data.loc[rows,y], blue_rgb, blue_rgb,0,True)
# The printed label says "ext" because this cell summarises the externalizing score.
print(f'"Non-social": Mean ext score: {np.nanmean(data.loc[rows,y])},SE: {stats.sem(data.loc[rows,y],nan_policy="omit")}')
rows = data[x] == 1
bp1 = box_plot(data.loc[rows,y], red_rgb, red_rgb,1,True)
print(f'"Social" or "Unsure": Mean ext score: {np.nanmean(data.loc[rows,y])},SE: {stats.sem(data.loc[rows,y],nan_policy="omit")}')
plt.xticks(range(2),['"Non-social"','"Social"/\n "Unsure"'])
tscore, p = stats.ttest_ind(data[data[x]==1][y], data[data[x]==0][y], nan_policy='omit')
print(f't={tscore}, p={p}')
# Bracket between the two boxes with a '+' marker above it.
# NOTE(review): the marker is placed by hand, not derived from `p` -- re-check if the data change.
plt.plot([0,0,1,1],[93,95,95,93],linewidth = 1,color='k')
plt.plot([0.5],[97]*1,'+',color='k')

plt.title('RANDOM MECH',fontweight='bold')
plt.ylabel('Externalizing score (T)')
fig.savefig('../results/RANDMECHresp_vs_ASR_Extn_T.png',bbox_inches='tight')


(extra analysis we don't use) Look at RT on RANDMECH:

In [None]:
# Attach each subject's response time on the "Random mechanical.AVI" movie as `rand_mech_rt`.
rand_mech_rt = all_trial_data.loc[all_trial_data["movie"]=="Random mechanical.AVI", "rt"].rename("rand_mech_rt")
# Drop any copy left by a previous run of this cell so re-running does not create a
# duplicate `rand_mech_rt` column.
data = data.drop(columns="rand_mech_rt", errors="ignore").join(rand_mech_rt)
data.head()

In [None]:
g = explore_correlation(x="rand_mech_rt", y="ASR_Intn_T", data=data)
# no correlation between RT and internalizing score

In [None]:
# Restrict correlation to only subjects that responded "yes" or "unsure"

g = explore_correlation(x="rand_mech_rt", y="ASR_Intn_T", data=data[data["rand_mech_yesorunsure"]==1])

# (c) "Unsure" responses for Mental and Random vs. ASR_Int 

In [None]:
x = "pc_unc_total"
g = explore_correlation(x, y="ASR_Intn_T")
plt.xlabel('Mean(%Unsure"Social", %Unsure"Non-social")')
plt.xlim(0,100)
#g.savefig('../results/soc-nonsoc_vs_loneliness.png')

In [None]:
x = "pc_unc_Rand"
g = explore_correlation(x, y="ASR_Intn_T")
plt.ylim(30,100)
plt.yticks(10*np.arange(3,11))
g.ax_joint.set_ylabel('Internalizing score (T)')#'Mean internalizing beh.\n (T-score, a.u.)')
g.ax_joint.annotate(r"$ r_{S}=.098$"f'\n(p='r"$.005)$", xy=(.02,.85), xycoords='axes fraction',fontsize=20) # 0.1, .003
g.ax_joint.set_xlabel('%"Unsure" Random')
g.ax_joint.set_xlim(-3,100)
g.savefig('../results/unsure_random_vs_asr_int.png')

x = "pc_unc_Mental"
g = explore_correlation(x, y="ASR_Intn_T")
plt.ylim(30,100)
plt.yticks(10*np.arange(3,11))
g.ax_joint.set_ylabel('Internalizing score (T)')# 'Mean internalizing beh.\n (T-score, a.u.)')
g.ax_joint.annotate(r"$ r_{S}=.024$"f'\n(p='r"$.49)$", xy=(.07,.85), xycoords='axes fraction',fontsize=20) # 0.1, .003
g.ax_joint.set_xlabel('%"Unsure" Mental')
g.ax_joint.set_xlim(-3,100)
g.savefig('../results/unsure_mental_vs_asr_int.png')

In [None]:
x = 'ASR_Intn_T'
y = 'pc_unc_Rand'
z = 'pc_unc_Mental'

inds = ~np.isnan(data[x]) & ~np.isnan(data[y]) & ~np.isnan(data[z])

n = inds.sum()
xy = stats.spearmanr(data[x][inds], data[y][inds])[0]
xz = stats.spearmanr(data[x][inds], data[z][inds])[0]
yz = stats.spearmanr(data[y][inds], data[z][inds])[0]

dependent_corr(xy, xz, yz, n, twotailed=False)

In [None]:
y = "ASR_Extn_T"
'''x = "pc_unc_total"
g = explore_correlation(x, y)
plt.xlabel('Mean(%Unsure"Social", %Unsure"Non-social")')
plt.xlim(0,100)'''
#g.savefig('../results/soc-nonsoc_vs_loneliness.png')

x = "pc_unc_Rand"
g = explore_correlation(x, y)
plt.xlabel('%Unsure Random')
plt.xlim(-1,100)
plt.ylim(30,100)
plt.yticks(10*np.arange(3,11))
plt.ylabel('Mean externalizing beh.\n (T-score, a.u.)')
g.savefig('../results/unsure_random_vs_asr_ext.png')

x = "pc_unc_Mental"
g = explore_correlation(x, y)
plt.xlabel('%Unsure Mental')
plt.xlim(-1,100)
plt.ylim(30,100)
plt.yticks(10*np.arange(3,11))
plt.ylabel('Mean externalizing beh.\n (T-score, a.u.)')
g.savefig('../results/unsure_mental_vs_asr_ext.png')


# Compare internalizing and externalizing scores

In [None]:
x = 'pc_unc_Rand'
y = 'ASR_Intn_T'
z = 'ASR_Extn_T'

inds = ~np.isnan(data[x]) & ~np.isnan(data[y]) & ~np.isnan(data[z])

n = inds.sum()
xy = stats.spearmanr(data[x][inds], data[y][inds])[0]
xz = stats.spearmanr(data[x][inds], data[z][inds])[0]
yz = stats.spearmanr(data[y][inds], data[z][inds])[0]

dependent_corr(xy, xz, yz, n, twotailed=False)

In [None]:
x = 'pc_unc_Mental'
y = 'ASR_Intn_T'
z = 'ASR_Extn_T'

inds = ~np.isnan(data[x]) & ~np.isnan(data[y]) & ~np.isnan(data[z])

n = inds.sum()
xy = stats.spearmanr(data[x][inds], data[y][inds])[0]
xz = stats.spearmanr(data[x][inds], data[z][inds])[0]
yz = stats.spearmanr(data[y][inds], data[z][inds])[0]

dependent_corr(xy, xz, yz, n, twotailed=False)

# Trying LME

In [None]:
from pymer4.models import Lmer, Lm

In [None]:
#data.head()
list(data.columns)[:5]

In [None]:
np.tile([1,2],10), np.repeat([1,2],10)

In [None]:
nan_rows = (np.isnan(data['pc_unc_Rand'])) | (np.isnan(data['pc_unc_Mental']))
data1 = data.loc[~nan_rows,:]
data.shape, data1.shape

In [None]:
subs = list(data.index)
pc_Uns_Rand = data['pc_unc_Rand']
pc_Uns_Mental = data['pc_unc_Mental']
ASR_Intn_T = data['ASR_Intn_T']
ASR_Extn_T = data['ASR_Extn_T']

subs_all = np.tile(subs,2)
cond = []
cond.extend(np.repeat('Random',len(subs)))
cond.extend(np.repeat('Mental',len(subs)))
pc_Unsure = []
pc_Unsure.extend(pc_Uns_Rand)
pc_Unsure.extend(pc_Uns_Mental)

df = pd.DataFrame({'Subs':subs_all, 'cond': cond, 'pc_Unsure': pc_Unsure, 'Int': np.tile(ASR_Intn_T,2), 'Ext': np.tile(ASR_Extn_T,2)})
#df['Int_Ext'] = df ['Int'] - df['Ext']
df

In [None]:
df1 = pd.melt(df, id_vars = ['Subs','cond', 'pc_Unsure'], var_name = 'trait_type', value_name = 'trait_score')#, value_vars=['Int','Ext'])
df1.dropna(inplace=True)
df1

In [None]:
model = Lmer(' pc_Unsure  ~  trait_type + trait_score + cond  + trait_type*trait_score*cond + (1|Subs)', data=df1) # ff: mean response, rf:subjID, movie
#model.fit()
model.fit(factors={"cond": ["Random", "Mental"]})

In [None]:
# Ordinary linear model (no random effects) with all interactions of trait type, trait score
# and stimulus type.
# The formula calls the stimulus column `stimType`, but df1 is only renamed from `cond` in a
# later cell. Pass a renamed copy so this cell works either way: renaming is a no-op when the
# column is already called `stimType`.
model = Lm(' pc_Unsure  ~  trait_type + trait_score + stimType + trait_type*trait_score + trait_type*stimType + trait_score*stimType + trait_type*trait_score*stimType ',
           data=df1.rename(columns={'cond': 'stimType'}))
model.fit()

In [None]:
df1.rename(columns={'cond':'stimType'}, inplace=True)

In [None]:
model = Lmer(' pc_Unsure  ~  trait_type + trait_score + stimType + trait_type*trait_score + trait_type*stimType + trait_score*stimType + trait_type*trait_score*stimType + (1|Subs)', data=df1) # ff: mean response, rf:subjID, movie
#model.fit()
model.fit(factors={"stimType": ["Random", "Mental"]})

In [None]:
'''model = Lmer('trait_score ~  trait_type + pc_Unsure + cond + trait_type*pc_Unsure + trait_type*cond + pc_Unsure*cond + trait_type*cond*pc_Unsure + (1|Subs)', data=df1) # ff: mean response, rf:subjID, movie
#model.fit()
model.fit(factors={"cond": ["Random", "Mental"]})'''

In [None]:
# Per-subject difference in %"Unsure" responses between Random and Mental stimuli, kept next to
# the two trait scores. Copy, so the new column is added to an independent frame rather than to
# a slice of `data`.
df_MentRand = data[['pc_unc_Rand', 'pc_unc_Mental','ASR_Intn_T', 'ASR_Extn_T']].copy()
df_MentRand['Rand_Ment'] = df_MentRand['pc_unc_Rand'] - df_MentRand['pc_unc_Mental']
df_MentRand

In [None]:
# Reshape df_MentRand to long format: one row per subject and trait type, with columns
# (subject index), Rand_Ment, trait_type ('Int'/'Ext') and trait_score; incomplete rows are dropped.
# Built with pd.concat because DataFrame.append no longer exists in pandas >= 2.0, and with
# .assign so no column is written onto a slice of another frame.
trait_columns = {'Int': 'ASR_Intn_T', 'Ext': 'ASR_Extn_T'}
df_long1 = pd.concat([
    df_MentRand[['Rand_Ment']].assign(
        trait_type=trait_label,
        trait_score=df_MentRand[score_col].astype(float),
    )
    for trait_label, score_col in trait_columns.items()
])
df_long1 = df_long1.reset_index().dropna()
df_long1

In [None]:
sns.scatterplot(x = 'trait_score', y = 'Rand_Ment', hue = 'trait_type', data = df_long1)


In [None]:
#model = Lmer('Rand_Ment ~  trait_type + trait_score + trait_type*trait_score + (1|Subject)', data=df_long1) # ff: mean response, rf:subjID, movie
model = Lmer('trait_score ~  trait_type + Rand_Ment + trait_type*Rand_Ment + (1|Subject)', data=df_long1) # ff: mean response, rf:subjID, movie
model.fit()

In [None]:
model = Lmer('Rand_Ment ~  trait_type + trait_score + trait_type*trait_score + (1|Subject)', data=df_long1) # ff: mean response, rf:subjID, movie
#model = Lmer('trait_score ~  trait_type + Rand_Ment + trait_type*Rand_Ment + (1|Subject)', data=df_long1) # ff: mean response, rf:subjID, movie
model.fit()

In [None]:
# Rebuild the long (subject x stimulus condition) frame and add the internalizing-minus-
# externalizing difference score. The frame is assigned to `df`, the name every later cell uses;
# assigning it to `df_MentRand` would overwrite the wide frame of that name while leaving the
# new column on whatever `df` happened to hold.
df = pd.DataFrame({'Subs':subs_all, 'cond': cond, 'pc_Unsure': pc_Unsure, 'Int': np.tile(ASR_Intn_T,2), 'Ext': np.tile(ASR_Extn_T,2)})
df['Int_Ext'] = df['Int'] - df['Ext']

In [None]:
nan_rows = np.isnan(df['pc_Unsure']) | np.isnan(df['Int']) | np.isnan(df['Ext'])
df = df.loc[~nan_rows,:]
nan_rows1 = np.isnan(df['pc_Unsure']) | np.isnan(df['Int']) | np.isnan(df['Ext'])
np.where(nan_rows), np.where(nan_rows1)

In [None]:
model = Lmer('pc_Unsure ~  cond + (1|Subs)', data=df) # ff: mean response, rf:subjID, movie
model.fit()

In [None]:
# Reshape `df` to long format over trait type: each (subject, condition) row appears once with
# trait_type 'Int' and once with 'Ext', and trait_score holds the matching score.
# Built with pd.concat because DataFrame.append no longer exists in pandas >= 2.0, and with
# .assign so no column is written onto a slice of `df`.
df_long = pd.concat([
    df[['pc_Unsure', 'cond', 'Subs']].assign(
        trait_type=trait_label,
        trait_score=df[trait_label].astype(float),
    )
    for trait_label in ('Int', 'Ext')
])
# Keep the previous row labels as an ordinary `index` column.
df_long = df_long.reset_index()
df_long


In [None]:
# Remove rows with any missing value, then show the remaining (rows, columns).
df_long.dropna(inplace=True)
df_long.shape

In [None]:
df_long.loc[df_long['trait_type']=='Int',:]

In [None]:

# Internalizing rows only: %"Unsure" by stimulus condition, trait score and their interaction,
# with a random intercept per subject.
# The result must be bound to `model`; otherwise the fit below re-fits whatever model an
# earlier cell left behind.
model = Lmer('pc_Unsure ~  cond + trait_score + cond * trait_score + (1|Subs)', data=df_long.loc[df_long['trait_type']=='Int',:])
model.fit(factors={"cond": ["Random", "Mental"]})

# TO BE CONTINUED!!!


In [None]:

# Internalizing rows only: %"Unsure" by stimulus condition, trait score and their interaction,
# with a random intercept per subject (same model as the preceding cell).
# The result must be bound to `model`; otherwise the fit below re-fits whatever model an
# earlier cell left behind.
model = Lmer('pc_Unsure ~  cond + trait_score + cond * trait_score + (1|Subs)', data=df_long.loc[df_long['trait_type']=='Int',:])
model.fit(factors={"cond": ["Random", "Mental"]})

# TO BE CONTINUED!!!


In [None]:
model = Lmer('pc_Unsure ~  cond + trait_score + cond * trait_score + (1|Subs)', data=df_long.loc[df_long['trait_type']=='Ext',:]) # ff: mean response, rf:subjID, movie
#model.fit(levels)
model.fit(factors={"cond": ["Random", "Mental"]})

# TO BE CONTINUED!!!


In [None]:
model = Lmer('pc_Unsure ~  cond + trait_score + trait_type + cond * trait_score + (1|Subs)', data=df_long) # ff: mean response, rf:subjID, movie
#model.fit(levels)
model.fit(factors={"cond": ["Random", "Mental"]})

# TO BE CONTINUED!!!


In [None]:
# Trait score by %"Unsure", stimulus condition and their interaction, with a random intercept
# per subject.
# The model is fitted on df_long, the frame that holds trait_score, pc_Unsure, cond and the
# subject column `Subs` (`pc_Unsure` on its own is a plain list, not a data frame), and the
# factor levels are given for `cond`, the only categorical term in the formula.
model = Lmer('trait_score ~  pc_Unsure + cond + cond * pc_Unsure + (1|Subs)', data=df_long)
model.fit(factors={"cond": ["Random", "Mental"]})

# TO BE CONTINUED!!!


In [None]:
model = Lmer('pc_Unsure ~  Int_Ext + (1|Subs)', data=df) # ff: mean response, rf:subjID, movie
model.fit()

In [None]:
model = Lm('pc_Unsure ~  cond + Int_Ext + cond*Int_Ext', data=df) # ff: mean response, rf:subjID, movie
model.fit(factors={"cond":["Random","Mental"]})
#model.fit()

In [None]:
model = Lm('pc_Unsure ~  cond + Int*cond', data=df) # ff: mean response, rf:subjID, movie
model.fit(factors={"cond":["Random","Mental"]})
#model.fit()

In [None]:
model = Lm('pc_Unsure ~  cond + Ext*cond', data=df) # ff: mean response, rf:subjID, movie
model.fit(factors={"cond":["Random","Mental"]})
#model.fit()

In [None]:
model = Lm('pc_Unsure ~  cond + Int + Ext', data=df) # ff: mean response, rf:subjID, movie
model.fit(factors={"cond":["Random","Mental"]})
#model.fit()

In [None]:
model = Lmer('pc_Unsure ~  cond + Int_Ext + cond*Int_Ext + (1|Subs)', data=df) # ff: mean response, rf:subjID, movie
model.fit(factors={"cond":["Random","Mental"]})
#model.fit()