## import

In [1]:
import logging
import os

import torch
import pyro
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO, MCMC, NUTS
import pyro.distributions as dist
import pyro.distributions.constraints as constraints

from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import cohen_kappa_score

import warnings
warnings.simplefilter(action="ignore",category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from autorank import autorank, plot_stats, create_report, latex_table
from pyirr import intraclass_correlation

pyro.__version__

'1.8.4'

In [2]:
import extract_correct_csv
valid_sub = extract_correct_csv.extract_only_valid_subject()

In [3]:
def anxious_subjects(path, n, type='top'):
    '''

    :param path: path sias score or linear deviation score
    :param n: number of subjects
    :param type: 'top' or 'bot'
    :return: top or bot n subjects sorted by sias score
    '''
    valid_subjects = extract_correct_csv.extract_only_valid_subject()
    df = pd.read_csv(path).dropna().reset_index(drop=True)
    df = df[df.subject.isin(valid_subjects)]
    df['subject'] = [int(x) for x in df['subject']]
    if type=='top':
        return df.sort_values(by=df.columns[1], ascending=False).subject[:n].values
    else:
        return df.sort_values(by=df.columns[1], ascending=False).subject[-n:].values


In [4]:
def counter_window(data, k=0):
    N = data.shape[0]
    counter = torch.zeros((N,4))
    for i in range(len(data)):
        dict_ = {'[0 0]':0, '[0 1]': 0, '[1 0]':0, '[1 1]':0}
        if k == 0 or k > i:
            tmp_data = data[:i+1]
        else:
            tmp_data = data[i-k:i+1]
            #print('im here')
        # count occurencies
        for x in tmp_data:
            dict_[str(x)] += 1
        values = np.array(list(dict_.values()))
        counter[i] = torch.tensor(values)
    return counter

In [5]:
# read data
df = pd.read_csv('data/newLookAtMe/newLookAtMe02.csv')
df_rational = df[['morphing level', 'shock']] #consider only morphing level and shock
df_rational['shock'] = df_rational['shock'].astype(int) # setting shock as int instead of boolean
df_rational['morphing level'] = [int(d==6) for d in df_rational['morphing level']] # if morphing level==6 -> 1
df_rational = df_rational.to_numpy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rational['shock'] = df_rational['shock'].astype(int) # setting shock as int instead of boolean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rational['morphing level'] = [int(d==6) for d in df_rational['morphing level']] # if morphing level==6 -> 1


In [6]:
counter = counter_window(df_rational, 3)
counter = counter.reshape((len(df_rational), 2, 2))

# model sliding window

In [None]:
# categorical/multinomial distribution

# uniform prior
prior_counts = torch.ones((2,2))


#model
def model(data):
    prior = pyro.sample("prior", dist.Dirichlet(prior_counts))
    total_counts = int(data.sum())
    pyro.sample("likelihood", dist.Multinomial(total_counts, prior), obs=data)


nuts_kernel = NUTS(model)
num_samples, warmup_steps = (300, 200)

mcmc = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=True)
all_means = []

# sampling
for i in range(len(counter)):
    mcmc.run(counter[i])
    hmc_samples = {k: v.detach().cpu().numpy()
                   for k, v in mcmc.get_samples().items()}
    means = hmc_samples['prior'].mean(axis=0)
    stds = hmc_samples['prior'].std(axis=0)
    print('observation: ', df_rational[i])
    print('probabilities: ', means)
    all_means.append(means)

# Analysis sliding window K

In [7]:
k_wind = [2, 5, 10, 25, 50, 100, 150]
HAB_TRIALS = 16

path_sias = 'data/sias_score.csv'
path_lds = 'data/lds_subjects.csv'

len_sub = 6
top_lds = anxious_subjects(path_lds, len_sub, 'top')
bot_lds = anxious_subjects(path_lds, len_sub, 'bot')

top_sias = anxious_subjects(path_sias, len_sub)
bot_sias = anxious_subjects(path_sias, len_sub, 'bot')

In [8]:
df_correlation = pd.DataFrame(columns=['subject','k','pearson','r2score','person_disc','cohen_disc','icc'])

for k in list(k_wind):

    # read output of the rational model with different K
    array_csplus_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_csplus.npy',allow_pickle=True)
    array_csminus_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_csminus.npy',allow_pickle=True)
    total_array_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_total.npy',allow_pickle=True)

    rating_rational_subj = total_array_simulated[HAB_TRIALS:] #remove habituation trials


    for sub in valid_sub:
        subj_ = extract_correct_csv.read_correct_subject_csv(sub)

        #read data of real subjects
        df_sub = pd.read_csv('data/newLookAtMe/newLookAtMe'+subj_+'.csv')
        df_sub = df_sub[['shock', 'rating', 'morphing level']]
        df_sub['shock'] = df_sub['shock'].astype(int) #convert shock from boolean to int
        df_sub['morphing level'] = [int(d == 6) for d in df_sub['morphing level']]
        df_sub['rating'] = df_sub['rating'].replace([1, 2, 3, 4, 5], [0.2, 0.4, 0.6, 0.8, 1]) #convert vote into (0,1)
        df_sub_learn = df_sub[HAB_TRIALS:] #remove habituation trials
        rating_sub = np.array(df_sub_learn['rating'])

        rating_rational = rating_rational_subj
        #remove trial from rating agent simulation and real data if in one list is nan
        bad = ~np.logical_or(np.isnan(rating_sub), np.isnan(rating_rational))
        rating_sub = np.compress(bad, rating_sub)
        rating_rational = np.compress(bad, rating_rational)

        #discretization of rating rational
        round_vector = np.array([0.2, 0.4, 0.6, 0.8, 1])
        rating_rational_discr = np.round(rating_rational / 0.2) * 0.2
        rating_rational_discr = np.clip(rating_rational_discr, round_vector.min(), round_vector.max())

        # calculate pearson correlation coefficient between k-rational model and real data
        pearson = round(np.corrcoef(rating_sub,rating_rational)[0][1],2)

        # calculate r2 score between k-rational model and real data
        r2 = round(r2_score(rating_sub,rating_rational),2)

        # calculate pearson correlation coefficient between k-rational model and real data using discrete values for k-rational model
        pearson_disc = round(np.corrcoef(rating_sub,rating_rational_discr)[0][1],2)

        # calculate cohen kappa between k-rational model and real data using discrete values for k-rational model
        cohen_disc = round(cohen_kappa_score(rating_sub*10,rating_rational_discr*10),2)

        # intraclass_correlation
        #df_sub_ = df_global[df_global.Subject == x].dropna().drop(columns=['Subject']).reset_index(drop=True)
        df_icc = pd.DataFrame({'rating_sub': rating_sub, 'rating_rational_discr': rating_rational_discr})
        icc = intraclass_correlation(df_icc).value

        # write line
        df_tmp = pd.DataFrame({'subject':sub,'k':k,'pearson':pearson,'r2score':r2,'person_disc':pearson_disc,'cohen_disc':cohen_disc,'icc':icc},index=np.arange(1))
        df_correlation = pd.concat([df_correlation,df_tmp])

df_correlation['subject'] = [float(x) for x in df_correlation['subject']] #convert subjects into float

# read social anxiety values
sias_df = pd.read_csv('data/sias_score.csv').drop(columns='social_anxiety')
sias_df['subject'] = [float(x) for x in sias_df['subject']] #convert subjects into float

# read linear deviation score (how much this subject do fear generalization)
lds_df = pd.read_csv('data/lds_subjects.csv')
lds_df['subject'] = [float(x) for x in lds_df['subject']] #convert subjects into float

In [9]:
# one dataframe with social anxiety, linear devation score and correlation measures for each subject
res = pd.merge(sias_df, lds_df)
df_correlation = pd.merge(res,df_correlation).dropna().reset_index(drop=True)

In [10]:
df_correlation

Unnamed: 0,subject,sias_score,lds,k,pearson,r2score,person_disc,cohen_disc,icc
0,1.0,21.0,0.201,2,0.24,-1.54,0.27,0.00,-0.010162
1,1.0,21.0,0.201,5,0.30,-2.35,0.30,0.06,0.000117
2,1.0,21.0,0.201,10,0.29,-3.22,0.28,0.03,-0.027478
3,1.0,21.0,0.201,25,0.26,-4.27,0.23,0.00,-0.107454
4,1.0,21.0,0.201,50,0.23,-4.78,0.22,-0.01,-0.136812
...,...,...,...,...,...,...,...,...,...
205,54.0,17.0,1.594,10,0.87,0.03,0.83,0.16,0.443933
206,54.0,17.0,1.594,25,0.89,-0.14,0.87,0.14,0.408906
207,54.0,17.0,1.594,50,0.90,-0.27,0.85,0.14,0.374318
208,54.0,17.0,1.594,100,0.91,-0.31,0.85,0.14,0.334976


In [11]:
path_sias = './data/sias_score.csv'
path_lds = './data/lds_subjects.csv'
len_sub = 6

# extract the 6 most/least generalization subjects and the 6 most/least anxious subjects
top_lds_list = anxious_subjects(path_lds, len_sub, 'top')
bot_lds_list = anxious_subjects(path_lds, len_sub, 'bot')
top_sias_list = anxious_subjects(path_sias, len_sub, 'top')
bot_sias_list = anxious_subjects(path_sias, len_sub, 'bot')

In [23]:
pearson_sias_high = df_correlation[df_correlation.subject.isin(top_sias_list)]['pearson'].median()
pearson_sias_low = df_correlation[df_correlation.subject.isin(bot_sias_list)]['pearson'].median()

r2_sias_high = df_correlation[df_correlation.subject.isin(top_sias_list)]['r2score'].median()
r2_sias_low = df_correlation[df_correlation.subject.isin(bot_sias_list)]['r2score'].median()

discrete_pearson_sias_high = df_correlation[df_correlation.subject.isin(top_sias_list)]['person_disc'].median()
discrete_pearson_sias_low = df_correlation[df_correlation.subject.isin(bot_sias_list)]['person_disc'].median()

icc_sias_high = df_correlation[df_correlation.subject.isin(top_sias_list)]['icc'].median()
icc_sias_low = df_correlation[df_correlation.subject.isin(bot_sias_list)]['icc'].median()

cohen_sias_high = df_correlation[df_correlation.subject.isin(top_sias_list)]['cohen_disc'].median()
cohen_sias_low = df_correlation[df_correlation.subject.isin(bot_sias_list)]['cohen_disc'].median()

print(f'Correlation between {len_sub} more/less anxiety subjects with the rational agent\n')
print('Pearson\nHigh anxiety: ',pearson_sias_high, ' Low anxiety:',pearson_sias_low)
print('\nR2score\nHigh anxiety: ',round(r2_sias_high,2), ' Low anxiety:',r2_sias_low)
print('\nPearson discrete\nHigh anxiety: ',round(discrete_pearson_sias_high,2), ' Low anxiety:',discrete_pearson_sias_low)
print('\nCohen\nHigh anxiety: ',round(cohen_sias_high,2), ' Low anxiety:',cohen_sias_low)
print('\nICC \nHigh anxiety: ',round(icc_sias_high,2), ' Low anxiety:',cohen_sias_low)

Correlation between 6 more/less anxiety subjects with the rational agent

Pearson
High anxiety:  0.495  Low anxiety: 0.64

R2score
High anxiety:  -0.81  Low anxiety: -0.27

Pearson discrete
High anxiety:  0.5  Low anxiety: 0.61

Cohen
High anxiety:  0.12  Low anxiety: 0.21

ICC 
High anxiety:  0.16  Low anxiety: 0.21


In [21]:
pearson_lds_high = df_correlation[df_correlation.subject.isin(top_lds_list)]['pearson'].median()
pearson_lds_low = df_correlation[df_correlation.subject.isin(bot_lds_list)]['pearson'].median()

r2_lds_high = df_correlation[df_correlation.subject.isin(top_lds_list)]['r2score'].median()
r2_lds_low = df_correlation[df_correlation.subject.isin(bot_lds_list)]['r2score'].median()

discrete_pearson_lds_high = df_correlation[df_correlation.subject.isin(top_lds_list)]['person_disc'].median()
discrete_pearson_lds_low = df_correlation[df_correlation.subject.isin(bot_lds_list)]['person_disc'].median()

icc_lds_high = df_correlation[df_correlation.subject.isin(top_lds_list)]['icc'].median()
icc_lds_low = df_correlation[df_correlation.subject.isin(bot_lds_list)]['icc'].median()

cohen_lds_high = df_correlation[df_correlation.subject.isin(top_lds_list)]['cohen_disc'].median()
cohen_lds_low = df_correlation[df_correlation.subject.isin(bot_lds_list)]['cohen_disc'].median()

print('Correlation between 5 more/less fear gen subjects with the rational agent\n')
print('Pearson\nHigh fear gen: ',round(pearson_lds_high,2), ' Low fear gen:',round(pearson_lds_low,2))
print('\nR2score\nHigh fear gen: ',round(r2_lds_high,2), ' Low fear gen:',round(r2_lds_low,2))
print('\nPearson discrete\nHigh fear gen: ',discrete_pearson_lds_high, ' Low fear gen:',discrete_pearson_lds_low)
print('\nCohen\nHigh fear gen: ',cohen_lds_high, ' Low fear gen:',cohen_lds_low)
print('\nICC \nHigh fear gen: ',round(icc_lds_high,2), ' Low fear gen:',round(icc_lds_low,2))


Correlation between 5 more/less fear gen subjects with the rational agent

Pearson
High fear gen:  0.76  Low fear gen: 0.26

R2score
High fear gen:  0.09  Low fear gen: -1.96

Pearson discrete
High fear gen:  0.72  Low fear gen: 0.26

Cohen
High fear gen:  0.27  Low fear gen: 0.04

ICC 
High fear gen:  0.43  Low fear gen: -0.12
