## import

In [1]:
import logging
import os

import torch
import pyro
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO, MCMC, NUTS
import pyro.distributions as dist
import pyro.distributions.constraints as constraints

from sklearn import preprocessing
from sklearn.metrics import r2_score
from sklearn.metrics import cohen_kappa_score

import warnings
warnings.simplefilter(action="ignore",category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from autorank import autorank, plot_stats, create_report, latex_table
from pyirr import intraclass_correlation

pyro.__version__

'1.8.4'

In [2]:
import extract_correct_csv
valid_sub = extract_correct_csv.extract_only_valid_subject()

In [3]:
def anxious_subjects(path, n, type='top'):
    '''

    :param path: path sias score or linear deviation score
    :param n: number of subjects
    :param type: 'top' or 'bot'
    :return: top or bot n subjects sorted by sias score
    '''
    valid_subjects = extract_correct_csv.extract_only_valid_subject()
    df = pd.read_csv(path).dropna().reset_index(drop=True)
    df = df[df.subject.isin(valid_subjects)]
    df['subject'] = [int(x) for x in df['subject']]
    if type=='top':
        return df.sort_values(by=df.columns[1], ascending=False).subject[:n].values
    else:
        return df.sort_values(by=df.columns[1], ascending=False).subject[-n:].values


In [4]:
# read data
df = pd.read_csv('data/newLookAtMe/newLookAtMe02.csv')
df_rational = df[['morphing level', 'shock']] #consider only morphing level and shock
df_rational['shock'] = df_rational['shock'].astype(int) # setting shock as int instead of boolean
df_rational['morphing level'] = [int(d==6) for d in df_rational['morphing level']] # if morphing level==6 -> 1
df_rational = df_rational.to_numpy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rational['shock'] = df_rational['shock'].astype(int) # setting shock as int instead of boolean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rational['morphing level'] = [int(d==6) for d in df_rational['morphing level']] # if morphing level==6 -> 1


In [None]:
def counter_window(data, k=0):
    N = data.shape[0]
    counter = torch.zeros((N,4))
    for i in range(len(data)):
        dict_ = {'[0 0]':0, '[0 1]': 0, '[1 0]':0, '[1 1]':0}
        if k == 0 or k > i:
            tmp_data = data[:i+1]
        else:
            tmp_data = data[i-k:i+1]
            #print('im here')
        # count occurencies
        for x in tmp_data:
            dict_[str(x)] += 1
        values = np.array(list(dict_.values()))
        counter[i] = torch.tensor(values)
    return counter

In [None]:
counter = counter_window(df_rational, 3)
counter = counter.reshape((len(df_rational), 2, 2))

In [None]:
# categorical/multinomial distribution

# uniform prior
prior_counts = torch.ones((2,2))


#model
def model(data):
    prior = pyro.sample("prior", dist.Dirichlet(prior_counts))
    total_counts = int(data.sum())
    pyro.sample("likelihood", dist.Multinomial(total_counts, prior), obs=data)


nuts_kernel = NUTS(model)
num_samples, warmup_steps = (300, 200)

mcmc = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=True)
all_means = []

# sampling
for i in range(len(counter)):
    mcmc.run(counter[i])
    hmc_samples = {k: v.detach().cpu().numpy()
                   for k, v in mcmc.get_samples().items()}
    means = hmc_samples['prior'].mean(axis=0)
    stds = hmc_samples['prior'].std(axis=0)
    print('observation: ', df_rational[i])
    print('probabilities: ', means)
    all_means.append(means)

# Analysis sliding window K

In [6]:
k_wind = [2, 5, 10, 25, 50, 100, 150]
HAB_TRIALS = 16

path_sias = 'data/sias_score.csv'
path_lds = 'data/lds_subjects.csv'

len_sub = 6
top_lds = anxious_subjects(path_lds, len_sub, 'top')
bot_lds = anxious_subjects(path_lds, len_sub, 'bot')

top_sias = anxious_subjects(path_sias, len_sub)
bot_sias = anxious_subjects(path_sias, len_sub, 'bot')

In [21]:
df_correlation = pd.DataFrame(columns=['subject','k','pearson','r2score','person_disc','cohen_disc'])

for k in list(k_wind):

    # read output of the rational model with different K
    array_csplus_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_csplus.npy',allow_pickle=True)
    array_csminus_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_csminus.npy',allow_pickle=True)
    total_array_simulated = np.load('output/pyro/sliding_wind/k'+str(k)+'_total.npy',allow_pickle=True)

    rating_rational_subj = total_array_simulated[HAB_TRIALS:] #remove habituation trials


    for sub in valid_sub:
        subj_ = extract_correct_csv.read_correct_subject_csv(sub)

        #read data of real subjects
        df_sub = pd.read_csv('data/newLookAtMe/newLookAtMe'+subj_+'.csv')
        df_sub = df_sub[['shock', 'rating', 'morphing level']]
        df_sub['shock'] = df_sub['shock'].astype(int) #convert shock from boolean to int
        df_sub['morphing level'] = [int(d == 6) for d in df_sub['morphing level']]
        df_sub['rating'] = df_sub['rating'].replace([1, 2, 3, 4, 5], [0.2, 0.4, 0.6, 0.8, 1]) #convert vote into (0,1)
        df_sub_learn = df_sub[HAB_TRIALS:] #remove habituation trials
        rating_sub = np.array(df_sub_learn['rating'])

        rating_rational = rating_rational_subj
        #remove trial from rating agent simulation and real data if in one list is nan
        bad = ~np.logical_or(np.isnan(rating_sub), np.isnan(rating_rational))
        rating_sub = np.compress(bad, rating_sub)
        rating_rational = np.compress(bad, rating_rational)

        #discretization of rating rational
        round_vector = np.array([0.2, 0.4, 0.6, 0.8, 1])
        rating_rational_discr = np.round(rating_rational / 0.2) * 0.2
        rating_rational_discr = np.clip(rating_rational_discr, round_vector.min(), round_vector.max())

        # calculate pearson correlation coefficient between k-rational model and real data
        pearson = round(np.corrcoef(rating_sub,rating_rational)[0][1],2)

        # calculate r2 score between k-rational model and real data
        r2 = round(r2_score(rating_sub,rating_rational),2)

        # calculate pearson correlation coefficient between k-rational model and real data using discrete values for k-rational model
        pearson_disc = round(np.corrcoef(rating_sub,rating_rational_discr)[0][1],2)

        # calculate cohen kappa between k-rational model and real data using discrete values for k-rational model
        cohen_disc = round(cohen_kappa_score(rating_sub*10,rating_rational_discr*10),2)

        # write line
        df_tmp = pd.DataFrame({'subject':sub,'k':k,'pearson':pearson,'r2score':r2,'person_disc':pearson_disc,'cohen_disc':cohen_disc},index=np.arange(1))
        df_correlation = pd.concat([df_correlation,df_tmp])

df_correlation['subject'] = [float(x) for x in df_correlation['subject']] #convert subjects into float

# read social anxiety values
sias_df = pd.read_csv('data/sias_score.csv').drop(columns='social_anxiety')
sias_df['subject'] = [float(x) for x in sias_df['subject']] #convert subjects into float

# read linear deviation score (how much this subject do fear generalization)
lds_df = pd.read_csv('data/lds_subjects.csv')
lds_df['subject'] = [float(x) for x in lds_df['subject']] #convert subjects into float

In [22]:
# one dataframe with social anxiety, linear devation score and correlation measures for each subject
res = pd.merge(sias_df, lds_df)
df_correlation = pd.merge(res,df_correlation).dropna().reset_index(drop=True)

In [23]:
df_correlation

Unnamed: 0,subject,sias_score,lds,k,pearson,r2score,person_disc,cohen_disc
0,1.0,21.0,0.201,2,0.24,-1.54,0.27,0.00
1,1.0,21.0,0.201,5,0.30,-2.35,0.30,0.06
2,1.0,21.0,0.201,10,0.29,-3.22,0.28,0.03
3,1.0,21.0,0.201,25,0.26,-4.27,0.23,0.00
4,1.0,21.0,0.201,50,0.23,-4.78,0.22,-0.01
...,...,...,...,...,...,...,...,...
205,54.0,17.0,1.594,10,0.87,0.03,0.83,0.16
206,54.0,17.0,1.594,25,0.89,-0.14,0.87,0.14
207,54.0,17.0,1.594,50,0.90,-0.27,0.85,0.14
208,54.0,17.0,1.594,100,0.91,-0.31,0.85,0.14


# check from this if there's something useful

## Rational agent discretisation

In [None]:
valid_subjects = extract_correct_csv.extract_only_valid_subject()
len(valid_subjects)

In [None]:
total_array_simulated = np.load('output/pyro/complete_rational/total.npy',allow_pickle=True)
total_array_simulated

In [None]:
values = np.array([0, 0.2, 0.4, 0.6, 0.8, 1])
discretized_data = np.digitize(total_array_simulated, values)
print(discretized_data.shape)


df_global = pd.DataFrame(columns=['Subject', 'Rating rational', 'Rating real'])

for sub in valid_subjects:
    string_sub = extract_correct_csv.read_correct_subject_csv(sub)
    df_sub = pd.read_csv('data/newLookAtMe/newLookAtMe'+string_sub+'.csv')
    df_sub = df_sub[16:]
    tmp_df = pd.DataFrame({'Subject': sub, 'Rating rational': discretized_data, 'Rating real': df_sub['rating']})
    df_global = pd.concat([df_global, tmp_df])

df_global = df_global.reset_index(drop=True)

In [None]:
valid_subjects = df_global.Subject.unique()
valid_subjects

In [None]:
dict_results = {}
for x in valid_subjects:
    df_sub_ = df_global[df_global.Subject == x].dropna().drop(columns=['Subject']).reset_index(drop=True)
    df_sub_['Rating rational'] = df_sub_['Rating rational'].astype(float)
    result = intraclass_correlation(df_sub_).value
    dict_results[x] = result

dict_results


In [None]:
df_icc = pd.DataFrame(list(dict_results.items()), columns=['Subject', 'ICC'])
df_icc

In [None]:
path_sias = 'data/sias_score.csv'
path_lds = 'data/lds_subjects.csv'

len_sub = 6
top_lds = anxious_subjects(path_lds, len_sub, 'top')
bot_lds = anxious_subjects(path_lds, len_sub, 'bot')

top_sias = anxious_subjects(path_sias, len_sub)
bot_sias = anxious_subjects(path_sias, len_sub, 'bot')

In [None]:
mean_top_sias = df_icc[df_icc.Subject.isin(top_sias)]['ICC'].mean()
mean_bot_sias = df_icc[df_icc.Subject.isin(bot_sias)]['ICC'].mean()
mean_top_lds = df_icc[df_icc.Subject.isin(top_lds)]['ICC'].mean()
mean_bot_lds = df_icc[df_icc.Subject.isin(bot_lds)]['ICC'].mean()

In [None]:
print(f'Average of the first {len_sub} subjects with higher sias score: {round(mean_top_sias, 2)}')
print(f'Average of the first {len_sub} subjects with lower sias score: {round(mean_bot_sias, 2)}')
print(f'Average of the first {len_sub} subjects with higher linear deviation score: {round(mean_top_lds,2)}')
print(f'Average of the first {len_sub} subjects with lower linear deviation score: {round(mean_bot_lds, 2)}')