In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import warnings

from pandas.errors import SettingWithCopyWarning
from fear_gen import extract_correct_csv
from sklearn.metrics import r2_score, cohen_kappa_score
from pyirr import intraclass_correlation

# Silence FutureWarning and pandas' SettingWithCopyWarning for the whole notebook.
# NOTE(review): several cells below assign columns on DataFrame slices; these
# filters hide the resulting warnings rather than fixing the cause.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=SettingWithCopyWarning)

# Default font size and figure size applied to every matplotlib figure.
plt.rcParams['font.size'] = '12'
plt.rcParams['figure.figsize'] = [8, 6]

The function `anxious_subjects` extracts the `n` subjects with the highest/lowest scores from the score file given by the `path` parameter.

In [None]:
def anxious_subjects(path, n, type_='top'):
    """Return the ids of the `n` valid subjects with the highest or lowest score.

    Parameters
    ----------
    path : str
        CSV file containing a ``subject`` column; the file's second column is
        used as the score to rank by.
    n : int
        Number of subjects to return.
    type_ : str, default 'top'
        ``'top'`` selects the highest scores; any other value (e.g. ``'bot'``)
        selects the lowest scores.

    Returns
    -------
    numpy.ndarray
        Subject ids (as ints), ordered by decreasing score.
    """
    # The validity helper is called from the parent directory. Remember where
    # we started and always come back, even if the helper raises, so a failure
    # cannot leave the notebook in the wrong working directory.
    start_dir = os.getcwd()
    os.chdir('..')
    try:
        valid_subjects = extract_correct_csv.extract_only_valid_subject()
    finally:
        os.chdir(start_dir)

    df = pd.read_csv(path).dropna().reset_index(drop=True)
    # .copy() so the column assignment below does not write into a slice.
    df = df[df.subject.isin(valid_subjects)].copy()
    df['subject'] = df['subject'].astype(int)

    ranked = df.sort_values(by=df.columns[1], ascending=False).subject
    # head/tail instead of [:n] / [-n:]: `[-0:]` would return every subject.
    selected = ranked.head(n) if type_ == 'top' else ranked.tail(n)
    return selected.values


# Rational agent
The idea is to train *n* models where each model is trained using *n* trials.

First step: extract the *morphing level*, *shock* and *rating* data of a real subject.

In [None]:
HABITUATION_TRIALS = 16

# read dataset
df = pd.read_csv('../data/newLookAtMe/newLookAtMe02.csv')

# Keep the three columns of interest and normalise them in one chained step:
#   shock          -> int instead of boolean
#   morphing level -> 1 when the level equals 6, otherwise 0
#   rating         -> 1..5 mapped onto 0.2..1
# then drop the habituation trials at the start of the session.
df_real = (
    df[['shock', 'rating', 'morphing level']]
    .assign(**{
        'shock': lambda frame: frame['shock'].astype(int),
        'morphing level': lambda frame: frame['morphing level'].eq(6).astype(int),
        'rating': lambda frame: frame['rating'].replace([1, 2, 3, 4, 5], [0.2, 0.4, 0.6, 0.8, 1]),
    })
    .iloc[HABITUATION_TRIALS:]
)

## Shock expectancy simulated

Read the output probabilities of the rational agent model, excluding the first 16 trials, and compare them with the *ratings* given by a real subject (subject number 2 in the example below).

In [None]:
# load results of rational agent model
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code if the .npy files come from an untrusted source. If the
# arrays are plain numeric (presumably - confirm), allow_pickle=False is enough.
array_csplus_simulated = np.load('../output/pyro/complete_rational/csplus.npy',allow_pickle=True)
array_csminus_simulated = np.load('../output/pyro/complete_rational/csminus.npy',allow_pickle=True)
total_array_simulated = np.load('../output/pyro/complete_rational/total.npy',allow_pickle=True)

In [None]:
# Display the simulated values loaded from total.npy.
total_array_simulated

In [None]:
# Split the real ratings by stimulus: 'morphing level' == 1 is plotted as cs+,
# 'morphing level' == 0 as cs-. The x values are the original trial indices.
is_cs_plus = df_real['morphing level'] == 1
is_cs_minus = df_real['morphing level'] == 0

y_plus_real = df_real.loc[is_cs_plus].rating.values
x_plus_real = np.array(df_real.loc[is_cs_plus].index)
y_minus_real = df_real.loc[is_cs_minus].rating.values
x_minus_real = np.array(df_real.loc[is_cs_minus].index)
y_real = df_real.rating.values

fig = plt.figure(figsize=(13,6))
plt.title('Analysis of the shock prediction comparing subject #2 and rational agent')
plt.scatter(x_plus_real, y_plus_real, color='blue', label='cs+ real')
plt.scatter(array_csplus_simulated[:,0], array_csplus_simulated[:,1], color='darkblue', label='cs+ simulated')
plt.scatter(x_minus_real, y_minus_real, color='red', label='cs- real')
plt.scatter(array_csminus_simulated[:,0], array_csminus_simulated[:,1], color='darkred', label='cs- simulated')
plt.legend(loc='right')
# First marker: end of the habituation trials (uses the shared constant instead
# of repeating the literal 16).
plt.axvline(x=HABITUATION_TRIALS, linestyle='--', color='green')
# NOTE(review): 48 presumably marks the next phase boundary - confirm and name it.
plt.axvline(x=48, linestyle='--', color='green')
plt.xlabel('Trial')
plt.ylabel('P(condition | visual stimulus)')
plt.show()


# Analysis of the correlation between the rational agent and all real subjects using Pearson

In [None]:
# extract all valid subjects
# The helper is called from the parent directory. Restore the starting directory
# in a `finally` so a failure in the helper cannot leave the notebook in the
# wrong working directory (all later relative paths would silently break).
notebook_dir = os.getcwd()
os.chdir('..')
try:
    valid_sub = extract_correct_csv.extract_only_valid_subject()
finally:
    os.chdir(notebook_dir)

In [None]:
# Score files and the size of each extreme group.
path_sias = '../data/sias_score.csv'
path_lds = '../data/lds_subjects.csv'
len_sub = 6

# Empty results table, one row per subject is added later.
df_correlation = pd.DataFrame(columns=['subject','pearson','r2score','pearson_disc','cohen_disc'])

# extract the 6 most/least generalization subjects and the 6 most/least anxious subjects
top_lds_list, bot_lds_list = [anxious_subjects(path_lds, len_sub, kind) for kind in ('top', 'bot')]
top_sias_list, bot_sias_list = [anxious_subjects(path_sias, len_sub, kind) for kind in ('top', 'bot')]

In [None]:
# Agreement between the rational agent and every valid subject.
# One record per subject is collected and the frame is built once after the loop:
# this avoids the quadratic concat-in-a-loop and makes the cell safe to re-run.
correlation_columns = ['subject','pearson','r2score','pearson_disc','cohen_disc']
correlation_records = []

for sub in valid_sub:
    subj_ = extract_correct_csv.read_correct_subject_csv(sub)

    # read data of real subjects
    df_sub = pd.read_csv('../data/newLookAtMe/newLookAtMe'+subj_+'.csv')
    df_sub = df_sub[['shock', 'rating', 'morphing level']].copy()
    df_sub['shock'] = df_sub['shock'].astype(int)  # convert shock from boolean to int
    df_sub['morphing level'] = (df_sub['morphing level'] == 6).astype(int)
    df_sub['rating'] = df_sub['rating'].replace([1, 2, 3, 4, 5], [0.2, 0.4, 0.6, 0.8, 1])  # convert vote into (0,1]
    df_sub_learn = df_sub[HABITUATION_TRIALS:]  # remove habituation trials
    rating_sub = np.array(df_sub_learn['rating'])

    # rational agent output
    rating_rational = total_array_simulated
    # keep only the trials where both the real and the simulated rating are present
    valid_trials = ~np.logical_or(np.isnan(rating_sub), np.isnan(rating_rational))
    rating_sub = np.compress(valid_trials, rating_sub)
    rating_rational = np.compress(valid_trials, rating_rational)

    # discretization of rating rational onto the same 0.2-step scale as the real ratings
    round_vector = np.array([0.2, 0.4, 0.6, 0.8, 1])
    rating_rational_discr = np.round(rating_rational / 0.2) * 0.2
    rating_rational_discr = np.clip(rating_rational_discr, round_vector.min(), round_vector.max())

    # pearson correlation coefficient between rational model and real data
    pearson = round(np.corrcoef(rating_sub,rating_rational)[0][1],2)

    # r2 score between rational model and real data
    r2 = round(r2_score(rating_sub,rating_rational),2)

    # pearson correlation coefficient using the discretized rational ratings
    pearson_disc = round(np.corrcoef(rating_sub,rating_rational_discr)[0][1],2)

    # Cohen's kappa compares class labels, so both sides must be exact integers.
    # Float labels carry rounding fuzz (3 * 0.2 * 10 == 6.000000000000001, while
    # 0.6 * 10 == 6.0), which would make equal ratings count as different classes.
    labels_sub = np.rint(rating_sub * 10).astype(int)
    labels_rational = np.rint(rating_rational_discr * 10).astype(int)
    cohen_disc = round(cohen_kappa_score(labels_sub, labels_rational),2)

    correlation_records.append({'subject':sub,'pearson':pearson,'r2score':r2,'pearson_disc':pearson_disc,'cohen_disc':cohen_disc})

df_correlation = pd.DataFrame(correlation_records, columns=correlation_columns)
# subject ids as float so they align with the SIAS / LDS tables below
df_correlation['subject'] = df_correlation['subject'].astype(float)

sias_df = pd.read_csv('../data/sias_score.csv').drop(columns='social_anxiety')
sias_df['subject'] = sias_df['subject'].astype(float)

lds_df = pd.read_csv('../data/lds_subjects.csv')
lds_df['subject'] = lds_df['subject'].astype(float)

# join the three tables on subject and keep only subjects present in all of them
df_correlation = pd.concat([sias_df.set_index('subject'), lds_df.set_index('subject'), df_correlation.set_index('subject')], axis=1).reset_index().dropna()

`DataFrame` containing several correlation indices between the rational agent and the real subjects, together with the corresponding **SIAS** and **LDS** values.

In [None]:
# Show the current working directory (pure Python, so it also works where the
# `pwd` shell command is unavailable).
print(os.getcwd())

In [None]:
# Save the per-subject correlation table next to the rational-agent outputs
# (the row index is not written).
df_correlation.to_csv('../output/pyro/complete_rational/correlation.csv', index=False)

In [None]:
# Preview the first 10 rows of the correlation table.
df_correlation.head(10)

### Scatter correlation plot between LDS values and Pearson

In [None]:
from sklearn.linear_model import LinearRegression

# Regress the Pearson coefficient on the LDS score. The predictor is reshaped
# into a single-column 2-D array before being passed to the regressor.
pearson_values = df_correlation['pearson'].values
lds_values = df_correlation['lds'].values.reshape(-1, 1)

reg = LinearRegression().fit(lds_values, pearson_values)
predict = reg.predict(lds_values)

In [None]:
# Fitted line (red) over the per-subject points, drawn on the current axes.
ax_lds = plt.gca()
ax_lds.plot(lds_values, predict, color='red')
ax_lds.scatter(lds_values, pearson_values)
ax_lds.set_xlabel('Linear Deviation Score')
ax_lds.set_ylabel('Pearson correlation coefficient')
plt.show()

### Scatter correlation plot between SIAS values and Pearson

In [None]:
# Regress the Pearson coefficient on the SIAS score (single-column predictor).
sias_values = df_correlation['sias_score'].values.reshape(-1, 1)

reg_sias = LinearRegression().fit(sias_values, pearson_values)
predict_sias = reg_sias.predict(sias_values)

In [None]:
# Fitted line (red) over the per-subject points, drawn on the current axes.
ax_sias = plt.gca()
ax_sias.plot(sias_values, predict_sias, color='red')
ax_sias.scatter(sias_values, pearson_values)
ax_sias.set_xlabel('sias_values')
ax_sias.set_ylabel('pearson values')
plt.show()

## Analysis of subjects with more fear generalization vs less fear generalization

In [None]:
# Median of each agreement index over the subjects with the highest / lowest
# LDS score. Each group is selected once and reused for all four indices.
lds_high_rows = df_correlation[df_correlation.subject.isin(top_lds_list)]
lds_low_rows = df_correlation[df_correlation.subject.isin(bot_lds_list)]

pearson_lds_high = lds_high_rows['pearson'].median()
pearson_lds_low = lds_low_rows['pearson'].median()

r2_lds_high = lds_high_rows['r2score'].median()
r2_lds_low = lds_low_rows['r2score'].median()

discrete_pearson_lds_high = lds_high_rows['pearson_disc'].median()
discrete_pearson_lds_low = lds_low_rows['pearson_disc'].median()

cohen_lds_high = lds_high_rows['cohen_disc'].median()
cohen_lds_low = lds_low_rows['cohen_disc'].median()

# The group size comes from len_sub; the previous message hard-coded "5" while
# the groups are built with len_sub subjects.
print(f'Correlation between {len_sub} more/less fear gen subjects with the rational agent\n')
print('Pearson\nHigh fear gen: ',pearson_lds_high, ' Low fear gen:',pearson_lds_low)
print('\nR2score\nHigh fear gen: ',r2_lds_high, ' Low fear gen:',r2_lds_low)
print('\nPearson discrete\nHigh fear gen: ',discrete_pearson_lds_high, ' Low fear gen:',discrete_pearson_lds_low)
print('\nCohen\nHigh fear gen: ',cohen_lds_high, ' Low fear gen:',cohen_lds_low)

## Analysis of subjects with higher SIAS score vs lower SIAS score

In [None]:
# Median of each agreement index over the subjects with the highest / lowest
# SIAS score; the two groups are selected once and reused.
sias_high_rows = df_correlation[df_correlation.subject.isin(top_sias_list)]
sias_low_rows = df_correlation[df_correlation.subject.isin(bot_sias_list)]

pearson_sias_high, pearson_sias_low = sias_high_rows['pearson'].median(), sias_low_rows['pearson'].median()
r2_sias_high, r2_sias_low = sias_high_rows['r2score'].median(), sias_low_rows['r2score'].median()
discrete_pearson_sias_high, discrete_pearson_sias_low = (
    sias_high_rows['pearson_disc'].median(),
    sias_low_rows['pearson_disc'].median(),
)
cohen_sias_high, cohen_sias_low = sias_high_rows['cohen_disc'].median(), sias_low_rows['cohen_disc'].median()

print(f'Correlation between {len_sub} more/less anxiety subjects with the rational agent\n')
print('Pearson\nHigh anxiety: ',pearson_sias_high, ' Low anxiety:',pearson_sias_low)
print('\nR2score\nHigh anxiety: ',r2_sias_high, ' Low anxiety:',r2_sias_low)
print('\nPearson discrete\nHigh anxiety: ',discrete_pearson_sias_high, ' Low anxiety:',discrete_pearson_sias_low)
print('\nCohen\nHigh anxiety: ',cohen_sias_high, ' Low anxiety:',cohen_sias_low)

# Intraclass correlation
Discretize the data

In [None]:
# Bin the simulated probabilities onto the 1..5 rating scale.
values = np.array([0, 0.2, 0.4, 0.6, 0.8, 1])
n_levels = len(values) - 1
# np.digitize returns len(values) (= 6) for a probability of exactly 1.0 and for
# NaN, which is outside the rating scale. Clip to 1..n_levels and keep NaN as
# NaN so missing simulated trials can be dropped instead of counted as a rating.
discretized_data = np.clip(np.digitize(total_array_simulated, values), 1, n_levels).astype(float)
discretized_data[np.isnan(total_array_simulated)] = np.nan

global_columns = ['Subject', 'Rating rational', 'Rating real']
subject_frames = []
notebook_dir = os.getcwd()

for sub in valid_sub:
    # The helper is called from the parent directory; always return afterwards,
    # even if it raises.
    os.chdir('..')
    try:
        string_sub = extract_correct_csv.read_correct_subject_csv(sub)
    finally:
        os.chdir(notebook_dir)
    df_sub = pd.read_csv('../data/newLookAtMe/newLookAtMe'+string_sub+'.csv')
    df_sub = df_sub[HABITUATION_TRIALS:]  # remove habituation trials
    tmp_df = pd.DataFrame({'Subject': sub, 'Rating rational': discretized_data, 'Rating real': df_sub['rating']})
    subject_frames.append(tmp_df)

# Concatenate once (instead of growing the frame inside the loop).
if subject_frames:
    df_global = pd.concat(subject_frames, ignore_index=True)
else:
    df_global = pd.DataFrame(columns=global_columns)

In [None]:
# Intraclass correlation between simulated and real ratings, one value per subject.
valid_subjects = df_global.Subject.unique()
dict_results = {}
for x in valid_subjects:
    # rows of this subject, complete cases only, without the id column
    subject_rows = df_global[df_global.Subject == x].dropna()
    df_sub_ = subject_rows.drop(columns=['Subject']).reset_index(drop=True)
    df_sub_['Rating rational'] = df_sub_['Rating rational'].astype(float)
    dict_results[x] = intraclass_correlation(df_sub_).value

In [None]:
df_icc = pd.DataFrame(list(dict_results.items()), columns=['Subject', 'ICC'])

# add ICC values to correlation df
# Subject ids are cast to float so they align with df_correlation's float ids.
icc_by_subject = df_icc.assign(Subject=df_icc['Subject'].astype(float)).set_index('Subject')
# Keep the subject id as a column: reset_index(drop=True) used to discard it,
# leaving a per-subject table whose rows could not be attributed to anyone.
df_all_indexes = (
    pd.concat([df_correlation.set_index('subject'), icc_by_subject], axis=1)
    .rename_axis('subject')
    .reset_index()
    .dropna()
)
df_all_indexes


## Kernel Density Estimation Plot

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Two subjects with the highest score in the LDS file (type_ defaults to 'top')
# and two with the lowest.
higher_lds = anxious_subjects(path_lds, 2)
lower_lds = anxious_subjects(path_lds, 2, 'bot')

In [None]:
# Show the selected subject ids.
print(higher_lds)
print(lower_lds)

In [None]:
def extract_ratings(subject):
    """Return the post-habituation ratings of one subject, split by stimulus.

    Parameters
    ----------
    subject
        Subject identifier, passed to
        ``extract_correct_csv.read_correct_subject_csv`` to obtain the suffix
        of the subject's CSV file name.

    Returns
    -------
    tuple of numpy.ndarray
        ``(y_plus_real, y_minus_real)``: ratings of the trials whose morphing
        level equals 6, and ratings of all other trials. Ratings 1..5 are
        mapped onto 0.2..1.
    """
    string_sub = extract_correct_csv.read_correct_subject_csv(subject)
    df_ = pd.read_csv('../data/newLookAtMe/newLookAtMe'+string_sub+'.csv')
    # Drop the habituation trials. Nothing is assigned back into this slice, so
    # no chained-assignment write happens.
    df_ = df_.iloc[HABITUATION_TRIALS:]
    is_cs_plus = df_['morphing level'] == 6
    rating = df_['rating'].replace([1, 2, 3, 4, 5], [0.2, 0.4, 0.6, 0.8, 1])
    y_plus_real = rating[is_cs_plus].values
    y_minus_real = rating[~is_cs_plus].values
    return y_plus_real, y_minus_real

def dataFrame_kde(simulated, real):
    """Put simulated and real ratings side by side for a KDE plot.

    Parameters
    ----------
    simulated, real : array-like (1-D)
        Values for the ``SIM`` and ``REAL`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``SIM`` and ``REAL`` on a 0..k-1 index. When the inputs have
        different lengths the shorter column is padded with NaN instead of
        raising a length-mismatch error.
    """
    # Wrapping each input in its own Series lets pandas align them on position
    # and pad the shorter one. np.asarray discards any index carried by the input.
    return pd.DataFrame({
        'SIM': pd.Series(np.asarray(simulated)),
        'REAL': pd.Series(np.asarray(real)),
    })

In [None]:
# Real ratings of the second subject in higher_lds, split into the two arrays
# returned by extract_ratings.
plus, minus = extract_ratings(higher_lds[1])

In [None]:
# Pair column 1 of each simulated array with the corresponding real ratings
# (SIM / REAL columns).
df_plus = dataFrame_kde(array_csplus_simulated[:,1], plus)
df_minus = dataFrame_kde(array_csminus_simulated[:,1], minus)

In [None]:
# Two stacked panels: density of simulated vs real ratings for CS+ and CS-.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 7))
fig.suptitle(f'Subject {higher_lds[1]}')
for panel, panel_title, panel_data in ((ax1, 'CS +', df_plus), (ax2, 'CS -', df_minus)):
    panel.set_title(panel_title)
    sns.kdeplot(panel_data, ax=panel)
fig.tight_layout()
plt.show()

In [None]:
# Same comparison for the first subject in higher_lds.
plus, minus = extract_ratings(higher_lds[0])
df_plus = dataFrame_kde(array_csplus_simulated[:, 1], plus)
df_minus = dataFrame_kde(array_csminus_simulated[:, 1], minus)

# Two stacked panels: density of simulated vs real ratings for CS+ and CS-.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 7))
fig.suptitle(f'Subject {higher_lds[0]}')
for panel, panel_title, panel_data in ((ax1, 'CS +', df_plus), (ax2, 'CS -', df_minus)):
    panel.set_title(panel_title)
    sns.kdeplot(panel_data, ax=panel)
fig.tight_layout()
plt.show()

In [None]:
# Same comparison for the first subject in lower_lds.
plus, minus = extract_ratings(lower_lds[0])
df_plus = dataFrame_kde(array_csplus_simulated[:, 1], plus)
df_minus = dataFrame_kde(array_csminus_simulated[:, 1], minus)

# Two stacked panels: density of simulated vs real ratings for CS+ and CS-.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 7))
fig.suptitle(f'Subject {lower_lds[0]}')
for panel, panel_title, panel_data in ((ax1, 'CS +', df_plus), (ax2, 'CS -', df_minus)):
    panel.set_title(panel_title)
    sns.kdeplot(panel_data, ax=panel)
fig.tight_layout()
plt.show()

In [None]:
# Same comparison for the second subject in lower_lds.
plus, minus = extract_ratings(lower_lds[1])
df_plus = dataFrame_kde(array_csplus_simulated[:, 1], plus)
df_minus = dataFrame_kde(array_csminus_simulated[:, 1], minus)

# Two stacked panels: density of simulated vs real ratings for CS+ and CS-.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 7))
fig.suptitle(f'Subject {lower_lds[1]}')
for panel, panel_title, panel_data in ((ax1, 'CS +', df_plus), (ax2, 'CS -', df_minus)):
    panel.set_title(panel_title)
    sns.kdeplot(panel_data, ax=panel)
fig.tight_layout()
plt.show()