# Analysis of convergence 

In [1]:
import pandas as pd

# Read the study-1 CSV (which, per its filename, also carries the simulation
# results) into the single DataFrame that the later cells index into.
data = pd.read_csv(filepath_or_buffer='study1_with_simulation_result.csv')

# Show just the first three rows as a quick look at the loaded columns.
data.head(n=3)

Unnamed: 0,case_id,age,sex,ethnicity,rel_acquaintance,rel_friend,rel_roommate,rel_boygirlfriend,rel_relative,rel_other,...,Unenvious,Unintellectual,Unsympathetic,Warm,Withdrawn,miniMarker_simulated_E,miniMarker_simulated_A,miniMarker_simulated_C,miniMarker_simulated_N,miniMarker_simulated_O
0,1,27.0,M,2.0,,,,,,,...,5,4,1,5,4,47,54,42,24,26
1,2,26.0,M,3.0,,,,,,,...,4,4,4,4,4,40,40,40,36,36
2,3,24.0,F,4.0,,,,,,,...,9,2,1,5,5,28,49,49,20,38


In [2]:
# Pull the five bfi2_* columns out of `data`, one Series per column, in the
# order E, A, C, N, O.
# NOTE(review): presumably these are the observed BFI-2 domain scores — confirm
# against the dataset codebook.
(
    original_bfi2_e,
    original_bfi2_a,
    original_bfi2_c,
    original_bfi2_n,
    original_bfi2_o,
) = (data[column] for column in ('bfi2_e', 'bfi2_a', 'bfi2_c', 'bfi2_n', 'bfi2_o'))

In [3]:
# Pull the five tda_* columns (the original mini-marker scores) out of `data`,
# one Series per column, in the order A, E, N, C, O.
(
    original_tda_a,
    original_tda_e,
    original_tda_n,
    original_tda_c,
    original_tda_o,
) = (data[column] for column in ('tda_a', 'tda_e', 'tda_n', 'tda_c', 'tda_o'))

In [4]:
# Take each simulated mini-marker column and divide it element-wise by 8.
# NOTE(review): the divisor presumably turns an 8-item sum into a per-item mean
# so the values share a scale with the tda_* columns — confirm.
sim_tda_a = data['miniMarker_simulated_A'].div(8)
sim_tda_e = data['miniMarker_simulated_E'].div(8)
sim_tda_n = data['miniMarker_simulated_N'].div(8)
sim_tda_c = data['miniMarker_simulated_C'].div(8)
sim_tda_o = data['miniMarker_simulated_O'].div(8)

In [5]:
# Put the simulated and original C-scale Series side by side in one frame so
# the two columns can be compared row by row.
df = pd.DataFrame(
    dict(
        sim_tda_c=sim_tda_c,
        original_tda_c=original_tda_c,
    )
)

# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,sim_tda_c,original_tda_c
0,5.250,5.500
1,5.000,4.500
2,6.125,8.125
3,4.500,4.500
4,6.625,8.500
...,...,...
433,6.000,7.375
434,5.500,6.125
435,6.500,7.125
436,6.500,6.625


# Compute correlations

## Replicating Soto's study

In [6]:
from scipy.stats import pearsonr
# Pearson correlation between each original BFI-2 Series and the matching
# original mini-marker Series. Each pearsonr result unpacks into
# (correlation, p-value); only the correlation is kept and the p-value goes to
# the throwaway `_`.
(
    (r_bfi_original_tda_e, _),
    (r_bfi_original_tda_a, _),
    (r_bfi_original_tda_n, _),
    (r_bfi_original_tda_c, _),
    (r_bfi_original_tda_o, _),
) = (
    pearsonr(original_bfi2_e, original_tda_e),
    pearsonr(original_bfi2_a, original_tda_a),
    pearsonr(original_bfi2_n, original_tda_n),
    pearsonr(original_bfi2_c, original_tda_c),
    pearsonr(original_bfi2_o, original_tda_o),
)

# Unweighted arithmetic mean of the five correlations.
r_bfi_original_tda = (
    r_bfi_original_tda_e
    + r_bfi_original_tda_a
    + r_bfi_original_tda_n
    + r_bfi_original_tda_c
    + r_bfi_original_tda_o
) / 5

# Leave the mean as the last expression so the notebook displays it.
r_bfi_original_tda

0.801933672053827

In [7]:
# Print the five BFI-2 vs original mini-marker correlations, one per line,
# in the order E, A, N, C, O.
print(
    r_bfi_original_tda_e,
    r_bfi_original_tda_a,
    r_bfi_original_tda_n,
    r_bfi_original_tda_c,
    r_bfi_original_tda_o,
    sep='\n',
)

0.8845839782579082
0.7962419547822508
0.7384592648174876
0.8399377903528379
0.7504453720586505


## Examining the Simulation result: original BFI vs simulated mini_marker

In [8]:
# Pearson correlation between each original BFI-2 Series and the matching
# simulated mini-marker Series. Each pearsonr result unpacks into
# (correlation, p-value); only the correlation is kept and the p-value goes to
# the throwaway `_`.
(
    (r_bfi_sim_tda_e, _),
    (r_bfi_sim_tda_a, _),
    (r_bfi_sim_tda_n, _),
    (r_bfi_sim_tda_c, _),
    (r_bfi_sim_tda_o, _),
) = (
    pearsonr(original_bfi2_e, sim_tda_e),
    pearsonr(original_bfi2_a, sim_tda_a),
    pearsonr(original_bfi2_n, sim_tda_n),
    pearsonr(original_bfi2_c, sim_tda_c),
    pearsonr(original_bfi2_o, sim_tda_o),
)

# Unweighted arithmetic mean of the five correlations.
r_bfi_sim_tda = (
    r_bfi_sim_tda_e
    + r_bfi_sim_tda_a
    + r_bfi_sim_tda_n
    + r_bfi_sim_tda_c
    + r_bfi_sim_tda_o
) / 5

# Leave the mean as the last expression so the notebook displays it.
r_bfi_sim_tda

0.7231241754002166

In [9]:
# Print the five BFI-2 vs simulated mini-marker correlations, one per line,
# in the order E, A, N, C, O.
print(
    r_bfi_sim_tda_e,
    r_bfi_sim_tda_a,
    r_bfi_sim_tda_n,
    r_bfi_sim_tda_c,
    r_bfi_sim_tda_o,
    sep='\n',
)

0.8775976495788741
0.6428345934052743
0.7067069067949797
0.8428508448306108
0.545630882391344


## Examining the Simulation result: simulated mini_marker vs original mini_marker

In [10]:
# Pearson correlation between each simulated mini-marker Series and the
# matching original mini-marker Series. Each pearsonr result unpacks into
# (correlation, p-value); only the correlation is kept and the p-value goes to
# the throwaway `_`.
(
    (r_sim_tda_e, _),
    (r_sim_tda_a, _),
    (r_sim_tda_n, _),
    (r_sim_tda_c, _),
    (r_sim_tda_o, _),
) = (
    pearsonr(sim_tda_e, original_tda_e),
    pearsonr(sim_tda_a, original_tda_a),
    pearsonr(sim_tda_n, original_tda_n),
    pearsonr(sim_tda_c, original_tda_c),
    pearsonr(sim_tda_o, original_tda_o),
)

In [11]:
# Print the five simulated vs original mini-marker correlations, one per line,
# in the order E, A, N, C, O.
print(
    r_sim_tda_e,
    r_sim_tda_a,
    r_sim_tda_n,
    r_sim_tda_c,
    r_sim_tda_o,
    sep='\n',
)

0.8287121377016021
0.6008622874794822
0.6342899270425806
0.750761495867386
0.49365239178344894


In [12]:
# Unweighted arithmetic mean of the five simulated-vs-original correlations.
r_sim_tda = (
    r_sim_tda_e
    + r_sim_tda_a
    + r_sim_tda_n
    + r_sim_tda_c
    + r_sim_tda_o
) / 5

# Leave the mean as the last expression so the notebook displays it.
r_sim_tda

0.6616556479749

In [13]:
# Display the complete pearsonr result (statistic and p-value) for the
# simulated vs original C-scale pair, rather than the correlation alone.
pearsonr(x=sim_tda_c, y=original_tda_c)

PearsonRResult(statistic=0.750761495867386, pvalue=1.5533319925232719e-80)