# Analysis of convergence 

In [1]:
import pandas as pd

data = pd.read_csv('study1_with_simulation_result.csv')

# preview data 
data.head(3)

Unnamed: 0,case_id,age,sex,ethnicity,rel_acquaintance,rel_friend,rel_roommate,rel_boygirlfriend,rel_relative,rel_other,...,Unenvious,Unintellectual,Unsympathetic,Warm,Withdrawn,miniMarker_simulated_E,miniMarker_simulated_A,miniMarker_simulated_C,miniMarker_simulated_N,miniMarker_simulated_O
0,1,27.0,M,2.0,,,,,,,...,9,2,2,9,3,61,66,48,28,47
1,2,26.0,M,3.0,,,,,,,...,7,4,3,7,4,51,56,44,29,49
2,3,24.0,F,4.0,,,,,,,...,8,2,2,8,9,14,63,47,33,61


In [2]:
# get original BFI score 
original_bfi2_e = data['bfi2_e']
original_bfi2_a = data['bfi2_a']
original_bfi2_c = data['bfi2_c']
original_bfi2_n = data['bfi2_n']
original_bfi2_o = data['bfi2_o']

In [3]:
# get original mini_marker_score
original_tda_a = data['tda_a']
original_tda_e = data['tda_e']
original_tda_n = data['tda_n']
original_tda_c = data['tda_c']
original_tda_o = data['tda_o']

In [4]:
# get simulated mini_marker_score
sim_tda_a = data['miniMarker_simulated_A'] /8
sim_tda_e = data['miniMarker_simulated_E'] /8
sim_tda_n = data['miniMarker_simulated_N'] /8
sim_tda_c = data['miniMarker_simulated_C'] /8
sim_tda_o = data['miniMarker_simulated_O'] /8

# Compute correlations

## Replicating Soto's study

In [5]:
from scipy.stats import pearsonr
r_bfi_original_tda_e, _ = pearsonr(original_bfi2_e, original_tda_e)
r_bfi_original_tda_a, _ = pearsonr(original_bfi2_a, original_tda_a)
r_bfi_original_tda_n, _ = pearsonr(original_bfi2_n, original_tda_n)
r_bfi_original_tda_c, _ = pearsonr(original_bfi2_c, original_tda_c)
r_bfi_original_tda_o, _ = pearsonr(original_bfi2_o, original_tda_o)

# compute the average correlation
r_bfi_original_tda = (r_bfi_original_tda_e + r_bfi_original_tda_a + r_bfi_original_tda_n + r_bfi_original_tda_c + r_bfi_original_tda_o) / 5

r_bfi_original_tda

0.801933672053827

In [6]:
print(r_bfi_original_tda_e)
print(r_bfi_original_tda_a)
print(r_bfi_original_tda_n)
print(r_bfi_original_tda_c)
print(r_bfi_original_tda_o)

0.8845839782579082
0.7962419547822508
0.7384592648174876
0.8399377903528379
0.7504453720586505


## Examining the Simulation result: original BFI vs simulated mini_marker

In [7]:
r_bfi_sim_tda_e, _ = pearsonr(original_bfi2_e, sim_tda_e)
r_bfi_sim_tda_a, _ = pearsonr(original_bfi2_a, sim_tda_a)
r_bfi_sim_tda_n, _ = pearsonr(original_bfi2_n, sim_tda_n)
r_bfi_sim_tda_c, _ = pearsonr(original_bfi2_c, sim_tda_c)
r_bfi_sim_tda_o, _ = pearsonr(original_bfi2_o, sim_tda_o)

# compute the average correlation
r_bfi_sim_tda = (r_bfi_sim_tda_e + r_bfi_sim_tda_a + r_bfi_sim_tda_n + r_bfi_sim_tda_c + r_bfi_sim_tda_o) / 5

r_bfi_sim_tda

0.7895051847417606

In [8]:
print(r_bfi_sim_tda_e)
print(r_bfi_sim_tda_a)
print(r_bfi_sim_tda_n)
print(r_bfi_sim_tda_c)
print(r_bfi_sim_tda_o)

0.8334198294450234
0.648158418113815
0.8032903599629935
0.839532633898929
0.8231246822880419


## Examining the Simulation result: original BFI vs simulated mini_marker

In [9]:
r_sim_tda_e, _ = pearsonr(sim_tda_e, original_tda_e)
r_sim_tda_a, _ = pearsonr(sim_tda_a, original_tda_a)
r_sim_tda_n, _ = pearsonr(sim_tda_n, original_tda_n)
r_sim_tda_c, _ = pearsonr(sim_tda_c, original_tda_c)
r_sim_tda_o, _ = pearsonr(sim_tda_o, original_tda_o)

In [10]:
print(r_sim_tda_e)
print(r_sim_tda_a)
print(r_sim_tda_n)
print(r_sim_tda_c)
print(r_sim_tda_o)

0.7780370453234239
0.635373366790835
0.6168498575984267
0.7473372679463908
0.6700430484351185


In [11]:
# compute the average correlation
r_sim_tda = (r_sim_tda_e + r_sim_tda_a + r_sim_tda_n + r_sim_tda_c + r_sim_tda_o) / 5

r_sim_tda

0.689528117218839