In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2
from scipy.stats import beta
import pickle
from scipy.stats import pearsonr
import math
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.neural_network import MLPClassifier,MLPRegressor
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,mean_squared_error
from itertools import combinations
from tabulate import tabulate

In [11]:
def cronbach_alpha1(items):
    item_variances = np.var(items, axis=0, ddof=1)
    total_variances = np.var(np.sum(items, axis=1), ddof=1)
    num_items = len(items[0])
    
    return (num_items / (num_items - 1)) * (1 - (np.sum(item_variances) / total_variances))
def cronbach_alpha2(items):
    num_items = items.shape[1]
    cov_sum = 0  
    for i in range(num_items):
        for j in range(i+1, num_items):  # Avoid duplicate pairs and the diagonal
            cov_ij = np.cov(items[:, i], items[:, j], ddof=1)[0, 1]  # Covariance between item i and item j
            cov_sum += cov_ij    
    mean_covariance = cov_sum / (num_items * (num_items - 1) / 2)
    item_variances = np.var(items, axis=0, ddof=1)  # Compute variance along columns (axis=0)
    mean_variance = np.mean(item_variances)
    N=items.shape[1]
    alpha=(N*mean_covariance)/(mean_variance+(N-1)*mean_covariance)
    return alpha

In [4]:
df=pd.read_csv('CSV/Labelled_data.csv')
questions={
    'EI':[0,2,4,6,8,10,12,14,16],
    'WI':[27,30,33,36,38,41,44,47,50],
    'EC':[39,32,35,40,43,46,49,52,53],
    'WC':[1,5,9,13,17,19,21,23,25],
    'EA':[3,7,11,16,18,20,22,24,26],
    'WA':[28,31,34,37,39,42,45,48,51],
    'AM':[56,60,72],
    'LC':[54,59,64,67],
    'MC':[55,62,65,69],
    'ND':[63,66],
    'PfW':[58,68,71],
    'SE':[57,61,70,73],
    'E':[74,79,84,89,94,99,104,109,114,119],
    'A':[75,80,85,90,95,100,105,110,115,120],
    'C':[76,81,86,91,96,101,106,111,116,121],
    'N':[77,82,87,92,97,102,107,112,117,122],
    'O':[78,83,88,93,98,103,108,113,118,123]
}
def I_label(score):
    if(score==6 or score==7):
        return 0
    if(score>=2 and score <=5):
        return 1
    else:
        return 2
def C_label(score):
    if(score>=4 and score<=7):
        return 0
    elif(score==2 or score==3):
        return 1
    else:
        return 2
def A_label(score):
    if(score>=4 and score<=6):
        return 0
    if(score>=7 and score <=9):
        return 1
    else:
        return 2
def AM_LC_label(score):
    if(score>=4 and score<=5):
        return 0
    elif(score>=2.6 and score<=3.9):
        return 1
    else:
        return 2
def rest_label(score):
    if(score>=4 and score<=6):
        return 0
    elif(score>=2.6 and score<=3.9):
        return 1
    else:
        return 2
def EN_label(score):
    if(score>=0 and score<=25):
        return 0
    elif(score>=26 and score<=40):
        return 1
    else:
        return 2
def AO_label(score):
    if(score>=0 and score<=30):
        return 0
    elif(score>=31 and score<=40):
        return 1
    else:
        return 2
def C3_label(score):
    if(score>=36 and score<=50):
        return 0
    elif(score>=26 and score<=35):
        return 1
    else:
        return 2
def I_smoothing(x):
    return(((8-x)*math.exp(0.6*x)+221.406)/29.591)
def C_smoothing(x):
    return(((12-x)*math.exp(0.15*x)-11.572)/0.327)
def A_smoothing(x):
    return((2+((x-2)*math.exp(-0.32*x)))/0.261)
def identity(x):
    return x
functions={
    'EI':I_label,
    'WI':I_label,
    'EC':C_label,
    'WC':C_label,
    'EA':A_label,
    'WA':A_label,
    'AM':AM_LC_label,
    'LC':AM_LC_label,
    'MC':rest_label,
    'ND':rest_label,
    'PfW':rest_label,
    'SE':rest_label,
    'E':EN_label,
    'A':AO_label,
    'C':C3_label,
    'N':EN_label,
    'O':AO_label
}
smoothing={
    'EI':I_smoothing,
    'WI':I_smoothing,
    'EC':C_smoothing,
    'WC':C_smoothing,
    'EA':A_smoothing,
    'WA':A_smoothing,
    'AM':identity,
    'LC':identity,
    'MC':identity,
    'ND':identity,
    'PfW':identity,
    'SE':identity,
    'E':identity,
    'A':identity,
    'C':identity,
    'N':identity,
    'O':identity
}

In [12]:
traits=['EI','WI','EC','WC','EA','WA','AM','LC','MC','ND','PfW','SE','E','A','C','N','O']
for trait in traits:
    indices=questions[trait]
    columns=[f'feature{i}' for i in indices]
    items=df[columns].values
    cba1=cronbach_alpha1(items)
    cba2=cronbach_alpha2(items)
    print(f'{trait}: alpha1={cba1}, alpha2={cba2}')

EI: alpha1=0.7660392196218342, alpha2=0.7660392196218343
WI: alpha1=0.8391245247600587, alpha2=0.8391245247600587
EC: alpha1=0.7568966939559056, alpha2=0.7568966939559055
WC: alpha1=0.8549385854606065, alpha2=0.8549385854606065
EA: alpha1=0.7798712692306041, alpha2=0.7798712692306041
WA: alpha1=0.7076473273547637, alpha2=0.7076473273547634
AM: alpha1=0.3149583758747311, alpha2=0.3149583758747309
LC: alpha1=0.603000654580849, alpha2=0.6030006545808486
MC: alpha1=0.7258106967185578, alpha2=0.7258106967185582
ND: alpha1=0.4990860498584744, alpha2=0.49908604985847516
PfW: alpha1=0.6387601787309714, alpha2=0.6387601787309716
SE: alpha1=0.722150865708604, alpha2=0.722150865708604
E: alpha1=0.7036938063712402, alpha2=0.7036938063712401
A: alpha1=0.6888833804800116, alpha2=0.6888833804800117
C: alpha1=0.6740799661251338, alpha2=0.6740799661251339
N: alpha1=0.8142940895270674, alpha2=0.8142940895270672
O: alpha1=0.7036003256893304, alpha2=0.7036003256893303


In [14]:
items=df[traits].values
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.7534507996324709, alpha2=0.7534507996324707


In [16]:
index=['CXI','BXI','TXI']
items=df[index].values
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.9568966319545564, alpha2=0.9568966319545565


In [19]:
tests=['Test-1','Test-2','Test-3']
items=df[tests].values
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.09105068625024992, alpha2=0.09105068625024981


In [22]:
X=[]
for trait in traits:
    x=[smoothing[trait](i) for i in list(df[trait])]
    X.append(x)
items=np.transpose(np.array(X))
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.7275577971288005, alpha2=0.7275577971288006


In [24]:
features=[f'feature{i}' for i in range(124)]
items=df[features].values
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.9587917108830185, alpha2=0.9587917108830185


In [26]:
items=df.values
cba1=cronbach_alpha1(items)
cba2=cronbach_alpha2(items)
print(f'alpha1={cba1}, alpha2={cba2}')

alpha1=0.8763243254828517, alpha2=0.8763243254828517


In [28]:
similar_sets={
    'EI':[[2,4],[0,10],[6,15]],
    'WI':[[30,33,38,44,47,50]],
    'EC':[[29,46],[32,49,53],[35,52]],
    'WC':[[1,17],[5,21,25],[9,23]],
    'EA':[[3,7,20,26],[11,22],[18,24]],
    'WA':[[34,39,45,51],[31,42,28]],
    'E':[[79,99]],
    'A':[[80,105]],
    'N':[[92,122]],
    'O':[[88,103],[98,123]]
}
for trait in traits:
    if trait in similar_sets:
        for ques in similar_sets[trait]:
            col=[f'feature{i}' for i in ques]
            items=df[col].values
            cba=cronbach_alpha1(items)
            print(f'{trait} - {ques} : {cba}')

EI - [2, 4] : 0.5496148693331777
EI - [0, 10] : 0.4790914049147734
EI - [6, 15] : 0.48350114301085756
WI - [30, 33, 38, 44, 47, 50] : 0.7943606674031765
EC - [29, 46] : 0.5822957691603403
EC - [32, 49, 53] : 0.6893786510892645
EC - [35, 52] : 0.5165897424069179
WC - [1, 17] : 0.5379308577273818
WC - [5, 21, 25] : 0.7374804515673352
WC - [9, 23] : 0.6910403630804625
EA - [3, 7, 20, 26] : 0.7410540315887434
EA - [11, 22] : 0.5748919939809984
EA - [18, 24] : 0.5488589950290044
WA - [34, 39, 45, 51] : 0.6277589874918169
WA - [31, 42, 28] : 0.6109010075951323
E - [79, 99] : 0.6174779483709574
A - [80, 105] : 0.03419978776100496
N - [92, 122] : 0.2853113982168449
O - [88, 103] : 0.006443918545398164
O - [98, 123] : 0.6350686873747209
