In [None]:
import numpy as np
import pandas as pd
from scipy import stats

# Integer codes used to select the two groups from the `sex` column.
MALE, FEMALE = 0, 1

In [None]:
# Location of the survey CSV.
# NOTE(review): absolute path — looks like a Google Drive mount (Colab);
# confirm, and consider making the data directory configurable.
file_path = "/content/drive/MyDrive/data/psycstat_data_2024.csv"

# Read the CSV into a DataFrame; the bare `df` on the last line makes the
# notebook display it as the cell's result.
df = pd.read_csv(file_path)
df

Unnamed: 0,serial,sex,Per1,Per2,Per3,Per4,Per5,Per6,Per7,Per8,...,imp12,imp13,imp14,imp15,Alc1,Alc2,Alc3,Alc4,Alc5,Alc6
0,1,0,4,4,4,4,3,4,3,4,...,4,2,4,4,1,0,1,18,8,2
1,2,0,5,4,4,5,2,3,5,5,...,4,3,2,2,1,2,8,19,24,1
2,3,1,3,4,3,3,1,5,4,3,...,3,2,3,2,1,2,17,15,32,1
3,4,1,3,4,3,2,2,4,4,4,...,4,2,4,1,1,1,4,19,8,2
4,5,0,4,3,4,5,1,5,5,4,...,4,3,4,5,1,2,2,19,16,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,1,3,4,3,4,2,5,5,3,...,3,1,3,5,1,1,4,17,24,2
196,197,0,3,3,2,4,2,4,4,4,...,3,5,2,2,1,2,2,19,7,6
197,198,1,3,5,4,4,1,3,5,5,...,4,5,5,3,1,1,2,16,7,5
198,199,0,4,3,3,2,2,2,4,3,...,2,4,3,2,1,2,4,18,16,2


In [None]:
# Reverse-score selected personality items: each listed item gets a companion
# "<item>_inverse" column holding 6 - score.
# NOTE(review): 6 - x presumably assumes a 1-5 response scale — confirm.
for _item in ("Per5", "Per9", "Per10", "Per15"):
    df[_item + "_inverse"] = 6 - df[_item]

In [None]:
# Item columns that make up each personality subscale. Names ending in
# "_inverse" are the reverse-scored versions of the raw items.

# Extraversion
ext_cols = [
    "Per1",
    "Per9_inverse",
    "Per14",
]

# Agreeableness
agr_cols = [
    "Per4",
    "Per12",
    "Per15_inverse",
]

# Conscientiousness
con_cols = [
    "Per2",
    "Per6",
    "Per13",
]

# Neuroticism
neu_cols = [
    "Per5_inverse",
    "Per7",
    "Per10_inverse",
]

In [None]:
# Per-respondent Neuroticism score: the row-wise mean of the items listed in
# neu_cols. DataFrame.mean(axis=1) is the vectorised equivalent of applying
# Series.mean to every row; missing item values are skipped (skipna=True).
df['Neu_mean'] = df[neu_cols].mean(axis=1, skipna=True)

In [None]:
# Split the Neuroticism scores into one Series per sex group, selecting rows
# with a boolean mask and the score column in a single .loc lookup.
male_neu = df.loc[df['sex'].eq(MALE), 'Neu_mean']
female_neu = df.loc[df['sex'].eq(FEMALE), 'Neu_mean']

In [None]:
# Descriptive statistics of the Neuroticism score within each sex group.
# Both statistics come from the same grouped column; pandas' .var() is the
# sample variance (ddof=1) by default.
_neu_by_sex = df.groupby('sex')['Neu_mean']
avg_neu_sex = _neu_by_sex.mean()
var_neu_sex = _neu_by_sex.var()

print('average by sex:\n', avg_neu_sex)
print('\nvariation by sex:\n', var_neu_sex)

average by sex:
 sex
0    3.500000
1    3.572464
Name: Neu_mean, dtype: float64

variation by sex:
 sex
0    0.646937
1    0.574667
Name: Neu_mean, dtype: float64


In [None]:
# Levene's test for equal variances between the male and female scores.
# center='median' is SciPy's default, spelled out here so the variant in use
# is visible to the reader.
levene_stat, levene_p = stats.levene(male_neu, female_neu, center='median')
print("Levene's Test")
print('levene value:', levene_stat)
print('p-value:', levene_p)

Levene's Test
levene value: 0.059658745868675894
p-value: 0.8072890368687385


In [None]:
# F statistic and p-value from scipy.stats.f_oneway on the two groups.
# NOTE(review): f_oneway is a one-way ANOVA, i.e. it tests equality of the
# group MEANS, not of the variances. With two groups it reproduces the pooled
# t-test (F = t**2, same p-value). If a variance-ratio F-test was intended
# here, this call needs replacing — confirm with the author.
anova_result = stats.f_oneway(male_neu, female_neu)
F_stat, f_p = anova_result.statistic, anova_result.pvalue
print('F-Test')
print('F-value:', F_stat)
print('p-value:', f_p)

F-Test
F-value: 0.42506162041822326
p-value: 0.5151782351575653


In [None]:
# Independent two-sample t-test on the male vs. female scores.
# equal_var=True selects the pooled-variance (Student's) form of the test.
ttest_result = stats.ttest_ind(male_neu, female_neu, equal_var=True)
t_stat, t_p = ttest_result.statistic, ttest_result.pvalue
print('t-Test')
print('t-value:', t_stat)
print('p-value:', t_p)

t-Test
t-value: -0.6519674995106866
p-value: 0.5151782351575742


In [None]:
# 95% confidence interval for the difference in mean Neuroticism score
# (male - female) under the equal-variance (pooled) model, so that it is
# consistent with the equal_var=True t-test and with the n1 + n2 - 2 degrees
# of freedom used for the critical value.
confidence_level = 0.95
n_male, n_female = len(male_neu), len(female_neu)
degrees_of_freedom = n_male + n_female - 2
t_critical = stats.t.ppf(1 - (1 - confidence_level) / 2, degrees_of_freedom)
mean_difference = np.mean(male_neu) - np.mean(female_neu)

# Pooled sample variance: the two group variances (ddof=1) weighted by their
# own degrees of freedom. The previous formula, var1/n1 + var2/n2, is the
# unpooled (Welch) standard error and does not belong with n1 + n2 - 2 df.
pooled_var = (
    (n_male - 1) * male_neu.var() + (n_female - 1) * female_neu.var()
) / degrees_of_freedom
std_error = np.sqrt(pooled_var * (1 / n_male + 1 / n_female))

margin_of_error = t_critical * std_error
lower_bound = mean_difference - margin_of_error
upper_bound = mean_difference + margin_of_error

print("95% Confidence Interval")
print("Lower Bound:", lower_bound)
print("Upper Bound:", upper_bound)


95% Confidence Interval
Lower Bound: -0.2906061037257788
Upper Bound: 0.14567856749389585


In [None]:
from statsmodels.stats.weightstats import DescrStatsW

In [None]:
# Cross-check with statsmodels: confidence interval for the difference in
# means (male - female) using the pooled-variance t interval at alpha = 0.05.
descr_male = DescrStatsW(male_neu)
descr_female = DescrStatsW(female_neu)
cm = descr_male.get_compare(descr_female)

conf_int = cm.tconfint_diff(alpha=0.05, usevar='pooled')
ci_lower, ci_upper = conf_int[0], conf_int[1]

print("Lower Bound:", ci_lower)
print("Upper Bound:", ci_upper)

Lower Bound: -0.2916461926103965
Upper Bound: 0.14671865637851178
