In [3]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [19]:
# Hypothesised (H0) mean for each trait. The keys have to match the column
# names of `df`, because the test loop looks each value up as means[col].
means = dict(
    Neuroticismus=25.5,
    Extraverze=26.1,
    Otevrenost_vuci_zkusenosti=28.6,
    Privetivost=24.7,
    Svedomitost=26.8,
)

In [9]:
# Load the semicolon-separated sample; the first CSV column becomes the index.
df = pd.read_csv(
    "data.csv",
    sep=";",
    index_col=0,
)
df

Unnamed: 0_level_0,Neuroticismus,Extraverze,Otevrenost_vuci_zkusenosti,Privetivost,Svedomitost
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
36,10,33,30,42,42
40,21,31,32,35,36
48,16,32,28,25,38
82,26,30,40,39,28
205,30,29,33,32,32
318,6,37,34,34,26
344,24,25,34,29,35
347,16,37,37,32,36
409,33,27,31,31,22
515,15,37,37,31,39


In [10]:
# Per-column summary statistics (count, mean, std, quartiles) of the loaded sample.
df.describe()

Unnamed: 0,Neuroticismus,Extraverze,Otevrenost_vuci_zkusenosti,Privetivost,Svedomitost
count,15.0,15.0,15.0,15.0,15.0
mean,21.533333,31.466667,33.733333,32.0,30.6
std,8.838767,6.069439,3.972525,6.301927,7.669979
min,6.0,15.0,28.0,19.0,17.0
25%,15.5,29.5,30.5,30.0,24.5
50%,21.0,32.0,34.0,32.0,32.0
75%,28.5,36.5,37.0,34.5,36.0
max,34.0,39.0,40.0,42.0,42.0


In [20]:
# Display the `means` dictionary; its values are used as the H0 means in the tests below.
means

{'Neuroticismus': 25.5,
 'Extraverze': 26.1,
 'Otevrenost_vuci_zkusenosti': 28.6,
 'Privetivost': 24.7,
 'Svedomitost': 26.8}

In [40]:
def get_stats(values, h0_mean=None):
    """Return the sample size, sample mean and standard error of the mean.

    Parameters
    ----------
    values : sequence of numbers (list, NumPy array or pandas Series)
        The sample.
    h0_mean : float, optional
        Not used by the computation. It is accepted only so that existing
        calls of the form ``get_stats(values, h0_mean)`` keep working.

    Returns
    -------
    tuple
        ``(n, x_mean, scale)`` where ``n`` is ``len(values)``, ``x_mean`` is
        ``np.mean(values)`` and ``scale`` is ``stats.sem(values)``.
    """
    n = len(values)
    x_mean = np.mean(values)
    # stats.sem defaults to ddof=1, i.e. sample standard deviation / sqrt(n).
    scale = stats.sem(values)
    return n, x_mean, scale

def left_sided_p_value(values, h0_mean):
    """Return the lower-tail p-value of a one-sample t-test.

    The sample mean is evaluated under a Student t distribution with
    ``n - 1`` degrees of freedom, located at ``h0_mean`` and scaled by the
    standard error of the mean, so the result is P(T <= observed mean).
    """
    sample_size, sample_mean, std_error = get_stats(values, h0_mean)
    # Freeze the distribution of the sample mean under H0, then read off the CDF.
    null_dist = stats.t(df=sample_size - 1, loc=h0_mean, scale=std_error)
    return null_dist.cdf(sample_mean)

def right_sided_p_value(values, h0_mean):
    """Return the upper-tail p-value of a one-sample t-test.

    Parameters
    ----------
    values : sequence of numbers
        The sample.
    h0_mean : float
        Mean assumed under the null hypothesis.

    Returns
    -------
    float
        P(T >= observed mean) under a Student t distribution with ``n - 1``
        degrees of freedom, located at ``h0_mean`` and scaled by the standard
        error of the mean.
    """
    n, x_mean, scale = get_stats(values, h0_mean)
    # Use the survival function instead of ``1 - cdf``: subtracting a CDF that
    # is very close to 1 loses most significant digits of a small p-value
    # (and rounds to exactly 0 once the p-value drops below ~1e-16).
    return stats.t.sf(x=x_mean, loc=h0_mean, scale=scale, df=n - 1)

def both_sided_p_value(values, h0_mean):
    """Return the two-sided p-value of a one-sample t-test.

    Parameters
    ----------
    values : sequence of numbers
        The sample.
    h0_mean : float
        Mean assumed under the null hypothesis.

    Returns
    -------
    float
        Twice the smaller of the lower-tail and upper-tail probabilities of
        the observed mean under a Student t distribution with ``n - 1``
        degrees of freedom, located at ``h0_mean`` and scaled by the standard
        error of the mean.
    """
    n, x_mean, scale = get_stats(values, h0_mean)
    null_dist = dict(loc=h0_mean, scale=scale, df=n - 1)
    lower_tail = stats.t.cdf(x_mean, **null_dist)
    # The upper tail comes from the survival function rather than
    # ``1 - lower_tail``, which would cancel away the significant digits of a
    # small p-value when the sample mean lies above ``h0_mean``.
    upper_tail = stats.t.sf(x_mean, **null_dist)
    return 2 * min(lower_tail, upper_tail)

def get_effect_size(values, h0_mean):
    """Return the standardized mean difference (one-sample Cohen's d).

    Parameters
    ----------
    values : sequence of numbers
        The sample.
    h0_mean : float
        Mean assumed under the null hypothesis.

    Returns
    -------
    float
        ``(mean(values) - h0_mean) / std(values, ddof=1)``.
    """
    # Only the sample mean and the sample standard deviation are needed here,
    # so the standard error is not computed.
    return (np.mean(values) - h0_mean) / np.std(values, ddof=1)
    
# Print, for every column of `df`, the one-sample t-test p-values and the
# effect size against the hypothesised mean stored in `means`.
for col in df.columns:
    col_values = df[col]
    col_h0_mean = means[col]

    print(f"==== {col} ====")

    n, x_mean, scale = get_stats(col_values, col_h0_mean)

    p_left = left_sided_p_value(col_values, col_h0_mean)
    # Cross-check against SciPy's own one-sided test. Note that this holds the
    # whole result object (statistic and pvalue), not just the p-value.
    p_left_1 = stats.ttest_1samp(col_values, col_h0_mean, alternative='less')
    p_right = right_sided_p_value(col_values, col_h0_mean)
    p_both = both_sided_p_value(col_values, col_h0_mean)
    effect_size = get_effect_size(col_values, col_h0_mean)

    report_lines = (
        f"{x_mean=}, h0_mean={col_h0_mean}",
        f"{p_left=}, {p_left_1=} ",
        f"{p_right=}",
        f"{p_both=}",
        f"{effect_size=}",
        "",
    )
    for report_line in report_lines:
        print(report_line)


==== Neuroticismus ====
x_mean=21.533333333333335, h0_mean=25.5
p_left=0.05206216081609009, p_left_1=Ttest_1sampResult(statistic=-1.7381194920153527, pvalue=0.05206216081609009) 
p_right=0.94793783918391
p_both=0.10412432163218018
effect_size=-0.44878052308626376

==== Extraverze ====
x_mean=31.466666666666665, h0_mean=26.1
p_left=0.9979462662125995, p_left_1=Ttest_1sampResult(statistic=3.424535460914517, pvalue=0.9979462662125995) 
p_right=0.00205373378740048
p_both=0.00410746757480096
effect_size=0.884211253907911

==== Otevrenost_vuci_zkusenosti ====
x_mean=33.733333333333334, h0_mean=28.6
p_left=0.9999035871231939, p_left_1=Ttest_1sampResult(statistic=5.004705088567807, pvalue=0.9999035871231939) 
p_right=9.641287680606236e-05
p_both=0.00019282575361212473
effect_size=1.2922092973801755

==== Privetivost ====
x_mean=32.0, h0_mean=24.7
p_left=0.999743804603156, p_left_1=Ttest_1sampResult(statistic=4.4863702461579695, pvalue=0.999743804603156) 
p_right=0.0002561953968439612
p_both=0.