In [1]:
# Setting up path 
from pathlib import Path
import os


# --- Helper: pick the first existing project root among macOS, Windows and Linux candidates ---
def project_path(mac_path_str, win_path_str, linux_path_str):
    """Return the first candidate project root that exists on this machine.

    The candidates are checked in the order macOS, Windows, Linux.

    Parameters
    ----------
    mac_path_str, win_path_str, linux_path_str : str or os.PathLike
        Candidate project root for each platform.

    Returns
    -------
    pathlib.Path
        The first candidate that exists. If none exists, a warning is
        printed and the macOS candidate is returned anyway.
    """
    mac_p = Path(mac_path_str)
    win_p = Path(win_path_str)
    linux_p = Path(linux_path_str)

    # Always hand back a Path (never the raw input string) so callers get a
    # consistent type regardless of which platform matched.
    for candidate in (mac_p, win_p, linux_p):
        if candidate.exists():
            return candidate

    # None of the candidates exists; fall back to the mac version but warn.
    print(f"WARNING: None of {mac_p}, {win_p} or {linux_p} exists on this system.")
    return mac_p


# Candidate project roots (macOS, Windows, Linux); the first one that exists
# on this machine is selected.
project_path_set = project_path(
    mac_path_str="/Users/kaankeskin/projects/sch_pe/",
    win_path_str="C:/Users/kaank/OneDrive/Belgeler/GitHub/sch_pe/",
    linux_path_str="/home/kaanka5312/projects/sch_pe",
)

# Make the project root the working directory so that relative paths such as
# "./data/raw/..." used by later cells resolve against it.
os.chdir(project_path_set)

# Echo the directory actually in effect as a sanity check.
print("Current working directory:", os.getcwd())

Current working directory: C:\Users\kaank\OneDrive\Belgeler\GitHub\sch_pe


In [50]:
import pandas as pd 

# Subject roster and the first raw response table.
subj_list = pd.read_csv("./data/raw/subjects_list.csv")
eo_resp = pd.read_csv("./data/raw/response.csv")

# Second raw response table. Its subject numbers can coincide with those in
# the first table, so each denekId is prefixed with "100" (string
# concatenation, then cast back to int) to keep them distinct and to match
# the keys used in the subject list.
a_resp = (
    pd.read_csv("./data/raw/aslihanyanit.csv")
      .assign(denekId=lambda df: ("100" + df["denekId"].astype(str)).astype(int))
)



In [58]:
# Resolve the group-1 task IDs on a whitespace-stripped view of the subject
# list. The raw CSV headers carry a leading space (' group', ' task-id') and a
# later cell strips them in place; looking the columns up this way keeps this
# cell runnable both before and after that happens.
_subj_stripped = subj_list.rename(columns=str.strip)
sz_task_ids = _subj_stripped.loc[_subj_stripped['group'] == 1, 'task-id']

# Responses from eo_resp that belong to group-1 subjects
# (presumably the SZ cohort, going by the variable names -- TODO confirm).
eo_sz = eo_resp[eo_resp['denekId'].isin(sz_task_ids)]

# Keep only rows whose secimsure lies strictly between 0 and 100000.
sz_filtered = eo_sz[
    (eo_sz["secimsure"] > 0) &
    (eo_sz["secimsure"] < 100000)
]


In [None]:
# Resolve the group-0 task IDs once, on a whitespace-stripped view of the
# subject list. The raw CSV headers carry a leading space (' group',
# ' task-id') and a later cell strips them in place; looking the columns up
# this way keeps this cell runnable both before and after that happens.
_subj_stripped = subj_list.rename(columns=str.strip)
hc_task_ids = _subj_stripped.loc[_subj_stripped['group'] == 0, 'task-id']

# Group-0 responses from each of the two response tables
# (presumably healthy controls, going by the variable names -- TODO confirm).
eo_hc = eo_resp[eo_resp['denekId'].isin(hc_task_ids)]
a_hc = a_resp[a_resp['denekId'].isin(hc_task_ids)]

hc_all = pd.concat([eo_hc, a_hc])

# 1) drop all rows where sayac == 0
hc_filtered = hc_all[hc_all['sayac'] != 0]

# 2) keep only rows whose secimsure lies strictly between 0 and 100000
hc_filtered = hc_filtered[
    (hc_filtered["secimsure"] > 0) &
    (hc_filtered["secimsure"] < 100000)
]


In [66]:
# Strip stray whitespace from the column headers. This is done in place, so
# every cell executed afterwards sees the cleaned names.
subj_list.columns = subj_list.columns.str.strip()
a_resp.columns    = a_resp.columns.str.strip()

# Task IDs of the subjects in group 0.
group0_ids = subj_list.loc[subj_list['group'].eq(0), 'task-id']

# Rows of a_resp whose denekId is NOT one of the group-0 task IDs.
a_dep = a_resp.loc[~a_resp['denekId'].isin(group0_ids)]

# Subjects that have at least one row with sayac == 59.
ids_with_59 = a_dep.loc[a_dep['sayac'].eq(59), 'denekId'].unique()

# Keep those subjects only, and only the rows whose secimsure lies strictly
# between 0 and 100000.
a_dep_filtered = a_dep.loc[
    a_dep['denekId'].isin(ids_with_59)
    & a_dep['secimsure'].gt(0)
    & a_dep['secimsure'].lt(100000)
]

In [None]:
def _mean_secimsure_by_subject(responses):
    """Return one row per ``denekId`` with its mean ``secimsure``.

    Parameters
    ----------
    responses : pandas.DataFrame
        Table with at least the columns ``denekId`` and ``secimsure``.

    Returns
    -------
    pandas.DataFrame
        Columns ``denekId`` and ``avg_secimsure``.
    """
    return (
        responses
          .groupby('denekId', as_index=False)['secimsure']
          .mean()
          .rename(columns={'secimsure': 'avg_secimsure'})
    )


# Group each filtered table by denekId and take the mean of secimsure.
# The same aggregation is applied to all three tables through one helper so
# the three results cannot drift apart.
hc_avg = _mean_secimsure_by_subject(hc_filtered)
sz_avg = _mean_secimsure_by_subject(sz_filtered)
dep_avg = _mean_secimsure_by_subject(a_dep_filtered)


    denekId  avg_secimsure
0      1003     713.280702
1      1004     778.423729
2      1005     843.711864
3      1007    1273.637931
4      1008    1130.000000
5      1009     917.250000
6     10010     978.067797
7     10015    1238.762712
8     10016    1356.220000
9     10017     708.406780
10    10019     976.186441
11    10020     966.879310
12    10021     647.866667
13    10022     592.050000
14    10023    1134.254902
15    10024     944.440678
16    10025     901.166667
17    10026    1771.355932
18    10027    1112.283333
19    10030     851.615385
20    10031     778.500000
21    10036     918.016949
22    10038     622.928571
23    10039     962.254237
24    10040     648.338983
25    10041     629.813559
26    10044     714.933333
27    10045     713.034483
28    10050     789.350000
29    10051    1016.566667
30    10052     801.745763
31    10053     919.084746
32    10054    1345.881356
33    10055     893.482759
34    10056    1111.803571
35    10059    1011.616667
3

In [68]:
from scipy.stats import ttest_ind

# extract the two series and drop any NaNs: ttest_ind's default
# nan_policy='propagate' would otherwise return NaN for both t and p
x = hc_avg['avg_secimsure'].dropna()
y = dep_avg['avg_secimsure'].dropna()

# perform Welch’s t-test (does NOT assume equal variances)
t_stat, p_value = ttest_ind(x, y, equal_var=False)

print(f"t = {t_stat:.3f}, p = {p_value:.3f}")


t = -0.654, p = 0.515
