In [1]:
import pandas as pd
import ast


In [2]:
from server_config import DATA_PATH

### 1. Therapist data

In [3]:
# Load the therapist-level table (semicolon-separated CSV).
# low_memory has to be the boolean False: a non-empty string such as "False"
# is truthy, which would leave pandas' chunked parsing switched on.
df_th = pd.read_csv(
    DATA_PATH + "/df_th_nopr_experience_corrected_200624_LB.csv",
    sep=";",
    low_memory=False,
)

In [4]:
# Therapist columns retained for the analysis, grouped by name pattern.
# The resulting order is: id, t0_method_*, t0_appliedmethod_*, single-item
# t0 columns, then the ten t0_bfi_t_* items.
keep_cols_th = (
    ["therapist_id"]
    + [f"t0_method_t_{suffix}" for suffix in ("CBT", "PDT", "ST", "AP")]
    + [f"t0_appliedmethod_t_{suffix}" for suffix in ("VT", "TP", "AP", "ST", "Other")]
    + [
        "t0_location_t",
        "t0_gender_t",
        "t0_relationship_t",
        "t0_sexualorientation_t",
        "t0_children_t",
        "t0_age_t",
        "t0_experience_t",
        "t0_license_t",
    ]
    + [f"t0_bfi_t_{item}" for item in range(1, 11)]
)

In [5]:
# Suffixes used to name the five applied-method indicator columns.
labels = ["VT", "TP", "AP", "ST", "Other"]


def _parse_flag_string(value):
    """Evaluate a string such as "[True, False, ...]" to a Python literal; pass other values through."""
    if isinstance(value, str):
        return ast.literal_eval(value)
    return value


def _unwrap_singleton(value):
    """Turn [[True, False, ...]] into [True, False, ...]; leave anything else unchanged."""
    is_sequence = isinstance(value, (list, tuple))
    if is_sequence and len(value) == 1 and isinstance(value[0], (list, tuple)):
        return value[0]
    return value


# Raw column holding the applied-method flags, one entry per therapist row.
col = df_th["t0_appliedmethod_t_A"]

# 1) Parse string entries into Python objects.
col = col.map(_parse_flag_string)

# 2) Remove one level of nesting where an entry is a list wrapped in a list.
col = col.map(_unwrap_singleton)

# 3) Safety: replace missing/invalid with all-False
def ensure_five(x):
    """Return ``x`` as a list of five flags, falling back to five ``False`` values.

    Parameters
    ----------
    x : object
        One parsed cell value.

    Returns
    -------
    list
        ``list(x)`` when ``x`` is a list or tuple of length 5; otherwise
        ``[False] * 5``. The fallback covers missing values (NaN/None),
        scalars, and sequences of any other length.
    """
    if isinstance(x, (list, tuple)) and len(x) == 5:
        return list(x)
    # Everything else is treated as missing/invalid. There is deliberately no
    # pd.isna() test: on a list it returns an array whose truth value is
    # ambiguous, and falling through without a return would yield None.
    return [False] * 5

# Normalise every entry through ensure_five before building the frame.
col = col.map(ensure_five)

# 4) Build the indicator columns: one integer column per label, aligned to
#    df_th's index and named "t0_appliedmethod_t_<label>".
method_df = pd.DataFrame(col.tolist(), index=df_th.index, columns=labels)
method_df = method_df.astype(int)
method_df = method_df.add_prefix("t0_appliedmethod_t_")

In [6]:
# Attach the indicator columns to the therapist table (index-aligned join).
# Columns with the same names that are already present are dropped first, so
# re-running this cell replaces them instead of join() raising
# "columns overlap but no suffix specified".
df_th = df_th.drop(columns=method_df.columns, errors="ignore").join(method_df)


In [7]:
# Restrict the therapist table to the columns listed in keep_cols_th
# (raises KeyError if any listed column is absent).
df_th = df_th.loc[:, keep_cols_th]

### 2. Patient data

In [8]:
# Load the patient-level table from its JSON export.
df_pat = pd.read_json(path_or_buf=f"{DATA_PATH}/TONI_pat_corrected_20241025_raw.json")

In [9]:
# Remove every column whose name starts with one of these prefixes.
# NOTE(review): the bare 'w' prefix matches any column beginning with "w" -
# confirm that nothing needed later starts with that letter.
_dropped_prefixes = ('t2', 't3', 't4', 'u_', 'w', 'dropout')
_cols_to_drop = [name for name in df_pat.columns if name.startswith(_dropped_prefixes)]
df_pat = df_pat.drop(labels=_cols_to_drop, axis="columns")

In [None]:
# Patient columns to keep, assembled as one flat list of column names.
# Every segment must end in "+": two adjacent list literals without it are
# parsed as a subscript (list[...]) and raise TypeError at runtime.
keep_col_pat = (
    # IDs / study
    ['patient_id', 'therapist_id', 'studyGroup'] +

    # Demographics / baseline
    ['t0_age_p','t0_gender_p','t0_school_p','t0_employment_p','t0_location_p','t0_chronicity_p'] +
    [f"t0_therapy_p_{i}" for i in range(1, 3+1)] +
    ['t0_familyhistory_p','t0_meds_p','t0_relationship_p','t0_ses_p','t0_distance_p'] +
    [f"t0_migration_p_{i}" for i in range(1, 3+1)] +
    [f"surveys.t0.answers.1585.discrimination_{i}" for i in range(1, 5+1)] +
    [f"t0_disease_p_{c}" for c in 'ABCDEFG'] +

    # t0 scales (range upper bounds written as n+1 so the item count is visible)
    [f"t0_phq_p_{i}" for i in range(1, 8+1)] +
    [f"t0_gad_p_{i}" for i in range(1, 7+1)] +
    [f"t0_swls_p_{i}" for i in range(1, 5+1)] +
    [f"t0_aqol_p_{i}" for i in range(1, 35+1)] +
    [f"t0_ede_p_{i}" for i in range(1, 8+1)] +
    ['t0_audit_p_filter'] + [f"t0_audit_p_{i}" for i in range(1, 3+1)] +
    ['t0_dudit_p_filter'] + [f"t0_dudit_p_{i}" for i in range(1, 4+1)] +
    [f"t0_msfq_p_{i}" for i in range(1, 5+1)] +
    [f"t0_pid_p_{i}" for i in range(1, 36+1)] +
    [f"t0_opd_p_{i}" for i in range(1, 12+1)] +
    [f"t0_ace_p_{i}" for i in range(1, 10+1)] +
    [f"t0_ask_p_{i}" for i in range(1, 3+1)] +
    [f"t0_mhse_p_{i}" for i in range(1, 6+1)] +

    # t1 scales
    [f"t1_phq_p_{i}" for i in range(1, 8+1)] +
    [f"t1_gad_p_{i}" for i in range(1, 7+1)] +

    # Change, matching and WAI columns (t0 / t1)
    ['t0_change_tp_1', 't0_change_tp_2', 't0_matching_tp_1'] +
    [f"t1_wai_tp_{i}" for i in range(1, 8+1)] +
    [f"t1_wai_p_{i}" for i in range(1, 10+1)] +
    ['t1_matching_tp_1','t0_matching_p_1', 't1_matching_p_1']
)