In [None]:
import pandas as pd
from scipy.stats import zscore

# Longitudinal data preparation for Study 2.
#
# Pipeline: load the wide survey export, parse the date columns, build one
# composite (row-sum) score per survey, sort by participant and start date,
# melt to long format, write the unstandardized table, sign-flip CFS,
# z-score within each survey type, and write the standardized table.

# Input / output locations.
INPUT_CSV = r'C:\Users\MadiL\Thesis CodeBase\Longitudinal\Study2_data.csv'
UNSTANDARDIZED_CSV = r'Study2_data_unstandardized.csv'
STANDARDIZED_CSV = r'Study2_data_standardized.csv'

# Columns carried through the melt as identifiers, and the composite columns
# that become the rows of the long table (in this order).
ID_COLUMNS = ['Participant_Number', 'StartDate', 'EndDate']
SCORE_COLUMNS = ['LSAS_score', 'BDI_score', 'CFS_score']

# Composite scores whose sign is flipped before standardization.
REVERSED_SURVEYS = ('CFS_score',)


def item_columns(data: pd.DataFrame, prefix: str) -> list:
    """Return the names of the item columns belonging to one survey.

    A column belongs to the survey when its name contains *prefix* (e.g.
    ``'LSAS'``). The composite column ``'<prefix>_score'`` is excluded so
    that an input file which already carries a composite does not have it
    summed into itself.
    """
    composite = prefix + '_score'
    return [col for col in data.columns if prefix in col and col != composite]


def composite_score(data: pd.DataFrame, columns: list) -> pd.Series:
    """Return the row-wise sum of *columns*, ignoring missing items.

    Rows in which every item is missing yield NaN rather than 0
    (``min_count=1``): a survey that was not answered at all must not be
    recorded as a genuine score of zero.
    """
    return data[columns].sum(axis=1, skipna=True, min_count=1)


def melt_scores(data: pd.DataFrame) -> pd.DataFrame:
    """Reshape the composite scores to long format.

    The result has one row per (input row, survey) with the identifier
    columns, a ``'Survey'`` column holding the composite column name and a
    ``'Score'`` column holding its value. Row order within each survey
    follows the order of *data*.
    """
    return pd.melt(
        data,
        id_vars=ID_COLUMNS,
        value_vars=SCORE_COLUMNS,
        var_name='Survey',
        value_name='Score',
    )


def standardize_scores(long_data: pd.DataFrame) -> pd.DataFrame:
    """Sign-flip reversed surveys and add within-survey z-scores, in place.

    ``'Score'`` is multiplied by -1 for the surveys in ``REVERSED_SURVEYS``
    (presumably so that all scales point in the same direction -- confirm
    against the study design). ``'Standardized_Score'`` is then computed per
    survey with ``scipy.stats.zscore`` (population standard deviation,
    ``ddof=0``). Missing scores are left out of the mean and standard
    deviation and stay NaN in the output instead of turning the whole survey
    group into NaN.

    Returns *long_data* for convenience.
    """
    flip = long_data['Survey'].isin(REVERSED_SURVEYS)
    long_data.loc[flip, 'Score'] *= -1

    long_data['Standardized_Score'] = long_data.groupby('Survey')['Score'].transform(
        lambda scores: zscore(scores.to_numpy(dtype=float), nan_policy='omit')
    )
    return long_data


# The guard is true when this runs as a script or as a notebook cell, so the
# variables below are still created at module level exactly as before; it only
# keeps the file I/O from running when the helpers are imported.
if __name__ == "__main__":
    # Step 0: Load the data
    cleaned_data = pd.read_csv(INPUT_CSV)

    # Step 1: Convert StartDate and EndDate to datetime format.
    # errors='coerce' turns unparseable values into NaT instead of raising.
    cleaned_data['StartDate'] = pd.to_datetime(cleaned_data['StartDate'], errors='coerce')
    cleaned_data['EndDate'] = pd.to_datetime(cleaned_data['EndDate'], errors='coerce')

    # Step 2: Calculate composite survey scores for LSAS, BDI, and CFS.
    # Item columns are selected by substring match on the survey name.
    lsas_columns = item_columns(cleaned_data, 'LSAS')
    bd_columns = item_columns(cleaned_data, 'BDI')
    cfs_columns = item_columns(cleaned_data, 'CFS')

    cleaned_data['LSAS_score'] = composite_score(cleaned_data, lsas_columns)
    cleaned_data['BDI_score'] = composite_score(cleaned_data, bd_columns)
    cleaned_data['CFS_score'] = composite_score(cleaned_data, cfs_columns)

    # Step 3: Sort data by Participant_Number and StartDate
    cleaned_data_sorted = cleaned_data.sort_values(by=['Participant_Number', 'StartDate'])

    # Step 4: Reshape the data to long format for longitudinal analysis.
    # The unstandardized file is written before the CFS sign flip below.
    long_data = melt_scores(cleaned_data_sorted)
    long_data.to_csv(UNSTANDARDIZED_CSV, index=False)

    # Step 5: Reverse CFS and standardize scores to z-scores within each
    # survey type.
    standardize_scores(long_data)
    long_data.to_csv(STANDARDIZED_CSV, index=False)
