In [2]:
import pandas as pd
import numpy as np

# Import Dataset

In [3]:
# Import RedcapExport.csv as redcap
redcap = pd.read_csv('Redcap Export.csv')
# Display the first 5 rows of the dataframe
redcap.head()

Unnamed: 0,record_id,redcap_event_name,redcap_repeat_instrument,redcap_repeat_instance,subjectid,subject_referral,covid_group,interventiongroup,subject_information_complete,mmrc_score,...,pdf_outcomemeasures,pdf_complete,diary_sessions_1,diary_sessions_2,diary_sessions_3,diary_sessions_4,diary_mip_1,diary_mip_2,diary_mip_3,training_diary_complete
0,1,screening_arm_1,,,1.0,Physical Therapy,1.0,,2.0,2.0,...,,,,,,,,,,
1,1,visit_2_arm_1,,,,,,,,0.0,...,pcs01v2 measures.pdf,2.0,14.0,14.0,14.0,14.0,65.0,83.0,80.0,2.0
2,1,baseline_arm_1,,1.0,,,,,,2.0,...,pcs01v1 measures.pdf,2.0,,,,,,,,
3,2,screening_arm_1,,,2.0,Physical Therapy,1.0,,0.0,2.0,...,,,,,,,,,,
4,2,visit_2_arm_1,,,,,,,,2.0,...,pcs02v2 measures.pdf,2.0,,,,,,,,0.0


In [4]:
# Step 1. Extract covid_group and interventiongroup from 'screening_arm_1' rows
# and map them to all rows with the same record_id

# Create a small DataFrame containing just screening_arm_1 rows
screening_data = redcap[redcap['redcap_event_name'] == 'screening_arm_1']

# Build a mapping dictionary for covid_group and interventiongroup
covid_group_map = screening_data.set_index('record_id')['covid_group'].to_dict()
interventiongroup_map = screening_data.set_index('record_id')['interventiongroup'].to_dict()

# Fill missing values in covid_group and interventiongroup columns across all rows
redcap['covid_group'] = redcap.apply(
    lambda row: covid_group_map.get(row['record_id'], row['covid_group']),
    axis=1
)
redcap['interventiongroup'] = redcap.apply(
    lambda row: interventiongroup_map.get(row['record_id'], row['interventiongroup']),
    axis=1
)

# Step 2. Remove 'screening_arm_1' rows
redcap = redcap[redcap['redcap_event_name'].isin(['baseline_arm_1', 'visit_2_arm_1'])]

# Step 3. Create the time_point column
time_point_map = {
    'baseline_arm_1': 1,
    'visit_2_arm_1': 2
}
redcap['time_point'] = redcap['redcap_event_name'].map(time_point_map)

# NOTE: Remember to update RedCAP (FIX THIS)
# Step 4. Manually overwrite interventiongroup using your dictionary
manual_interventiongroup = {
    1:1, 2:0, 3:1, 4:0, 5:0, 6:0, 7:1, 8:1, 9:0, 10:1,
    11:1, 13:1, 14:0, 15:1, 16:0, 17:0, 18:1, 20:0, 21:1
}

# Temporarily set Covid_group = 1 for record_id 7
redcap.loc[redcap['record_id'] == 7, 'covid_group'] = 1
# NOTE: Remember to update RedCAP (FIX THIS)


# Update interventiongroup
redcap['interventiongroup'] = redcap['record_id'].map(manual_interventiongroup).combine_first(redcap['interventiongroup'])

# Remove all columns where every value is NaN
redcap = redcap.dropna(axis=1, how='all')

# Drop unnecessary columns
redcap = redcap.drop(columns=['redcap_event_name', 'redcap_repeat_instance'])

# Remove all columns that end with '_complete'
redcap = redcap.loc[:, ~redcap.columns.str.endswith('_complete')]





In [11]:

# Export the cleaned DataFrame to a new CSV file
redcap.to_csv('cleaned_redcap_data.csv', index=False)

redcap.head(30)


Unnamed: 0,record_id,covid_group,interventiongroup,mmrc_score,pcfs_score,subject_dob,subject_gender,subject_ethnicity,health_ldl,health_hdl,...,pdf_compensation,pdf_outcomemeasures,diary_sessions_1,diary_sessions_2,diary_sessions_3,diary_sessions_4,diary_mip_1,diary_mip_2,diary_mip_3,time_point
1,1,1.0,1.0,0.0,1.0,,,,,,...,pcs01v2 compensation.pdf,pcs01v2 measures.pdf,14.0,14.0,14.0,14.0,65.0,83.0,80.0,2
2,1,1.0,1.0,2.0,2.0,1992-04-16,Female,Latino,,,...,pcs01v1 compensation.pdf,pcs01v1 measures.pdf,,,,,,,,1
4,2,1.0,0.0,2.0,2.0,,,,,,...,pcs02v2 compensation.pdf,pcs02v2 measures.pdf,,,,,,,,2
5,2,1.0,0.0,2.0,2.0,1940-03-13,Female,White,91.0,60.0,...,pcs02v1 compensation.pdf,pcs02v1 measures.pdf,,,,,,,,1
7,3,1.0,1.0,3.0,2.0,,,,,,...,pcs03v2 compensation.pdf,pcs03v2 measures.pdf,14.0,14.0,14.0,14.0,89.0,108.0,120.0,2
8,3,1.0,1.0,4.0,4.0,1976-11-24,Female,Hispanic,,,...,pcs03v1 compensation.pdf,pcs03v1 measures.pdf,,,,,,,,1
10,4,1.0,0.0,0.0,1.0,,,,,,...,,,,,,,,,,2
11,4,1.0,0.0,1.0,2.0,1986-04-30,Male,White,,,...,,,,,,,,,,1
13,5,1.0,0.0,3.0,3.0,,,,,,...,,,,,,,,,,2
14,5,1.0,0.0,3.0,3.0,1963-12-26,,,,,...,,,,,,,,,,1


# Respiratory Strength

In [10]:
# Define measures and suffixes
measures = ['mip', 'fit', 'smip', 'pif', 'sindex', 'volume']
suffixes = ['max', 'min', 'mean']

# Loop through measures and suffixes
for measure in measures:
    for suffix in suffixes:
        pre_col = f"{measure}_pre_{suffix}"
        post_col = f"{measure}_post_{suffix}"

        if pre_col in redcap.columns and post_col in redcap.columns:
            # Fatigue: pre - post
            fatigue_col = f"{measure}_{suffix}_fatigue"
            redcap[fatigue_col] = redcap[pre_col] - redcap[post_col]

            # Fatigue Percent: (pre - post) / pre * 100
            fatigue_percent_col = f"{measure}_{suffix}_fatigue_percent"
            redcap[fatigue_percent_col] = ((redcap[pre_col] - redcap[post_col]) / redcap[pre_col]) * 100

        else:
            print(f"⚠️ Warning: Missing columns for {measure}_{suffix}")


In [11]:
redcap.head()

Unnamed: 0,record_id,covid_group,interventiongroup,mmrc_score,pcfs_score,subject_dob,subject_gender,subject_ethnicity,health_ldl,health_hdl,health_triglyceride,health_smoking,health_history,health_medications,covid_onset,covid_infections,covid_vaccinations,covid_supplemento2,covid_hospitalization,covid_icu,covid_mechanicalvent,covid_intubation,covid_apnea,covid_apnea_ahi,covid_apnea_cpap,covid_hallucinations,covid_taste,covid_smell,covid_hearing,covid_tinnitus,covid_vision,covid_throat,covid_choking,covid_sweating,covid_sneezing,covid_coughing,covid_voice,covid_fevers,covid_hair,covid_skin,covid_digestion,covid_chestpain,covid_palpitations,covid_restlessleg,covid_other,covid_primary,data_age,cpet_hr_restingsit,cpet_sbp_sitting,cpet_dbp_sitting,cpet_hr_restingstand,cpet_sbp_standing,cpet_dbp_standing,cpet_spo2_pre,data_kilograms,data_centimeters,data_bmi,data_gender,fmd_diameter_baseline,fmd_diameter_maximum,fmd_percent,fmd_shear_baseline,fmd_shear_maximum,fmd_shear_auc,fmd_shear_areatomaximum,fmd_velocity_baseline,fmd_velocity_maximum,fmd_csv,imt_mip_1_pre,imt_mip_1_post,imt_mip_2_pre,imt_mip_2_post,imt_mip_3_pre,imt_mip_3_post,imt_fit_1_pre,imt_fit_1_post,imt_fit_2_pre,imt_fit_2_post,imt_fit_3_pre,imt_fit_3_post,imt_smip_1_pre,imt_smip_1_post,imt_smip_2_pre,imt_smip_2_post,imt_smip_3_pre,imt_smip_3_post,imt_pif_1_pre,imt_pif_1_post,imt_pif_2_pre,imt_pif_2_post,imt_pif_3_pre,imt_pif_3_post,imt_sindex_1_pre,imt_sindex_1_post,imt_sindex_2_pre,imt_sindex_2_post,imt_sindex_3_pre,imt_sindex_3_post,imt_volume_1_pre,imt_volume_1_post,imt_volume_2_pre,imt_volume_2_post,imt_volume_3_pre,imt_volume_3_post,imt_mep_1,imt_mep_2,imt_mep_3,imt_meppower_1,imt_meppower_2,imt_meppower_3,imt_mepfit_1,imt_mepfit_2,imt_mepfit_3,cpet_hr_peak,cpet_sbp_peak,cpet_dbp_peak,cpet_borg,cpet_spo2_post,cpet_hr_cooldown1,cpet_hr_cooldown2,cpet_vo2peak_absolute,cpet_vo2peak_relative,cpet_vo2threshold,cpet_vevco2,cpet_rr_peak,cpet_vo2pulse,cpet_petco2,cpet_rer,cpet_chronindex,cpet_hrr1,cpet_hrr2,cpet_csv,hrv_sdnn,hrv_rmssd,hrv_heartrate,hrv_lf,hrv_hf,hrv_lfhf,hrv_csv,woods_concentration,woods_decisions,woods_confusion,woods_memory,woods_words,woods_takethingsin,woods_processingspeed,woods_thoughtsmixed,woods_muzzy,fss_motivation,fss_exercise,fss_easily,fss_functioning,fss_problems,fss_sustained,fss_duties,fss_disabling,fss_social,dsq_heavy_freq,dsq_nextday_freq,dsq_mentallytired_freq,dsq_minexercise_freq,dsq_drained_freq,dsq_heavy_severity,dsq_nextday_severity,dsq_mentallytired_severity,dsq_minexercise_severity,dsq_drained_severity,psqi_sleepstart,psqi_latency,psqi_sleepend,psqi_hours,psqi_latency30,psqi_wake,psqi_bathroom,psqi_breathe,psqi_snore,psqi_cold,psqi_hot,psqi_dreams,psqi_pain,psqi_other,psqi_medicine,psqi_sleepy,psqi_enthusiasm,psqi_quality,ndi_intensity,ndi_personalcare,ndi_lifting,ndi_work,ndi_headaches,ndi_concentration,ndi_sleeping,ndi_driving,ndi_reading,ndi_recreation,odi_intensity,odi_personalcare,odi_lifting,odi_walking,odi_sitting,odi_standing,odi_sleeping,odi_sex,odi_social,odi_traveling,vas_backpainworst,vas_backpainbest,vas_backpaincurrent,vas_neckpainworst,vas_neckpainbest,vas_neckpaincurrent,vas_myalgiaworst,vas_myalgiabest,vas_myalgiacurrent,vas_dizzyworst,vas_dizzybest,vas_dizzycurrent,vas_fatigueworst,vas_fatiguebest,vas_fatiguecurrent,gad_anxious,gad_worrying,phq_hopeless,phq_anhedonia,ptsd_nightmares,ptsd_intrusive,ptsd_startled,ptsd_detached,ptsd_guilty,sfpa_vigorous,sfpa_moderate,sfpa_lifting,sfpa_stairs2,sfpa_stairs1,sfpa_stooping,sfpa_walkingmile,sfpa_walkingblocks2,sfpa_walkingblocks1,sfpa_bathingdress,eq_vas,bdi_functional,bdi_task,bdi_effort,tdi_functional,tdi_task,tdi_effort,pdf_consent,pdf_compensation,pdf_outcomemeasures,diary_sessions_1,diary_sessions_2,diary_sessions_3,diary_sessions_4,diary_mip_1,diary_mip_2,diary_mip_3,time_point,mip_pre_max,mip_pre_min,mip_pre_mean,mip_post_max,mip_post_min,mip_post_mean,fit_pre_max,fit_pre_min,fit_pre_mean,fit_post_max,fit_post_min,fit_post_mean,smip_pre_max,smip_pre_min,smip_pre_mean,smip_post_max,smip_post_min,smip_post_mean,pif_pre_max,pif_pre_min,pif_pre_mean,pif_post_max,pif_post_min,pif_post_mean,sindex_pre_max,sindex_pre_min,sindex_pre_mean,sindex_post_max,sindex_post_min,sindex_post_mean,volume_pre_max,volume_pre_min,volume_pre_mean,volume_post_max,volume_post_min,volume_post_mean,mip_max_fatigue,mip_max_fatigue_percent,mip_min_fatigue,mip_min_fatigue_percent,mip_mean_fatigue,mip_mean_fatigue_percent,fit_max_fatigue,fit_max_fatigue_percent,fit_min_fatigue,fit_min_fatigue_percent,fit_mean_fatigue,fit_mean_fatigue_percent,smip_max_fatigue,smip_max_fatigue_percent,smip_min_fatigue,smip_min_fatigue_percent,smip_mean_fatigue,smip_mean_fatigue_percent,pif_max_fatigue,pif_max_fatigue_percent,pif_min_fatigue,pif_min_fatigue_percent,pif_mean_fatigue,pif_mean_fatigue_percent,sindex_max_fatigue,sindex_max_fatigue_percent,sindex_min_fatigue,sindex_min_fatigue_percent,sindex_mean_fatigue,sindex_mean_fatigue_percent,volume_max_fatigue,volume_max_fatigue_percent,volume_min_fatigue,volume_min_fatigue_percent,volume_mean_fatigue,volume_mean_fatigue_percent
1,1,1.0,1.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32.0,,105.0,75.0,,90.0,80.0,98.0,72.8,162.0,27.739674,0.0,3.494,3.732,6.82,179.76,654.99,17463.6,15654.0,15.7,64.41,PCS_01_V2_FMD.csv,76.0,74.0,105.0,94.0,106.0,89.0,7.5,12.3,2.8,16.4,24.8,12.8,340.0,449.0,254.0,511.0,630.0,407.0,3.6,3.6,3.6,3.4,3.0,3.6,62.0,62.0,63.0,59.0,51.0,63.0,2.1,2.5,1.2,0.7,1.7,0.7,21.0,14.0,15.0,33.7,304.0,20.1,26.6,30.7,239.0,172.0,190.0,50.0,17.0,98.0,140.0,129.0,2.17,29.8,,24.0,,,,,,32.0,43.0,PCS_01_V2_CPET.XLS,17.67,8.88,81.65,55.97,44.03,1.27,PCS01_V2_RRIntervals.csv,2.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,4.0,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,4.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,3.0,600.0,6.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,7.0,2.0,3.0,6.0,1.0,0.0,9.0,1.0,3.0,7.0,1.0,2.0,8.0,2.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,80.0,,,,3.0,3.0,3.0,,pcs01v2 compensation.pdf,pcs01v2 measures.pdf,14.0,14.0,14.0,14.0,65.0,83.0,80.0,2,106.0,76.0,95.666667,94.0,74.0,85.666667,24.8,2.8,11.7,16.4,12.3,13.833333,630.0,254.0,408.0,511.0,407.0,455.666667,3.6,3.0,3.4,3.6,3.4,3.533333,63.0,51.0,58.666667,63.0,59.0,61.333333,2.1,1.2,1.666667,2.5,0.7,1.3,12.0,11.320755,2.0,2.631579,10.0,10.452962,8.4,33.870968,-9.5,-339.285714,-2.133333,-18.233618,119.0,18.888889,-153.0,-60.23622,-47.666667,-11.683007,0.0,0.0,-0.4,-13.333333,-0.133333,-3.921569,0.0,0.0,-8.0,-15.686275,-2.666667,-4.545455,-0.4,-19.047619,0.5,41.666667,0.366667,22.0
2,1,1.0,1.0,2.0,2.0,1992-04-16,Female,Latino,,,,0.0,hypothyroid,levothyroxine,48.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,,brain fog,32.0,,105.0,80.0,,115.0,80.0,,76.0,158.0,30.443839,,3.288,3.473,5.61,91.81,376.14,9104.4,7166.2,7.53,30.38,PCS_01_V1_FMD.csv,52.0,31.0,47.0,28.0,46.0,31.0,24.4,9.3,25.0,18.7,30.7,12.6,360.0,180.0,416.0,273.0,402.0,223.0,2.9,3.0,2.7,2.5,2.9,2.8,50.0,52.0,47.0,44.0,50.0,49.0,2.6,2.9,2.5,3.0,2.7,2.9,,,,,,,,,,168.0,180.0,80.0,19.0,,,,1.899,25.0,,27.1,27.47,11.0,,0.94,,,,PCS_01_V1_CPET.XLS,19.94,11.11,79.37,43.22,56.78,0.76,PCS01_V1 _RRIntervals.csv,4.0,3.0,3.0,3.0,4.0,4.0,3.0,3.0,4.0,7.0,7.0,7.0,6.0,6.0,6.0,7.0,6.0,6.0,3.0,4.0,4.0,3.0,4.0,3.0,4.0,3.0,3.0,3.0,0.0,10.0,600.0,6.0,0.0,3.0,3.0,1.0,1.0,2.0,2.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,8.0,0.0,5.0,2.0,0.0,0.0,6.0,0.0,0.0,6.0,0.0,0.0,10.0,4.0,6.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,85.0,2.0,2.0,3.0,,,,pcs01 informed consent.pdf,pcs01v1 compensation.pdf,pcs01v1 measures.pdf,,,,,,,,1,52.0,46.0,48.333333,31.0,28.0,30.0,30.7,24.4,26.7,18.7,9.3,13.533333,416.0,360.0,392.666667,273.0,180.0,225.333333,2.9,2.7,2.833333,3.0,2.5,2.766667,50.0,47.0,49.0,52.0,44.0,48.333333,2.7,2.5,2.6,3.0,2.9,2.933333,21.0,40.384615,18.0,39.130435,18.333333,37.931034,12.0,39.087948,15.1,61.885246,13.166667,49.313358,143.0,34.375,180.0,50.0,167.333333,42.614601,-0.1,-3.448276,0.2,7.407407,0.066667,2.352941,-2.0,-4.0,3.0,6.382979,0.666667,1.360544,-0.3,-11.111111,-0.4,-16.0,-0.333333,-12.820513
4,2,1.0,0.0,2.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,84.0,70.0,138.0,80.0,73.0,136.0,80.0,98.0,66.3,164.7,24.441414,,4.175,4.33,3.7,150.63,544.37,12135.8,8989.1,15.67,57.81,PCS_02_V2_FMD Diamter.csv,47.0,56.0,37.0,44.0,33.0,47.0,1.2,7.2,1.0,8.4,2.9,8.8,90.0,294.0,75.0,268.0,138.0,299.0,2.4,2.9,2.4,2.7,2.4,2.8,42.0,51.0,42.0,47.0,43.0,50.0,1.7,1.5,1.9,1.7,1.1,1.7,,,,,,,,,,112.0,180.0,70.0,17.0,97.0,106.0,102.0,0.93,14.1,,,,,27.0,0.9,0.636364,6.0,10.0,PCS_02_V2_CPET.XLS,12.42,8.79,62.97,67.9,32.1,2.12,PCS02_V2_RRIntervals.csv,2.0,1.0,1.0,3.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,4.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,3.0,1.0,0.0,1.0,1.0,3.0,1.0,0.0,100.0,40.0,830.0,7.0,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,5.0,0.0,0.0,0.0,8.0,1.0,5.0,1.0,1.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,1.0,2.0,1.0,2.0,3.0,3.0,3.0,87.0,,,,3.0,2.0,0.0,,pcs02v2 compensation.pdf,pcs02v2 measures.pdf,,,,,,,,2,47.0,33.0,39.0,56.0,44.0,49.0,2.9,1.0,1.7,8.8,7.2,8.133333,138.0,75.0,101.0,299.0,268.0,287.0,2.4,2.4,2.4,2.9,2.7,2.8,43.0,42.0,42.333333,51.0,47.0,49.333333,1.9,1.1,1.566667,1.7,1.5,1.633333,-9.0,-19.148936,-11.0,-33.333333,-10.0,-25.641026,-5.9,-203.448276,-6.2,-620.0,-6.433333,-378.431373,-161.0,-116.666667,-193.0,-257.333333,-186.0,-184.158416,-0.5,-20.833333,-0.3,-12.5,-0.4,-16.666667,-8.0,-18.604651,-5.0,-11.904762,-7.0,-16.535433,0.2,10.526316,-0.4,-36.363636,-0.066667,-4.255319
5,2,1.0,0.0,2.0,2.0,1940-03-13,Female,White,91.0,60.0,54.0,0.0,lung cancer 2016,"losartan, mirobegron, trelegy, albuterol, sert...",44.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,,,,,1.0,1.0,,,,,,,,,1.0,,,,,,,,,brain fog,84.0,70.0,150.0,85.0,71.0,150.0,90.0,,66.1,164.8,24.338121,,4.338,4.473,3.13,271.98,493.63,3829.3,3829.3,29.49,52.88,PCS_02_V1_FMD.csv,49.0,45.0,39.0,40.0,48.0,44.0,5.9,6.5,1.5,8.5,12.5,12.0,254.0,262.0,111.0,281.0,330.0,330.0,3.0,2.3,2.9,2.9,3.3,2.6,53.0,41.0,51.0,49.0,57.0,45.0,1.8,2.3,1.3,2.2,1.7,1.9,,,,,,,,,,105.0,,,17.0,,98.0,91.0,0.96,14.5,,32.9,,,26.0,,0.530303,7.0,14.0,PCS_02_V1_CPET.XLS,33.5,38.64,69.88,41.85,58.15,0.72,PCS02_V1_RRIntervals.csv,4.0,3.0,2.0,4.0,4.0,1.0,1.0,1.0,2.0,7.0,4.0,4.0,4.0,4.0,4.0,4.0,6.0,6.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,0.0,100.0,15.0,830.0,6.0,2.0,3.0,3.0,1.0,0.0,0.0,0.0,1.0,1.0,,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,8.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,1.0,2.0,2.0,3.0,3.0,3.0,3.0,80.0,2.0,2.0,3.0,,,,pcs02 informed consent.pdf,pcs02v1 compensation.pdf,pcs02v1 measures.pdf,,,,,,,,1,49.0,39.0,45.333333,45.0,40.0,43.0,12.5,1.5,6.633333,12.0,6.5,9.0,330.0,111.0,231.666667,330.0,262.0,291.0,3.3,2.9,3.066667,2.9,2.3,2.6,57.0,51.0,53.666667,49.0,41.0,45.0,1.8,1.3,1.6,2.3,1.9,2.133333,4.0,8.163265,-1.0,-2.564103,2.333333,5.147059,0.5,4.0,-5.0,-333.333333,-2.366667,-35.678392,0.0,0.0,-151.0,-136.036036,-59.333333,-25.611511,0.4,12.121212,0.6,20.689655,0.466667,15.217391,8.0,14.035088,10.0,19.607843,8.666667,16.149068,-0.5,-27.777778,-0.6,-46.153846,-0.533333,-33.333333
7,3,1.0,1.0,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48.0,91.0,132.0,98.0,98.0,122.0,85.0,,111.5,159.8,43.663779,0.0,3.03,3.245,7.1,147.74,465.45,21002.5,7422.2,11.19,35.59,,136.0,128.0,117.0,122.0,112.0,114.0,2.5,3.1,2.9,3.8,4.1,5.2,251.0,268.0,258.0,286.0,260.0,312.0,3.7,4.5,4.3,4.8,3.7,4.6,63.0,77.0,74.0,82.0,64.0,80.0,1.6,2.2,2.0,2.6,1.6,2.5,63.0,56.0,70.0,17.6,16.8,518.0,484.0,471.0,18.8,165.0,160.0,65.0,13.0,,152.0,138.0,1.83,16.4,,25.0,,,33.0,0.92,0.91358,13.0,27.0,,40.0,36.78,82.41,34.71,65.29,0.53,maria 2_RRIntervals.csv,4.0,1.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0,5.0,5.0,4.0,5.0,5.0,5.0,5.0,4.0,5.0,3.0,4.0,0.0,3.0,3.0,3.0,4.0,0.0,4.0,3.0,2200.0,15.0,600.0,6.5,1.0,2.0,1.0,1.0,2.0,0.0,0.0,1.0,1.0,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,10.0,6.0,4.0,10.0,6.0,1.0,10.0,6.0,2.0,10.0,3.0,0.0,10.0,5.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,90.0,,,,3.0,3.0,1.0,,pcs03v2 compensation.pdf,pcs03v2 measures.pdf,14.0,14.0,14.0,14.0,89.0,108.0,120.0,2,136.0,112.0,121.666667,128.0,114.0,121.333333,4.1,2.5,3.166667,5.2,3.1,4.033333,260.0,251.0,256.333333,312.0,268.0,288.666667,4.3,3.7,3.9,4.8,4.5,4.633333,74.0,63.0,67.0,82.0,77.0,79.666667,2.0,1.6,1.733333,2.6,2.2,2.433333,8.0,5.882353,-2.0,-1.785714,0.333333,0.273973,-1.1,-26.829268,-0.6,-24.0,-0.866667,-27.368421,-52.0,-20.0,-17.0,-6.772908,-32.333333,-12.613784,-0.5,-11.627907,-0.8,-21.621622,-0.733333,-18.803419,-8.0,-10.810811,-14.0,-22.222222,-12.666667,-18.905473,-0.6,-30.0,-0.6,-37.5,-0.7,-40.384615


# Subjective Questionnaires

## Woods Mental Fatigue Inventory

In [13]:
# List of columns to sum
woods_columns = [
    'woods_concentration', 'woods_decisions', 'woods_confusion',
    'woods_memory', 'woods_words', 'woods_takethingsin',
    'woods_processingspeed', 'woods_thoughtsmixed', 'woods_muzzy'
]

# Create the new column 'woods_sum'
redcap['woods_sum'] = redcap[woods_columns].sum(axis=1)

# Display mean, min, max, and std of the 'woods_sum' column
print("Woods Sum Statistics:")
print(f"Mean: {redcap['woods_sum'].mean()}")
print(f"Min: {redcap['woods_sum'].min()}")
print(f"Max: {redcap['woods_sum'].max()}")
print(f"Std: {redcap['woods_sum'].std()}")


Woods Sum Statistics:
Mean: 16.483870967741936
Min: 1.0
Max: 31.0
Std: 8.958686539673606


## Fatigue Severity Scale (FSS)

In [14]:
# List of columns to sum
fss_columns = ['fss_motivation', 'fss_exercise', 'fss_easily', 'fss_functioning',
       'fss_problems', 'fss_sustained', 'fss_duties', 'fss_disabling',
       'fss_social']

# Create the new column 'fss_sum'
redcap['fss_sum'] = redcap[fss_columns].sum(axis=1)

# Create a new column fss_dichotomous if fss_sum is greater than 36
redcap['fss_dichotomous'] = redcap['fss_sum'].apply(lambda x: 1 if x > 36 else 0)

# Print the mean, min, max, and std of the 'fss_sum' column
print("FSS Sum Statistics:")
print(f"Mean: {redcap['fss_sum'].mean()}")
print(f"Min: {redcap['fss_sum'].min()}")
print(f"Max: {redcap['fss_sum'].max()}")
print(f"Std: {redcap['fss_sum'].std()}")

FSS Sum Statistics:
Mean: 38.67741935483871
Min: 10.0
Max: 62.0
Std: 16.682100380895672


## PEM (DSQ)

In [15]:
# https://pmc.ncbi.nlm.nih.gov/articles/PMC6165517/
# A frequency and severity score of 2, 2 on any items 1–5 is indicative of PEM.

# Lists of columns
freq_cols = [
    'dsq_heavy_freq', 'dsq_nextday_freq', 'dsq_mentallytired_freq',
    'dsq_minexercise_freq', 'dsq_drained_freq'
]

severity_cols = [
    'dsq_heavy_severity', 'dsq_nextday_severity', 'dsq_mentallytired_severity',
    'dsq_minexercise_severity', 'dsq_drained_severity'
]

# Sum of frequency
redcap['dsq_freq_sum'] = redcap[freq_cols].sum(axis=1)

# Sum of severity
redcap['dsq_severity_sum'] = redcap[severity_cols].sum(axis=1)

# Sum of both frequency and severity
redcap['dsq_sum'] = redcap['dsq_freq_sum'] + redcap['dsq_severity_sum']

# Dichotomous indicator (1 if any item has freq >=2 AND severity >=2)
def compute_dsq_dichotomous(row):
    for freq_col, sev_col in zip(freq_cols, severity_cols):
        if (row[freq_col] >= 2) and (row[sev_col] >= 2):
            return 1
    return 0

redcap['dsq_dichotomous'] = redcap.apply(compute_dsq_dichotomous, axis=1)

# Print the frequency counts for dsq_dichotomous
print("DSQ Dichotomous Frequency Counts:")
print(redcap['dsq_dichotomous'].value_counts())

# Print the mean, min, max, and std of the 'dsq_sum' column
print("DSQ Sum Statistics:")
print(f"Mean: {redcap['dsq_sum'].mean()}")
print(f"Min: {redcap['dsq_sum'].min()}")
print(f"Max: {redcap['dsq_sum'].max()}")



DSQ Dichotomous Frequency Counts:
dsq_dichotomous
1    19
0    12
Name: count, dtype: int64
DSQ Sum Statistics:
Mean: 15.290322580645162
Min: 0.0
Max: 38.0


## Pittsburgh Sleep Index (PSQI)

In [16]:
# Keep the raw Q2 latency in a separate column (assuming it's the original minutes)
# If 'psqi_latency' currently holds raw minutes, just rename it to 'psqi_latency_raw'
redcap['psqi_latency_raw'] = redcap['psqi_latency']

# Recode Q2 (raw latency in minutes) into 0-3 scale
def recode_q2(x):
    if pd.isnull(x):
        return np.nan
    elif x < 15:
        return 0
    elif 15 <= x <= 30:
        return 1
    elif 31 <= x <= 60:
        return 2
    else:  # > 60 minutes
        return 3

redcap['psqi_latency_q2'] = redcap['psqi_latency_raw'].apply(recode_q2)

# Q5a is already coded as 0-3, keep it as is in 'psqi_latency30'
redcap['psqi_latency_q5a'] = redcap['psqi_latency30']

# Sum the two recoded latency subscores
redcap['psqi_latency_sum'] = redcap[['psqi_latency_q2', 'psqi_latency_q5a']].sum(axis=1)

# Map the summed value into the final component score
def map_latency_component(x):
    if pd.isnull(x):
        return np.nan
    elif x == 0:
        return 0
    elif 1 <= x <= 2:
        return 1
    elif 3 <= x <= 4:
        return 2
    else:  # 5-6
        return 3

redcap['psqi_latency'] = redcap['psqi_latency_sum'].apply(map_latency_component)

# 3. Sleep Duration
def score_sleep_duration(hours):
    if hours >= 7:
        return 0
    elif 6 <= hours < 7:
        return 1
    elif 5 <= hours < 6:
        return 2
    else:
        return 3

redcap['psqi_duration'] = redcap['psqi_hours'].apply(score_sleep_duration)

# 4. Sleep Efficiency
def hhmm_to_decimal(time_val):
    """
    Converts HHMM integer to decimal hours.
    E.g. 2230 -> 22 + 30/60 = 22.5
    """
    if pd.isnull(time_val):
        return np.nan
    hours = time_val // 100
    minutes = time_val % 100
    return hours + minutes / 60.0

# --- Convert psqi_sleepstart and psqi_sleepend to decimal hours ---
redcap['sleepstart_decimal'] = redcap['psqi_sleepstart'].apply(hhmm_to_decimal)
redcap['sleepend_decimal'] = redcap['psqi_sleepend'].apply(hhmm_to_decimal)

# --- Calculate Time in Bed (handling overnight shifts) ---
def calculate_time_in_bed(start, end):
    if pd.isnull(start) or pd.isnull(end):
        return np.nan
    time_in_bed = end - start
    if time_in_bed <= 0:
        time_in_bed += 24
    return time_in_bed

redcap['time_in_bed'] = redcap.apply(
    lambda row: calculate_time_in_bed(row['sleepstart_decimal'], row['sleepend_decimal']), axis=1
)

# --- Calculate Sleep Efficiency ---
redcap['sleep_efficiency'] = (redcap['psqi_hours'] / redcap['time_in_bed']) * 100

# --- Score PSQI Component 4: Habitual Sleep Efficiency ---
def score_sleep_efficiency(efficiency):
    if pd.isnull(efficiency):
        return np.nan
    elif efficiency >= 85:
        return 0
    elif 75 <= efficiency < 85:
        return 1
    elif 65 <= efficiency < 75:
        return 2
    else:
        return 3

redcap['psqi_efficiency'] = redcap['sleep_efficiency'].apply(score_sleep_efficiency)

# 5. Sleep Disturbances
disturbance_items = [
    'psqi_wake', 'psqi_bathroom', 'psqi_breathe', 'psqi_snore',
    'psqi_cold', 'psqi_hot', 'psqi_dreams', 'psqi_pain', 'psqi_other'
]

# TEMPORARY FIX: NEED TO REMOVE LATER AND FIX DATA ENTRY
redcap['psqi_other'] = pd.to_numeric(redcap['psqi_other'], errors='coerce').fillna(0)

redcap['psqi_disturbances_raw'] = redcap[disturbance_items].sum(axis=1)

redcap['psqi_disturbances'] = pd.cut(
    redcap['psqi_disturbances_raw'],
    bins=[-1, 0, 9, 18, 27],  # Adjusted bins
    labels=[0, 1, 2, 3]
).astype(int)


# 6. Use of Sleep Medications
redcap['psqi_medication'] = redcap['psqi_medicine']

# 7. Daytime Dysfunction
redcap['psqi_dysfunction_raw'] = redcap[['psqi_sleepy', 'psqi_enthusiasm']].sum(axis=1)
redcap['psqi_dysfunction'] = pd.cut(
    redcap['psqi_dysfunction_raw'],
    bins=[-1, 0, 2, 4, 6],
    labels=[0, 1, 2, 3]
).astype(int)

# 8. Calculate PSQI Sum
component_cols = [
    'psqi_quality', 'psqi_latency', 'psqi_duration', 
    'psqi_efficiency', 'psqi_disturbances', 
    'psqi_medication', 'psqi_dysfunction'
]
redcap['psqi_sum'] = redcap[component_cols].sum(axis=1)

# 9. Dichotomous classification: 0 = Good sleep, 1 = Poor sleep
redcap['psqi_dichotomous'] = (redcap['psqi_sum'] > 5).astype(int)


# List of all component columns plus the sum
components = [
    'psqi_quality', 'psqi_latency', 'psqi_duration', 'psqi_efficiency',
    'psqi_disturbances', 'psqi_medication', 'psqi_dysfunction', 'psqi_sum'
]

# Summary stats: mean, min, max
summary_stats = redcap[components].agg(['mean', 'min', 'max'])
print("Summary statistics for PSQI components and sum:")
print(summary_stats)

# Frequency counts for the dichotomous variable
dichotomous_counts = redcap['psqi_dichotomous'].value_counts(dropna=False)
print("\nFrequency counts for psqi_dichotomous:")
print(dichotomous_counts)


Summary statistics for PSQI components and sum:
      psqi_quality  psqi_latency  psqi_duration  psqi_efficiency  \
mean      1.387097      1.419355       0.935484          0.83871   
min       0.000000      0.000000       0.000000          0.00000   
max       3.000000      3.000000       3.000000          3.00000   

      psqi_disturbances  psqi_medication  psqi_dysfunction  psqi_sum  
mean           1.483871         0.935484           1.16129   8.16129  
min            0.000000         0.000000           0.00000   1.00000  
max            3.000000         3.000000           3.00000  18.00000  

Frequency counts for psqi_dichotomous:
psqi_dichotomous
1    21
0    10
Name: count, dtype: int64


  redcap['psqi_disturbances'] = pd.cut(
  redcap['psqi_medication'] = redcap['psqi_medicine']
  redcap['psqi_dysfunction_raw'] = redcap[['psqi_sleepy', 'psqi_enthusiasm']].sum(axis=1)
  redcap['psqi_dysfunction'] = pd.cut(
  redcap['psqi_sum'] = redcap[component_cols].sum(axis=1)
  redcap['psqi_dichotomous'] = (redcap['psqi_sum'] > 5).astype(int)


## BDI-TDI

In [19]:
# Compute BDI Score
redcap['bdi_sum'] = redcap[['bdi_functional', 'bdi_task', 'bdi_effort']].sum(axis=1)

# Compute TDI Score
redcap['tdi_sum'] = redcap[['tdi_functional', 'tdi_task', 'tdi_effort']].sum(axis=1)


  redcap['bdi_sum'] = redcap[['bdi_functional', 'bdi_task', 'bdi_effort']].sum(axis=1)
  redcap['tdi_sum'] = redcap[['tdi_functional', 'tdi_task', 'tdi_effort']].sum(axis=1)


## ODI / NDI

In [21]:
# Sum ODI columns
odi_cols = [
    'odi_intensity', 'odi_personalcare', 'odi_lifting', 'odi_walking',
    'odi_sitting', 'odi_standing', 'odi_sleeping', 'odi_sex',
    'odi_social', 'odi_traveling'
]
redcap['odi_sum'] = redcap[odi_cols].sum(axis=1, skipna=True)

# Sum NDI columns
ndi_cols = [
    'ndi_intensity', 'ndi_personalcare', 'ndi_lifting', 'ndi_work',
    'ndi_headaches', 'ndi_concentration', 'ndi_sleeping',
    'ndi_driving', 'ndi_reading', 'ndi_recreation'
]
redcap['ndi_sum'] = redcap[ndi_cols].sum(axis=1, skipna=True)


  redcap['odi_sum'] = redcap[odi_cols].sum(axis=1, skipna=True)
  redcap['ndi_sum'] = redcap[ndi_cols].sum(axis=1, skipna=True)


## Psychology Questionnaire

In [23]:
# Define the columns for each measure
anxiety_cols = ['gad_anxious', 'gad_worrying']
depression_cols = ['phq_hopeless', 'phq_anhedonia']
ptsd_cols = ['ptsd_nightmares', 'ptsd_intrusive', 'ptsd_startled', 'ptsd_detached', 'ptsd_guilty']

# Calculate sum scores
redcap['anxiety_sum'] = redcap[anxiety_cols].sum(axis=1, skipna=True)
redcap['depression_sum'] = redcap[depression_cols].sum(axis=1, skipna=True)
redcap['ptsd_sum'] = redcap[ptsd_cols].sum(axis=1, skipna=True)

# Create dichotomous variables
# PTSD-5: 3 most sensitive, 5 most specific, 4 most efficient https://pmc.ncbi.nlm.nih.gov/articles/PMC5023594/ 
# PHq-2: >=2 --> https://pubmed.ncbi.nlm.nih.gov/33026888/, https://jamanetwork.com/journals/jama/fullarticle/2766865
# GAD-2: >=3 --> https://www.sciencedirect.com/science/article/abs/pii/S0003999318303903, https://www.sciencedirect.com/science/article/abs/pii/S0163834315002406, https://pmc.ncbi.nlm.nih.gov/articles/PMC6163062/, https://pmc.ncbi.nlm.nih.gov/articles/PMC7306644/
redcap['anxiety_dichotomous'] = (redcap['anxiety_sum'] >= 3).astype(int)
redcap['depression_dichotomous'] = (redcap['depression_sum'] >= 2).astype(int)
redcap['ptsd_dichotomous'] = (redcap['ptsd_sum'] >= 3).astype(int)


  redcap['anxiety_sum'] = redcap[anxiety_cols].sum(axis=1, skipna=True)
  redcap['depression_sum'] = redcap[depression_cols].sum(axis=1, skipna=True)
  redcap['ptsd_sum'] = redcap[ptsd_cols].sum(axis=1, skipna=True)
  redcap['anxiety_dichotomous'] = (redcap['anxiety_sum'] >= 3).astype(int)
  redcap['depression_dichotomous'] = (redcap['depression_sum'] >= 2).astype(int)
  redcap['ptsd_dichotomous'] = (redcap['ptsd_sum'] >= 3).astype(int)


## SF-PA

In [26]:
# Define the columns for SFPA
sfpa_cols = [
    'sfpa_vigorous', 'sfpa_moderate', 'sfpa_lifting',
    'sfpa_stairs2', 'sfpa_stairs1', 'sfpa_stooping',
    'sfpa_walkingmile', 'sfpa_walkingblocks2',
    'sfpa_walkingblocks1', 'sfpa_bathingdress'
]

# Calculate the sum score
redcap['sfpa_sum'] = redcap[sfpa_cols].sum(axis=1, skipna=True)

# Export

In [25]:
redcap.head(30)

Unnamed: 0,record_id,covid_group,interventiongroup,mmrc_score,pcfs_score,subject_dob,subject_gender,subject_ethnicity,health_ldl,health_hdl,...,sindex_pre_mean,sindex_post_max,sindex_post_min,sindex_post_mean,volume_pre_max,volume_pre_min,volume_pre_mean,volume_post_max,volume_post_min,volume_post_mean
1,1,1.0,1.0,0.0,1.0,,,,,,...,58.666667,63.0,59.0,61.333333,2.1,1.2,1.666667,2.5,0.7,1.3
2,1,1.0,1.0,2.0,2.0,1992-04-16,Female,Latino,,,...,49.0,52.0,44.0,48.333333,2.7,2.5,2.6,3.0,2.9,2.933333
4,2,1.0,0.0,2.0,2.0,,,,,,...,42.333333,51.0,47.0,49.333333,1.9,1.1,1.566667,1.7,1.5,1.633333
5,2,1.0,0.0,2.0,2.0,1940-03-13,Female,White,91.0,60.0,...,53.666667,49.0,41.0,45.0,1.8,1.3,1.6,2.3,1.9,2.133333
7,3,1.0,1.0,3.0,2.0,,,,,,...,67.0,82.0,77.0,79.666667,2.0,1.6,1.733333,2.6,2.2,2.433333
8,3,1.0,1.0,4.0,4.0,1976-11-24,Female,Hispanic,,,...,58.333333,52.0,39.0,46.0,2.7,1.3,2.033333,2.7,2.5,2.633333
10,4,1.0,0.0,0.0,1.0,,,,,,...,99.666667,114.0,96.0,104.0,3.4,3.0,3.166667,3.1,2.3,2.7
11,4,1.0,0.0,1.0,2.0,1986-04-30,Male,White,,,...,67.0,95.0,89.0,91.333333,3.2,2.5,2.933333,3.3,2.8,3.1
13,5,1.0,0.0,3.0,3.0,,,,,,...,48.666667,49.0,34.0,42.0,3.3,2.8,3.033333,3.0,2.7,2.866667
14,5,1.0,0.0,3.0,3.0,1963-12-26,,,,,...,42.666667,53.0,44.0,47.333333,3.7,0.3,2.333333,3.8,0.5,2.566667


In [27]:
# Create DataFrame for time_point = 1
df_CS = redcap[redcap['time_point'] == 1].copy()

# Create DataFrame for Covid_group = 1
df_RCT = redcap[redcap['covid_group'] == 1].copy()

# Export to CSV files
df_CS.to_csv('df_CS.csv', index=False)
df_RCT.to_csv('df_RCT.csv', index=False)
