Script to rescale the adversity scores and combine metadata of all samples

In [3]:
import os
import glob
import subprocess
import numpy as np
import shutil
import pandas as pd
import csv
import numpy as np
import nibabel as nib
from sklearn.preprocessing import MinMaxScaler

# Root directory that holds the per-cohort data folders.
# NOTE(review): not referenced by the cells visible here, which spell out the full paths instead.
main_dir = '/project/3022000.05/linschl/data/'

In [7]:
### load the metadata table of every cohort
# The CSVs are read top to bottom and unpacked into the names in that same order.
hbs, mindset, imagen, become, strat = (
    pd.read_csv(metadata_csv)
    for metadata_csv in (
        '/project/3022000.05/linschl/data/hbs/hbs_metadata_20240820.csv',  # -> hbs
        '/project/3022000.05/linschl/data/mindset/imputed_data/mindset_metadata_imp_20240403.csv',  # -> mindset
        '/project/3022000.05/linschl/data/imagen/imagen_metadata_20240820.csv',  # -> imagen
        '/project/3022000.05/linschl/data/become/become_metadata_20240820.csv',  # -> become
        '/project/3022000.05/linschl/data/stratify/stratify_metadata_20241210.csv',  # -> strat
    )
)

### combined covariate dataframe

In [8]:
### select variables
# Covariates kept for every cohort, followed by the four CTQ adversity columns
# (used for hbs, become, imagen and stratify).
cov_cols = ['subj_id', 'age', 'sex', 'diagnosis', 'dataset', 'site', 'eTIV',
            'CTQ_emotional_abuse', 'CTQ_physical_abuse', 'CTQ_sexual_abuse',
            'CTQ_emotional_neglect']

# Same covariates for mindset, whose adversity columns carry the *_freq names.
cov_cols_mindset = ['subj_id', 'age', 'sex', 'diagnosis', 'dataset', 'site', 'eTIV',
                    'EmNeg_freq', 'PsychAb_freq', 'SA_freq', 'PhysAb_freq']

hbs_cov = hbs[cov_cols]
become_cov = become[cov_cols]
imagen_cov = imagen[cov_cols]
strat_cov = strat[cov_cols]
mindset_cov = mindset[cov_cols_mindset]

### combine CTQ data
combined_data = pd.concat([hbs_cov, imagen_cov, become_cov, strat_cov], ignore_index=True)

### rename CTQ cols
# Strip the CTQ_ prefix so the columns share their names with the renamed mindset columns.
combined_data = combined_data.rename(columns={'CTQ_emotional_abuse': 'emotional_abuse',
                                              'CTQ_emotional_neglect': 'emotional_neglect',
                                              'CTQ_physical_abuse': 'physical_abuse',
                                              'CTQ_sexual_abuse': 'sexual_abuse'})

### rename NEMESIS cols
mindset_cov = mindset_cov.rename(columns={'EmNeg_freq': 'emotional_neglect',
                                          'PsychAb_freq': 'emotional_abuse',
                                          'SA_freq': 'sexual_abuse',
                                          'PhysAb_freq': 'physical_abuse'})

### add mindset data
# ignore_index=True, as for the CTQ concat: without it the mindset rows and the
# CTQ rows both start at label 0, leaving duplicate index labels in full_data.
full_data = pd.concat([mindset_cov, combined_data], ignore_index=True)

In [9]:
### write the combined (unscaled) metadata to disk, leaving out the index column
full_data.to_csv(
    path_or_buf='/project/3022000.05/linschl/data/full_metadata_20241210.csv',
    index=False,
)

### rescale the trauma variables

In [11]:
# adversity columns that get rescaled
ct_cols = [
    'emotional_abuse',
    'physical_abuse',
    'sexual_abuse',
    'emotional_neglect',
]

# theoretical score bounds used for the min-max rescaling
min_ctq, max_ctq = 5, 25  # CTQ columns
min_nem, max_nem = 0, 5   # nemesis columns
max_nem_en = 4            # nemesis emotional neglect has its own maximum

In [12]:
# Min-max rescale every CTQ column with the theoretical bounds:
# a score of min_ctq maps to 0 and a score of max_ctq maps to 1.
ctq_span = max_ctq - min_ctq
for subscale in ct_cols:
    combined_data[f'{subscale}_scaled'] = (combined_data[subscale] - min_ctq) / ctq_span

In [13]:
# transform nemesis data
# NOTE: ct_cols is rebound here to the three abuse columns only.
ct_cols = ['emotional_abuse', 'physical_abuse', 'sexual_abuse']

# Upper bound per column: the abuse columns share max_nem, while emotional
# neglect is rescaled with its own maximum (max_nem_en).
nem_upper = dict.fromkeys(ct_cols, max_nem)
nem_upper['emotional_neglect'] = max_nem_en

for subscale, upper in nem_upper.items():
    mindset_cov[f'{subscale}_scaled'] = (mindset_cov[subscale] - min_nem) / (upper - min_nem)

In [14]:
### combine rescaled data (saved in the next cell)
# Stack the mindset rows on top of the CTQ cohorts; concat aligns the columns by name.
# ignore_index=True gives the result a single unique 0..n-1 index; without it the
# row labels of both inputs restart at 0 and the combined frame has duplicate labels.
full_data_scaled = pd.concat([mindset_cov, combined_data], ignore_index=True)

# drop original ct data (optional step, left disabled so the raw scores stay in the output)
#ct_cols = ['emotional_abuse', 'physical_abuse', 'sexual_abuse','emotional_neglect']
#full_data_scaled = full_data_scaled.drop(ct_cols, axis=1)

In [16]:
# write the combined metadata including the *_scaled columns, leaving out the index column
full_data_scaled.to_csv(
    path_or_buf='/project/3022000.05/linschl/data/full_metadata_rescaled_20241210.csv',
    index=False,
)

### rescale imputed datasets

In [17]:
### load the imputed mindset table
df = pd.read_csv(
    filepath_or_buffer='/project/3022000.05/linschl/data/mindset/imputed_data/Mindset_full_imp_20240326.csv',
)

In [18]:
# columns to keep from the imputed table; '.imp.1' is carried along with the covariates
cov_cols_mindset = [
    'subj_id', '.imp.1', 'age', 'sex', 'diagnosis', 'dataset', 'site', 'eTIV',
    'EmNeg_freq', 'PsychAb_freq', 'SA_freq', 'PhysAb_freq',
]

# select the columns and rename the NEMESIS ones in a single chain
nemesis_names = {
    'EmNeg_freq': 'emotional_neglect',
    'PsychAb_freq': 'emotional_abuse',
    'SA_freq': 'sexual_abuse',
    'PhysAb_freq': 'physical_abuse',
}
mindset_cov = df[cov_cols_mindset].rename(columns=nemesis_names)

# determine theoretical min and max
min_nem, max_nem = 0, 5  # nemesis bounds
max_nem_en = 4           # emotional neglect has its own maximum

# Upper bound per column, in the order the *_scaled columns are created:
# the three abuse columns first, emotional neglect last.
nem_upper = {
    'emotional_abuse': max_nem,
    'physical_abuse': max_nem,
    'sexual_abuse': max_nem,
    'emotional_neglect': max_nem_en,
}
for subscale, upper in nem_upper.items():
    mindset_cov[f'{subscale}_scaled'] = (mindset_cov[subscale] - min_nem) / (upper - min_nem)

# drop original ct data so only the rescaled columns remain
ct_cols = list(nem_upper)
mindset_cov = mindset_cov.drop(columns=ct_cols)

In [20]:
# write the rescaled imputed mindset table, leaving out the index column
mindset_cov.to_csv(
    path_or_buf='/project/3022000.05/linschl/data/mindset/imputed_data/full_imputed_mindset_rescaled_20240722.csv',
    index=False,
)