In [None]:
import os
import glob
import fnmatch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.configs import configs

In [None]:
# Load the EPOC table from its SPSS (.sav) export into a DataFrame.
epoc_sav_path = f"{configs.PROJECT_ROOT}/EPOC/EPOC_data_forSP9_mLPP.sav"
epoc = pd.read_spss(epoc_sav_path)

In [174]:
# (rows, columns) of the loaded table
epoc.shape

(304, 342)

#### Check variable values

In [5]:
# Distinct group labels in 'Status'; only the 'Index' rows are kept further below
epoc['Status'].unique()

['Index', 'Kontrolle']
Categories (2, object): ['Index', 'Kontrolle']

In [6]:
# Distinct values of the 'Ausschluss' flag; only 'Kein Ausschluss' rows are kept further below
epoc['Ausschluss'].unique()

['Kein Ausschluss', 'Ausschluss']
Categories (2, object): ['Ausschluss', 'Kein Ausschluss']

In [7]:
# Frequency of each 'Response' value; dropna=False adds a row for missing values
epoc['Response'].value_counts(dropna=False)

NaN    179
1.0     75
0.0     50
Name: Response, dtype: int64

In [8]:
# Frequency of each 'Remission' value; dropna=False adds a row for missing values
epoc['Remission'].value_counts(dropna=False)

NaN    179
0.0     63
1.0     62
Name: Remission, dtype: int64

#### Delete healthy controls

In [175]:
# Drop the control group: 'Status' is either 'Index' or 'Kontrolle',
# and only the 'Index' rows are retained.
target_to_list_status = ['Index']

keep_status = epoc['Status'].isin(target_to_list_status)
epoc = epoc.loc[keep_status]
print(epoc.shape)

(142, 342)


#### Delete excluded ("Ausschluss") subjects

In [176]:
# Drop subjects flagged as excluded: retain only rows whose 'Ausschluss'
# value is 'Kein Ausschluss'.
target_to_list = ['Kein Ausschluss']

not_excluded = epoc['Ausschluss'].isin(target_to_list)
epoc = epoc.loc[not_excluded]
print(epoc.shape)

(134, 342)


#### Delete subjects with missing "Response" or "Remission" values

In [177]:
# Keep only the subjects that have a recorded 'Response' value.
has_response = epoc['Response'].notna()
epoc = epoc[has_response]
print(epoc.shape)

(119, 342)


In [178]:
# Keep only the subjects that have a recorded 'Remission' value.
has_remission = epoc['Remission'].notna()
epoc = epoc[has_remission]
print(epoc.shape)

(119, 342)


Dropping rows with a missing Remission value removes no further subjects (the shape stays at (119, 342)), so every remaining subject with a Response value also has a Remission value.

#### Check for NaNs

In [13]:
# Counts per 'Geschlecht' value; with dropna=False a NaN row would be listed if any value were missing
epoc['Geschlecht'].value_counts(dropna=False)

Weiblich    62
Männlich    57
Name: Geschlecht, dtype: int64

In [None]:
# Count missing 'Alter' values directly. value_counts() prints one row per
# distinct value, so on a column with many distinct values (presumably the
# case for 'Alter' -- TODO confirm) a NaN row is easy to overlook and can be
# hidden entirely when pandas truncates the displayed Series.
epoc['Alter'].isna().sum()

#### Save files for all patients (that were not excluded from the study)

In [15]:
# all variables
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
# NOTE(review): the '119' in the file name mirrors the row count printed above;
# rename the file if the cohort size changes.
all_pat_csv_path = f'{configs.PROJECT_ROOT}/EPOC/csv_files/EPOC_all_pat_119.csv'
os.makedirs(os.path.dirname(all_pat_csv_path), exist_ok=True)
epoc.to_csv(all_pat_csv_path, index=False)

#### Save files for patients that have structural MRI scans

In [None]:
# Sorted array of the distinct subject codes in the filtered table,
# printed together with its shape.
pat_labels_julia = np.sort(epoc['Code'].unique())
print(pat_labels_julia)
print(pat_labels_julia.shape)

In [18]:
# Convert the code array to a plain Python list. Going through np.asarray()
# keeps the cell re-runnable: this statement rebinds the name to a list, and
# np.ndarray.tolist() raises TypeError when handed a list on a second run.
pat_labels_julia = np.asarray(pat_labels_julia).tolist()

In [None]:
# Entries of the derivatives directory whose names match 'sub-epocp*',
# in sorted order; the list is displayed as the cell output.
derivatives_dir = f'{configs.PROJECT_ROOT}/EPOC/EPOC_BIDS/EPOC_derivatives'
pat_labels_christian = sorted(fnmatch.filter(os.listdir(derivatives_dir), 'sub-epocp*'))
pat_labels_christian

In [20]:
# Rewrite each directory name so it can be matched against the 'Code' column:
# characters 4-7, '_', character 8, '_', then the remainder
# (e.g. 'sub-epocp123' -> 'epoc_p_123').
pat_labels_christian_new = [
    f"{name[4:8]}_{name[8]}_{name[9:]}" for name in pat_labels_christian
]

In [21]:
# Number of converted directory labels
len(pat_labels_christian_new)

103

In [22]:
# Rows of the filtered table whose 'Code' matches one of the converted
# derivatives-directory labels.
has_t1_label = epoc['Code'].isin(pat_labels_christian_new)
epoc_T1only = epoc[has_t1_label]

In [23]:
# (rows, columns) of the subset matched to a derivatives directory
epoc_T1only.shape

(90, 342)

In [None]:
# Display the whole subset.
# NOTE(review): the next cell already shows .head(); this full display may be redundant.
epoc_T1only

In [None]:
# First rows of the subset as a quick sanity check
epoc_T1only.head()

In [26]:
# all variables
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
# NOTE(review): the '90' in the file name mirrors the row count shown above;
# rename the file if the matched subset changes.
t1_csv_path = f'{configs.PROJECT_ROOT}/EPOC/csv_files/EPOC_T1_pat_90.csv'
os.makedirs(os.path.dirname(t1_csv_path), exist_ok=True)
epoc_T1only.to_csv(t1_csv_path, index=False)

#### Save file for patients that have rs-fMRI scans

In [None]:
# Entries of the halfpipe output directory whose names match 'sub-epocp*',
# in sorted order; the list is displayed as the cell output.
halfpipe_dir = f'{configs.PROJECT_ROOT}/EPOC/halfpipe_patients_only'
pat_labels_fMRI = sorted(fnmatch.filter(os.listdir(halfpipe_dir), 'sub-epocp*'))
pat_labels_fMRI

In [29]:
# Rewrite each directory name so it can be matched against the 'Code' column:
# characters 4-7, '_', character 8, '_', then the remainder
# (e.g. 'sub-epocp123' -> 'epoc_p_123').
pat_labels_fMRI_new = [
    f"{name[4:8]}_{name[8]}_{name[9:]}" for name in pat_labels_fMRI
]

In [30]:
# Number of converted directory labels
len(pat_labels_fMRI_new)

93

In [31]:
# Rows of the filtered table whose 'Code' matches one of the converted
# halfpipe-directory labels.
has_fmri_label = epoc['Code'].isin(pat_labels_fMRI_new)
epoc_fMRI = epoc[has_fmri_label]

In [32]:
# (rows, columns) of the subset matched to a halfpipe directory
epoc_fMRI.shape

(80, 342)

In [None]:
# First rows of the subset as a quick sanity check
epoc_fMRI.head()

In [34]:
# Write the rs-fMRI subset (all columns) to CSV without the index.
# Create the output folder first so the export also works on a fresh checkout
# (to_csv raises OSError when the target directory does not exist).
# NOTE(review): the '80' in the file name mirrors the row count shown above;
# rename the file if the matched subset changes.
fmri_csv_path = f'{configs.PROJECT_ROOT}/EPOC/csv_files/EPOC_fMRI_pat_80.csv'
os.makedirs(os.path.dirname(fmri_csv_path), exist_ok=True)
epoc_fMRI.to_csv(fmri_csv_path, index=False)