# Make Features
This notebook loads the outputs from the fMRI preprocessing pipeline in MATLAB and transforms them to feature vectors which will be used to train a seizure classifier. 

Currently, only features extracted from the TPM pipeline and the lesion overlaps (which are pipeline-independent) are loaded.

In [1]:
from scipy.io import loadmat
import numpy as np 
import pandas as pd 


In [2]:
# Shared configuration for the notebook: subject IDs, pipeline and atlas names,
# ROI index ranges, and the data directory layout used by the loader functions.

# Subject IDs included for hypothesis testing
# (MATLAB notation: [1,2,4,6:11,13:25,27:29,31:36,39:41,43:44]).
subjs = [
    1, 2, 4,
    *range(6, 12),
    *range(13, 26),
    *range(27, 30),
    *range(31, 37),
    *range(39, 42),
    43, 44,
]
Pipes = ['tpm']
Atlas = ['AAL', 'SCH']

# Index ranges per atlas: 0-165 for AAL, 166-265 for SCH.
# NOTE(review): presumably positions in a concatenated AAL+SCH ROI list -- confirm.
AAL_ind = list(range(166))
SCH_ind = list(range(166, 266))

# Data directories; the root is a relative path, so it resolves against the
# working directory the notebook is run from.
# TODO: find a more robust way to locate the data root.
data_path = "../../../../data"
raw_data_path = f"{data_path}/raw/fMRI"
intermediate_data_path = f"{data_path}/intermediate/fMRI"
processed_data_path = f"{data_path}/processed/fMRI"


In [3]:
def lesion_overlap_load(atlas,save_flag=False):
    ''' Read the lesion-overlap export for one atlas and optionally persist it.

    The file `Lesion_Overlap_<ATLAS>.mat` (atlas name upper-cased) is read from
    `raw_data_path`.

    Parameters
    ----------
    atlas : str
        Atlas name; only its upper-cased form is used to build file names.
    save_flag : bool, optional
        When true, write the overlap array (.npy), the ROI names (one per line)
        and the subject entries (one per line) into `processed_data_path`.

    Returns
    -------
    tuple
        `(overlap, roi_names, subjects)` where `overlap` is the "overlap"
        variable as a NumPy array, `roi_names` is the unwrapped "ROInames"
        entry (each element is indexed with `[0]` to obtain the name), and
        `subjects` is the first row of the "subjects" variable.
    '''
    mat_contents = loadmat(f'{raw_data_path}/Lesion_Overlap_{atlas.upper()}.mat')

    overlap = np.array(mat_contents["overlap"])
    # The MATLAB export nests the ROI names; three [0] lookups reach the
    # sequence of per-ROI entries.
    roi_names = mat_contents["ROInames"][0][0][0]
    subjects = mat_contents["subjects"][0]

    # Nothing to write: hand the loaded values straight back.
    if not save_flag:
        return overlap, roi_names, subjects

    np.save(f'{processed_data_path}/Lesion_Overlap_{atlas.upper()}.npy', overlap)

    with open(f'{processed_data_path}/Lesion_Overlap_{atlas.upper()}_ROIs.txt','w') as roi_file:
        roi_file.writelines(f"{roi[0]}\n" for roi in roi_names)

    with open(f'{processed_data_path}/Lesion_Overlap_{atlas.upper()}_Subjects.txt','w') as subject_file:
        subject_file.writelines(f"{subj}\n" for subj in subjects)

    return overlap, roi_names, subjects


In [4]:
def strength_load(atlas,save_flag=False):
    ''' Compute mean positive and mean negative strength per row and subject.

    The matrices are read from `4mm_tpm.mat` in `raw_data_path`: the
    "Schaefer_mats" variable when `atlas` is "sch" (case-insensitive),
    otherwise the variable named `<ATLAS>_mats`.

    Parameters
    ----------
    atlas : str
        Atlas name used to pick the variable and to name the output files.
    save_flag : bool, optional
        When true, save both result arrays as .npy files in
        `processed_data_path`.

    Returns
    -------
    tuple of np.ndarray
        `(mean_str_pos, mean_str_neg)`, each of shape (first matrix dimension,
        number of subjects). Entry [i, s] is the mean of the strictly positive
        (respectively strictly negative) values of `matrices[i, :, s]`.
        A row with no such values yields NaN (NumPy warns about the empty mean).
    '''
    mat_contents = loadmat(f'{raw_data_path}/4mm_tpm.mat')

    # The Schaefer matrices are stored under a spelled-out variable name.
    mat_key = "Schaefer_mats" if atlas.lower()=="sch" else f"{atlas.upper()}_mats"
    matrices = np.array(mat_contents[mat_key])

    # The array must be 3-D; the last axis is treated as the subject axis.
    n_rows, _, num_subjs = matrices.shape
    mean_str_pos = np.zeros((n_rows, num_subjs))
    mean_str_neg = np.zeros_like(mean_str_pos)

    for row_idx, subj_idx in np.ndindex(n_rows, num_subjs):
        weights = matrices[row_idx, :, subj_idx]
        mean_str_pos[row_idx, subj_idx] = weights[weights > 0].mean()
        mean_str_neg[row_idx, subj_idx] = weights[weights < 0].mean()

    if save_flag:
        np.save(f'{processed_data_path}/Mean_Str_Pos_{atlas.upper()}.npy', mean_str_pos)
        np.save(f'{processed_data_path}/Mean_Str_Neg_{atlas.upper()}.npy', mean_str_neg)

    return mean_str_pos, mean_str_neg

In [5]:
def make_feature_df(atlas,save_flag=False):
    ''' Build the per-subject feature table for one atlas.

    Every row of `fMRI_labels.csv` (read from `raw_data_path`) whose subject
    is present in the lesion-overlap subject list becomes one output row: the
    original label columns followed by, for each ROI, the columns
    "Overlap <roi>", "Mean Str Pos <roi>" and "Mean Str Neg <roi>".

    Parameters
    ----------
    atlas : str
        Atlas name, forwarded to `lesion_overlap_load` and `strength_load`.
    save_flag : bool, optional
        When true, write the table to
        `<processed_data_path>/fMRI_features_<ATLAS>.csv`.

    Returns
    -------
    pd.DataFrame
        One row per matched subject; empty when no label row matches.

    Notes
    -----
    Rows are collected in a list and the frame is built once, because
    `DataFrame.append` no longer exists in pandas >= 2.0 and growing a frame
    row by row is quadratic.
    '''
    labels=pd.read_csv(f'{raw_data_path}/fMRI_labels.csv')

    overlap,roi_names,subjects=lesion_overlap_load(atlas)
    mean_str_pos,mean_str_neg=strength_load(atlas)

    subject_ids=np.array(subjects)
    # Each ROI entry wraps its name in a one-element container; unwrap once up front.
    roi_labels=[roi[0] for roi in roi_names]

    records=[]
    for _,row in labels.iterrows():
        # The numeric subject id is taken from the last two characters of "Subject Number".
        subject_id=int(row["Subject Number"][-2:])
        matches=np.where(subject_ids==subject_id)[0]
        if matches.size==0:
            # Subject has labels but no lesion-overlap entry: leave it out.
            continue
        subject_ind=matches[0]

        # NOTE(review): the strength arrays are indexed with the position found in the
        # lesion-overlap subject list, which assumes both files share the same subject
        # order -- confirm against the MATLAB export.
        record=row.to_dict()
        for roi_ind,roi_label in enumerate(roi_labels):
            record[f"Overlap {roi_label}"]=overlap[roi_ind,subject_ind]
            record[f"Mean Str Pos {roi_label}"]=mean_str_pos[roi_ind,subject_ind]
            record[f"Mean Str Neg {roi_label}"]=mean_str_neg[roi_ind,subject_ind]
        records.append(record)

    # Build the frame in one go; column order follows dict insertion order.
    fMRI_features=pd.DataFrame(records)

    if save_flag:
        fMRI_features.to_csv(f'{processed_data_path}/fMRI_features_{atlas.upper()}.csv')

    return fMRI_features



In [6]:
# Build and save the feature table for each atlas in turn. `fMRI_features`
# is rebound on every pass, so after the loop it holds the "sch" table.
for _atlas_name in ("aal", "sch"):
    fMRI_features = make_feature_df(_atlas_name, save_flag=True)
