In [1]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.preprocessing import MinMaxScaler
import pingouin as pg
import numpy as np

In [4]:
homedir = '/home/raghuram/Desktop/radiomics/TEXTURES/'
os.chdir(homedir)

In [22]:
t1ce_file = 'expt_t1ce.csv'
t1w_file = 'expt_t1w.csv'
t2f_file = 'expt_t2f.csv'
t2w_file = 'expt_t2w.csv'

In [6]:
def pre_process_dataframe(csv_file_name, experiment_number):
    # Binarize magnetization strength values
    # Scanner names and manufacturer are categorical 
    # One hot encode them
    
    # Code for the above here
    # Experiments in ascending order
    
    experiment_df = pd.read_csv(csv_file_name)
    experiment_df = experiment_df[experiment_df['experiment_number'] == experiment_number]
    fill_values = {'mag_field_strength':1.5}
    experiment_df.fillna(value=fill_values, inplace=True)
    experiment_df['mag_field_strength_binarized'] = (experiment_df['mag_field_strength']>=1.5).astype(int)
    experiment_df.drop(columns=[ 'Tumor','experiment_number', 'scale', 'algo', 'ng', 'mat_file_name', 'sequence_name', 'flip_angle', 
                                'VOLUME_ET', 'VOLUME_NET', 'VOLUME_ED', 'VOLUME_TC',
                               'VOLUME_BRAIN', 'mag_field_strength'], inplace=True)
    
    experiment_df.dropna(subset=['VOLUME_WT'], inplace=True)
    experiment_df = pd.get_dummies(experiment_df, columns=['scanner_manufacturer', 'scanner_model'], drop_first=True)
    radiomics_features  = list(experiment_df.columns)[:42]

    radiomics_df = experiment_df[radiomics_features]
    scaled_df = experiment_df.drop(columns=radiomics_features)
    scaled_df = scaled_df - scaled_df.min()/(scaled_df.max()-scaled_df.min())
  
    
    return (scaled_df, radiomics_df)
        


In [19]:
def results_summary_to_dataframe(results,response_variable):
    '''take the result of an statsmodel results table and transforms it into a dataframe'''
    pvals = results.pvalues
    coeff = results.params
    ci_bounds = results.conf_int()
 
    results_df = pd.DataFrame({"pvals":pvals,
                               "coeff":coeff,
                               "response_variable":response_variable,
                              })
    #Reordering...
    results_df = results_df[["coeff","pvals", "response_variable"]]
    return results_df

In [20]:
def linear_regression(results_folder, radiomic_df, scaled_df, experiment_number):
    # Load the data corresponding to the sequence argument here
    # Perform regression with Tr and Te 
    
    radiomic_features = list(radiomic_df.columns)
    y = np.array(radiomic_df)
    X = np.array(scaled_df)
    X = sm.add_constant(X)
    result_list = []
    for idx, column in enumerate(radiomic_features):
        
        print('Feature is {}'.format(column))
        model = sm.OLS(y[:, idx], X)
        results = model.fit()
        results_df = results_summary_to_dataframe(results, column)
        result_list.append(results_df)
       
        
    pd.concat(result_list).to_csv(os.path.join(results_folder, str(experiment_number)+'_'+'t1w.csv'), index=False)

In [None]:
# results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t2w/linear_regression'
for expt_number in range(1, 26):
    scaled_df, radiomics_df = pre_process_dataframe(t2w_file, expt_number)
    linear_regression(results_folder, radiomics_df, scaled_df, expt_number) 

In [44]:
df = pd.read_csv(t2f_file)

In [37]:
clinical_df = pd.read_csv('TCGA_clinical_INFO.csv')
subset_clinical_df = clinical_df[['Tumor', '1p_19q_co_del_status', 
                                            'IDH1']]

In [38]:
df = pd.merge(df, subset_clinical_df, how='outer', on='Tumor')

In [45]:
df.drop(columns=['seriesinstanceuid'], inplace=True)
df.to_csv('expt_t1ce.csv', index=False)

KeyError: "['seriesinstanceuid'] not found in axis"