In [1]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.preprocessing import MinMaxScaler
import pingouin as pg
import numpy as np

In [76]:
# Root folder of the texture experiments. The process working directory is
# switched to it so that the relative CSV names used by later cells resolve here.
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# unchanged on a machine with this exact directory layout.
homedir = '/home/raghuram/Desktop/radiomics/TEXTURES/'
os.chdir(homedir)

In [77]:
# Per-sequence experiment tables, given as paths relative to the current
# working directory.
# NOTE(review): only `t2w_file` is used by the cells visible in this notebook.
t1ce_file = 'expt_t1ce.csv'
t1w_file = 'expt_t1w.csv'
t2f_file = 'expt_t2f.csv'
t2w_file = 'expt_t2w.csv'

In [119]:
def pre_process_dataframe(csv_file_name, experiment_number, n_radiomic_features=42):
    """Load one experiment from a CSV and split it into predictors and responses.

    Steps, in order:
      1. Read ``csv_file_name`` and keep the rows whose ``experiment_number``
         column equals ``experiment_number``.
      2. Fill missing ``mag_field_strength`` with 1.5 and derive the 0/1 column
         ``mag_field_strength_binarized`` (1 when the strength is >= 1.5).
      3. Drop bookkeeping columns, the unused volume columns and the raw
         ``mag_field_strength`` column.
      4. Drop rows with a missing ``VOLUME_WT``.
      5. One-hot encode ``scanner_manufacturer`` and ``scanner_model``
         (first level dropped).
      6. Treat the first ``n_radiomic_features`` remaining columns as the
         radiomic features and min-max scale every other column to [0, 1].

    Parameters
    ----------
    csv_file_name : str
        Path of the CSV file to read.
    experiment_number : int
        Value of the ``experiment_number`` column to select.
    n_radiomic_features : int, optional
        Number of leading columns (after step 5) that hold radiomic features.
        Defaults to 42. The split is purely positional, so it relies on the
        column order of the CSV.

    Returns
    -------
    (scaled_df, radiomics_df) : tuple of pandas.DataFrame
        ``scaled_df`` holds the min-max scaled non-radiomic columns,
        ``radiomics_df`` the unscaled radiomic feature columns. Both share
        the same row index.
    """
    experiment_df = pd.read_csv(csv_file_name)

    # .copy() so that the column assignments below act on an independent frame
    # instead of a view of the full table (avoids SettingWithCopyWarning).
    experiment_df = experiment_df[experiment_df['experiment_number'] == experiment_number].copy()

    # NOTE(review): missing strengths are filled with 1.5 and the threshold is
    # ">= 1.5", so filled rows always land in class 1 -- confirm the threshold
    # is intended.
    experiment_df['mag_field_strength'] = experiment_df['mag_field_strength'].fillna(1.5)
    experiment_df['mag_field_strength_binarized'] = (
        experiment_df['mag_field_strength'] >= 1.5
    ).astype(int)

    experiment_df = experiment_df.drop(columns=[
        'Tumor', 'experiment_number', 'mat_file_name', 'scale', 'algo', 'ng',
        'sequence_name', 'flip_angle',
        'VOLUME_ET', 'VOLUME_NET', 'VOLUME_ED', 'VOLUME_TC',
        'VOLUME_BRAIN', 'mag_field_strength',
    ])

    experiment_df = experiment_df.dropna(subset=['VOLUME_WT'])
    experiment_df = pd.get_dummies(
        experiment_df,
        columns=['scanner_manufacturer', 'scanner_model'],
        drop_first=True,
    )

    radiomics_features = list(experiment_df.columns)[:n_radiomic_features]
    radiomics_df = experiment_df[radiomics_features]

    # Cast to float first: depending on the pandas version the dummy columns
    # are boolean, and boolean columns cannot be subtracted.
    covariates_df = experiment_df.drop(columns=radiomics_features).astype(float)

    # Min-max scaling: (x - min) / (max - min). The parentheses matter --
    # without them only `min` is divided by the range.
    column_min = covariates_df.min()
    column_range = covariates_df.max() - column_min
    # A constant column has zero range; divide by 1 instead so that it maps
    # to 0 rather than to NaN/inf.
    column_range = column_range.where(column_range != 0, 1.0)
    scaled_df = (covariates_df - column_min) / column_range

    return (scaled_df, radiomics_df)
        


In [120]:
def results_summary_to_dataframe(results, response_variable):
    """Convert a fitted statsmodels result into a tidy DataFrame.

    Parameters
    ----------
    results : object
        Fitted regression result exposing ``params`` (coefficients) and
        ``pvalues``, one entry per model term.
    response_variable : str
        Name of the response the model was fitted to; repeated on every row.

    Returns
    -------
    pandas.DataFrame
        One row per model term with the columns ``coeff``, ``pvals`` and
        ``response_variable``, in that order.
    """
    # Build the frame directly in its final column order. Confidence
    # intervals are not part of the output, so they are not computed.
    return pd.DataFrame({
        "coeff": results.params,
        "pvals": results.pvalues,
        "response_variable": response_variable,
    })

In [124]:
def linear_regression(results_folder, radiomic_df, scaled_df, experiment_number, sequence_name='t2w'):
    """Fit one OLS model per radiomic feature and write all results to one CSV.

    Every column of ``radiomic_df`` is regressed in turn on all columns of
    ``scaled_df`` plus an intercept. The per-feature coefficient/p-value
    tables are concatenated and written to
    ``<results_folder>/<experiment_number>_<sequence_name>.csv``.

    Parameters
    ----------
    results_folder : str
        Directory for the output CSV; created if it does not exist.
    radiomic_df : pandas.DataFrame
        Response variables, one column per radiomic feature.
    scaled_df : pandas.DataFrame
        Predictors, row-aligned with ``radiomic_df``.
    experiment_number : int
        Experiment identifier, used as the file-name prefix.
    sequence_name : str, optional
        File-name suffix identifying the MRI sequence. Defaults to ``'t2w'``,
        which reproduces the previously hard-coded file name.

    Notes
    -----
    The predictors are passed to statsmodels as a plain array and the CSV is
    written without its index, so the output rows carry no predictor names.
    Within each response the rows follow the column order of the design
    matrix: the intercept first (statsmodels' default for ``add_constant``),
    then the columns of ``scaled_df``.
    """
    radiomic_features = list(radiomic_df.columns)
    y = np.asarray(radiomic_df, dtype=float)
    X = sm.add_constant(np.asarray(scaled_df, dtype=float))

    result_list = []
    for idx, column in enumerate(radiomic_features):
        results = sm.OLS(y[:, idx], X).fit()
        result_list.append(results_summary_to_dataframe(results, column))

    # Make sure the target directory exists before writing.
    os.makedirs(results_folder, exist_ok=True)
    output_path = os.path.join(
        results_folder, '{}_{}.csv'.format(experiment_number, sequence_name)
    )
    pd.concat(result_list).to_csv(output_path, index=False)

In [125]:
# Run the regression for every T2w experiment and write one CSV per
# experiment into `results_folder`.
# NOTE(review): the experiment range (1..25) and the absolute output path
# are hard-coded; `t2w_file` is resolved relative to the current working
# directory.
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t2w/linear_regression'
for expt_number in range(1, 26):
    scaled_df, radiomics_df = pre_process_dataframe(t2w_file, expt_number)
    linear_regression(results_folder, radiomics_df, scaled_df, expt_number) 
    print('Finished experiment {}'.format(expt_number))

Finished experiment 1
Finished experiment 2
Finished experiment 3
Finished experiment 4
Finished experiment 5
Finished experiment 6
Finished experiment 7
Finished experiment 8
Finished experiment 9
Finished experiment 10
Finished experiment 11
Finished experiment 12
Finished experiment 13
Finished experiment 14
Finished experiment 15
Finished experiment 16
Finished experiment 17
Finished experiment 18
Finished experiment 19
Finished experiment 20
Finished experiment 21
Finished experiment 22
Finished experiment 23
Finished experiment 24
Finished experiment 25


In [127]:
# NOTE(review): this points at the *t1w* results folder, while the loop above
# wrote its output to the *t2w* folder. The execution counters show this cell
# (In [127]) was run after the post-processing cell below (In [126]), so a
# fresh top-to-bottom run post-processes a different folder than the recorded
# run did -- confirm which folder is intended.
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t1w/linear_regression'

In [126]:
# Add a boolean `significant` column (p-value < 0.05) to every result CSV in
# `results_folder`, overwriting each file in place.
# NOTE: this changes the process working directory, so relative paths used
# afterwards resolve against `results_folder`.
os.chdir(results_folder)
csv_files_list = glob.glob('*.csv')

for csv_file in csv_files_list:
    df = pd.read_csv(csv_file).assign(
        significant=lambda frame: (frame['pvals'] < 0.05).astype(bool)
    )
    df.to_csv(csv_file, index=False)