In [None]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from pydicom import dcmread
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from numpy.linalg import matrix_rank, qr, inv, solve, lstsq
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, Ridge

In [None]:
# Working directory for the notebook: relative paths used by later cells
# (the glob patterns, 'expt_t2f.csv') resolve against it.
# Set the RADIOMICS_TEXTURES_DIR environment variable to point somewhere
# else; when it is unset the original location is used unchanged.
homedir = os.environ.get('RADIOMICS_TEXTURES_DIR',
                         '/home/raghuram/Desktop/radiomics/TEXTURES/')
os.chdir(homedir)

In [None]:
# Collect the per-sequence feature CSVs found in the current working
# directory, one alphabetically sorted list per file-name suffix.
t1_list, t2_list, t1ce_list, flair_list = (
    sorted(glob.glob(pattern))
    for pattern in (
        '*_T1W_features.csv',
        '*_T2W_features.csv',
        '*_T1CE_features.csv',
        '*_T2F_features.csv',
    )
)

In [None]:
def pre_process_dataframe(csv_file_name, experiment_number):
    """Build the feature matrix and the two timing targets for one experiment.

    Steps:
      1. Read the CSV and keep only rows whose ``experiment_number`` column
         equals ``experiment_number``.
      2. Drop the bookkeeping columns (experiment parameters, file/patient
         names, flip angle).
      3. Binarize the magnetic field strength: 1 where
         ``mag_field_strength <= 1.5``, otherwise 0. Missing values are
         filled with 1.5 first, so they binarize to 1.
      4. One-hot encode the two remaining categorical columns with the
         prefixes ``col1`` and ``col2`` (presumably scanner name and
         manufacturer, judging by the dropped levels -- confirm against the
         CSV) and drop one reference level from each.
      5. Pull out ``repetition_time`` and ``excitation_time`` as targets and
         remove them, together with the raw field strength, from the features.

    Parameters
    ----------
    csv_file_name : str or file-like
        Passed straight to ``pandas.read_csv``.
    experiment_number : int
        Experiment to select.

    Returns
    -------
    X : numpy.ndarray
        Remaining feature columns, one row per selected CSV row.
    repetition_time : numpy.ndarray
        Values of the ``repetition_time`` column.
    excitation_time : numpy.ndarray
        Values of the ``excitation_time`` column.

    Raises
    ------
    KeyError
        If an expected column, or one of the reference dummy columns
        ``col2_Siemens`` / ``col1_Intera Achieva``, is absent.
    ValueError
        From ``pandas.get_dummies`` if the number of categorical columns
        left after step 2 is not exactly two.
    """
    bookkeeping_columns = [
        'experiment_number', 'scale', 'algo', 'ng', 'mat_file_name',
        'parameters_Ng', 'parameters_Scale', 'parameters_Algo',
        'sequence_name', 'patient_name', 'flip_angle',
    ]

    experiment_df = pd.read_csv(csv_file_name)
    experiment_df = experiment_df[experiment_df['experiment_number'] == experiment_number]
    # Non-inplace drop returns a fresh frame, so the column assignments below
    # do not write into a slice of the frame read from disk.
    experiment_df = experiment_df.drop(columns=bookkeeping_columns)

    # fillna returns a new Series; the result must be assigned back, otherwise
    # NaN rows compare False against 1.5 and end up binarized as 0.
    experiment_df['mag_field_strength'] = experiment_df['mag_field_strength'].fillna(1.5)
    experiment_df['mag_field_strength_binarized'] = (
        experiment_df['mag_field_strength'] <= 1.5
    ).astype(int)

    # Explicit numeric dtype: newer pandas defaults to bool dummies, which
    # would turn the mixed frame into an object array in to_numpy().
    experiment_df = pd.get_dummies(experiment_df, prefix=['col1', 'col2'], dtype=np.uint8)
    # Drop one reference level per categorical variable.
    experiment_df = experiment_df.drop(columns=['col2_Siemens', 'col1_Intera Achieva'])

    repetition_time = experiment_df['repetition_time'].to_numpy()
    excitation_time = experiment_df['excitation_time'].to_numpy()

    experiment_df = experiment_df.drop(
        columns=['mag_field_strength', 'excitation_time', 'repetition_time']
    )
    X = experiment_df.to_numpy()
    return X, repetition_time, excitation_time
        


In [None]:
def split_data_train_test(numpy_array, y1, y2):
    """Shuffle, rescale and split the features and both targets.

    The rows of ``numpy_array``, ``y1`` and ``y2`` are shuffled together with
    a fixed seed, both targets are divided by 1000 (presumably ms -> s;
    TODO confirm units), and the rank of the shuffled feature matrix is
    printed. A single seeded train/test split is then applied to the features
    and both targets so all three share the same row partition.

    Parameters
    ----------
    numpy_array : array-like
        Feature matrix, one row per sample.
    y1 : array-like
        First target. The notebook passes repetition time (TR) here, so its
        split is stored under the ``y_tr_*`` keys.
    y2 : array-like
        Second target. The notebook passes TE here, so its split is stored
        under the ``y_te_*`` keys.

    Returns
    -------
    dict
        ``X``          full feature matrix, min-max scaled with a scaler
                       fitted on all rows (no constant column added);
        ``y1``, ``y2`` shuffled, rescaled targets;
        ``X_train``, ``X_test``
                       split features, min-max scaled with a scaler fitted on
                       the training rows only, with a constant column
                       prepended;
        ``y_tr_train``, ``y_tr_test``   split of ``y1``;
        ``y_te_train``, ``y_te_test``   split of ``y2``.
    """
    X, y1, y2 = shuffle(numpy_array, y1, y2, random_state=5)
    y1, y2 = y1 / 1000, y2 / 1000

    print(matrix_rank(X))

    # One call splits all three arrays with the same indices. y1 is labelled
    # TR and y2 is labelled TE, matching the argument order used by the caller.
    (X_train, X_test,
     y_train_tr, y_test_tr,
     y_train_te, y_test_te) = train_test_split(X, y1, y2, random_state=5)

    # The full matrix gets its own scaler so the train/test scaler below is
    # fitted on training rows only.
    X = MinMaxScaler().fit_transform(X)

    train_scaler = MinMaxScaler()
    # has_constant='add' always prepends the intercept column; the default
    # ('skip') would omit it whenever a scaled column happens to be a non-zero
    # constant, leaving X_train and X_test with different widths.
    X_train = sm.add_constant(train_scaler.fit_transform(X_train), has_constant='add')
    X_test = sm.add_constant(train_scaler.transform(X_test), has_constant='add')

    return {
        'X': X,
        'y1': y1,
        'y2': y2,
        'X_train': X_train,
        'X_test': X_test,
        'y_tr_train': y_train_tr,
        'y_tr_test': y_test_tr,
        'y_te_train': y_train_te,
        'y_te_test': y_test_te,
    }
    

In [None]:
def linear_regression(output_dict, expt_number, results_folder):
    """Fit an OLS model of ``y2`` on the full feature matrix and save its summary.

    The statsmodels summary is written as CSV to
    ``<results_folder>/expt_t2f_TE_<expt_number>.csv``. The folder is created
    if it does not exist yet.

    Parameters
    ----------
    output_dict : dict
        Must provide ``'X'`` (feature matrix) and ``'y2'`` (target), as
        produced by ``split_data_train_test``.
    expt_number : int
        Experiment number, used only in the output file name.
    results_folder : str
        Directory that receives the CSV summary.

    Returns
    -------
    None
    """
    # Use the argument rather than a notebook-level global so the function
    # works regardless of which variables exist in the kernel.
    # NOTE(review): 'X' as built by split_data_train_test has no constant
    # column, so this is a regression through the origin -- confirm intended.
    model = sm.OLS(output_dict['y2'], output_dict['X']).fit()

    os.makedirs(results_folder, exist_ok=True)
    filename = 'expt_t2f_TE' + '_' + str(expt_number) + '.csv'
    with open(os.path.join(results_folder, filename), 'w') as csvfile:
        csvfile.write(model.summary().as_csv())

In [None]:
# Run the TE regression for experiments 1..25 of the T2-FLAIR feature file.
# results_folder is an absolute path, independent of the working directory;
# 'expt_t2f.csv' is relative and resolves against the current working directory.
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t2f/linear_regression/TE'
for expt_number in range(1, 26):
    # Features plus the two timing targets for this experiment.
    X, tr, te = pre_process_dataframe('expt_t2f.csv', expt_number)
    # Shuffled / scaled / split data bundled in a dict.
    od = split_data_train_test(X, tr, te)
    # Fit OLS and write the summary CSV for this experiment.
    linear_regression(od, expt_number,results_folder)