In [None]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from pydicom import dcmread
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from numpy.linalg import matrix_rank, qr, inv, solve, lstsq
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, Ridge

In [None]:
# Directory holding the texture-feature CSVs. The original hard-coded location
# remains the default; set RADIOMICS_TEXTURES_DIR to run the notebook on
# another machine without editing this cell.
homedir = os.environ.get('RADIOMICS_TEXTURES_DIR',
                         '/home/raghuram/Desktop/radiomics/TEXTURES/')
# Later cells use relative file names and glob patterns, so they rely on the
# process working directory being switched here.
os.chdir(homedir)

In [None]:
# Collect the per-sequence feature CSVs found in the current working directory,
# one alphabetically sorted list of file names per filename suffix.
t1_list, t2_list, t1ce_list, flair_list = (
    sorted(glob.glob(pattern))
    for pattern in (
        '*_T1W_features.csv',
        '*_T2W_features.csv',
        '*_T1CE_features.csv',
        '*_T2F_features.csv',
    )
)

In [None]:
def pre_process_dataframe(csv_file_name, experiment_number):
    """Build the design matrix and TR/TE vectors for one experiment.

    Steps:
      1. Read ``csv_file_name`` and keep the rows whose ``experiment_number``
         equals ``experiment_number``.
      2. Drop the bookkeeping columns that are not used as predictors.
      3. Fill missing ``mag_field_strength`` values with 1.5 and add
         ``mag_field_strength_binarized`` (1 where the strength is <= 1.5,
         0 otherwise).
      4. One-hot encode the remaining categorical columns and drop one dummy
         level from each.
      5. Split off ``repetition_time`` and ``excitation_time`` and return the
         remaining columns as a float matrix.

    Parameters
    ----------
    csv_file_name : str
        Path of the CSV file to read.
    experiment_number : int
        Value of the ``experiment_number`` column to select.

    Returns
    -------
    X : numpy.ndarray
        Float matrix of the remaining columns, one row per selected CSV row.
        The column order is printed before returning.
    tr : numpy.ndarray
        Values of the ``repetition_time`` column.
    te : numpy.ndarray
        Values of the ``excitation_time`` column.

    Raises
    ------
    KeyError
        If an expected column, or one of the two dummy columns that are
        dropped, is absent for the selected experiment.
    """
    metadata_columns = ['experiment_number', 'scale', 'algo', 'ng', 'mat_file_name', 'parameters_Ng',
                        'parameters_Scale', 'parameters_Algo', 'sequence_name', 'patient_name', 'flip_angle']

    raw_df = pd.read_csv(csv_file_name)
    # Chain the filter and the drop so we always work on a fresh frame rather
    # than mutating a slice of raw_df in place.
    experiment_df = (
        raw_df.loc[raw_df['experiment_number'] == experiment_number]
        .drop(columns=metadata_columns)
    )

    # fillna returns a new Series, so its result must be kept. The previous
    # code discarded it (and passed a dict keyed by column name, which
    # Series.fillna interprets as index labels), so missing strengths were
    # never imputed and were binarized as 0.
    field_strength = experiment_df['mag_field_strength'].fillna(1.5)
    experiment_df = experiment_df.assign(
        mag_field_strength=field_strength,
        mag_field_strength_binarized=(field_strength <= 1.5).astype(int),
    )

    # Exactly two categorical columns must remain at this point, because
    # get_dummies needs one prefix per encoded column. Presumably col1 is the
    # scanner model and col2 the manufacturer -- confirm against the CSV
    # column order.
    experiment_df = pd.get_dummies(experiment_df, prefix=['col1', 'col2'])
    # One level per categorical is removed (presumably as the reference level).
    experiment_df = experiment_df.drop(columns=['col2_Siemens', 'col1_Intera Achieva'])

    tr = experiment_df['repetition_time'].to_numpy()
    te = experiment_df['excitation_time'].to_numpy()
    experiment_df = experiment_df.drop(columns=['mag_field_strength', 'excitation_time', 'repetition_time'])

    # Printed so the coefficient order in the regression summaries can be
    # mapped back to column names.
    print(experiment_df.columns)
    # Force a numeric matrix: depending on the pandas version the dummy columns
    # are bool, which would otherwise produce an object-dtype array.
    X = experiment_df.to_numpy(dtype=float)
    return X, tr, te
        


In [None]:
def transform_data(X, tr, te, random_state=5):
    """Shuffle the design matrix and both response vectors with one permutation.

    The three inputs are passed to ``sklearn.utils.shuffle`` together, so row
    ``i`` of the returned ``X`` still corresponds to element ``i`` of the
    returned ``tr`` and ``te``.

    Parameters
    ----------
    X, tr, te : array-like
        Inputs with the same number of rows/elements.
    random_state : int, optional
        Seed forwarded to ``shuffle``. Defaults to 5, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(X, tr, te)`` in shuffled order.
    """
    X, tr, te = shuffle(X, tr, te, random_state=random_state)
    return X, tr, te
    

In [None]:
def linear_regression(X, tr, te, expt_number, results_folder, response_variable):
    """Fit an OLS model for TR or TE and write its summary table to a CSV file.

    The variable that is not the response is appended to ``X`` as an extra
    predictor column, the predictors are min-max scaled, the response is
    divided by 1000, and ``statsmodels`` OLS is fitted. The summary is written
    to ``expt_t1w_TR_<expt_number>.csv`` or ``expt_t1w_TE_<expt_number>.csv``
    inside ``results_folder``.

    Parameters
    ----------
    X : numpy.ndarray
        2-D predictor matrix, one row per observation.
    tr, te : numpy.ndarray
        1-D arrays with one value per row of ``X``. They are not modified.
    expt_number : int
        Experiment identifier, used only in the output file name.
    results_folder : str
        Directory for the summary CSV; created if it does not exist.
    response_variable : str
        ``'tr'`` to model ``tr`` (with ``te`` as a predictor) or ``'te'`` to
        model ``te`` (with ``tr`` as a predictor).

    Raises
    ------
    ValueError
        If ``response_variable`` is neither ``'tr'`` nor ``'te'``.
    """
    if response_variable == 'tr':
        response, extra_predictor, tag = tr, te, 'TR'
    elif response_variable == 'te':
        response, extra_predictor, tag = te, tr, 'TE'
    else:
        # Previously an unrecognised value fell through both branches and the
        # function returned without fitting or writing anything.
        raise ValueError(
            "response_variable must be 'tr' or 'te', got %r" % (response_variable,)
        )

    # Append the non-response timing parameter as the last predictor column.
    extra_predictor = np.reshape(extra_predictor, (extra_predictor.shape[0], 1))
    design = MinMaxScaler().fit_transform(np.hstack((X, extra_predictor)))

    # Scale into a new array instead of `response /= 1000`: the in-place form
    # mutated the caller's array (so a second call divided again) and raises
    # on integer-typed arrays. Presumably this converts ms to s -- confirm.
    response = np.asarray(response, dtype=float) / 1000

    # NOTE(review): no constant column is added, so the model is fitted without
    # an intercept. If an intercept is intended, wrap `design` in
    # sm.add_constant -- left unchanged here to keep results comparable.
    model = sm.OLS(response, design).fit()

    os.makedirs(results_folder, exist_ok=True)
    filename = 'expt_t1w_' + tag + '_' + str(expt_number) + '.csv'
    with open(os.path.join(results_folder, filename), 'w') as csvfile:
        csvfile.write(model.summary().as_csv())
    
   

In [None]:
# Fit the TE regression for experiments 1..25 and write one summary CSV each.
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t1w/linear_regression/TE'
for expt_number in range(1, 26):
    X, tr, te = pre_process_dataframe('expt_t1w.csv', expt_number)
    # Rebind the shuffled arrays; previously the result was stored in an
    # unused variable, so the regression ran on the unshuffled data.
    X, tr, te = transform_data(X, tr, te)
    linear_regression(X, tr, te, expt_number, results_folder, 'te')