In [334]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from pydicom import dcmread
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from numpy.linalg import matrix_rank, qr, inv, solve, lstsq
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, Ridge

In [238]:
homedir = '/home/raghuram/Desktop/radiomics/TEXTURES/csv_folder/'
os.chdir(homedir)

In [239]:
t1_list = sorted(glob.glob('seq_file_*_T1W.csv'), key = lambda _: int(_.split('_')[-2]))
t2_list = sorted(glob.glob('*seq_file_*_T2W.csv'), key = lambda _: int(_.split('_')[-2]))
t1ce_list = sorted(glob.glob('seq_file_*_T1CE.csv'), key = lambda _: int(_.split('_')[-2]))
flair_list = sorted(glob.glob('seq_file_*_T2F.csv'), key = lambda _: int(_.split('_')[-2]))

In [419]:
def pre_process_dataframe(experiment_features):
    # Binarize magnetization strength values
    # Scanner names and manufacturer are categorical 
    # One hot encode them
    
    # Code for the above here
    # Experiments in ascending order

    experiment_df = pd.read_csv(experiment_features)
    experiment_df.drop(columns=['Unnamed: 0','experiment_number', 'scale', 'algo', 'ng', 'mat_file_sequence', 'mat_file_name',
                               'parameters_Ng', 'parameters_Scale', 'parameters_Algo','filename','scanning_seq_mri'], inplace=True)
    experiment_df['mag_field_strength_binarized'] = (experiment_df['mag_field_strength']<=1.5).astype(int)
    experiment_df.drop(columns=['mag_field_strength'], inplace=True)
    experiment_df.replace({'Philips Healthcare':'Philips', 'Philips Medical Systems': 'Philips',
                          'SIEMENS':'Siemens'}, inplace=True)
    experiment_df = pd.get_dummies(experiment_df, prefix=['col1', 'col2'])
    repetition_time = experiment_df['repetition_time'].to_numpy()
    excitation_time = experiment_df['excitation_time'].to_numpy()
    experiment_df.drop(columns=['excitation_time', 'repetition_time'], inplace=False)
    X = experiment_df.to_numpy()
  
    return X, excitation_time, repetition_time

        

In [420]:
X, tr, te = pre_process_dataframe(t1_list[1])

In [421]:
def split_data_train_test(experiment_number, numpy_array, y1, y2 ):
    X, y1, y2 = shuffle(numpy_array, y1, y2, random_state=5)
    y1 = y1/1000
    
#     print(X.shape)
#     print(matrix_rank(X))
    # Split the preprocessed data into train and test
    X_train, X_test, y_train_te, y_test_te = train_test_split(X, y1, random_state=5)
    X_train, X_test, y_train_tr, y_test_tr = train_test_split(X, y2, random_state=5)
    
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    X_train = scaler.fit_transform(X_train)
#     X_train = sm.add_constant(X_train)
    X_test = scaler.transform(X_test)
#     X_test = sm.add_constant(X_test)
    
    output_dict = {}
    output_dict['X'] = X
    output_dict['y1'] = y1
    output_dict['y2'] = y2
    output_dict['X_train'] = X_train
    output_dict['X_test'] = X_test
    output_dict['y_tr_train'] = y_train_tr
    output_dict['y_tr_test'] = y_test_tr
    output_dict['y_te_train'] = y_train_te
    output_dict['y_te_test'] = y_test_te
    
    return output_dict
    

In [422]:
od = split_data_train_test(2, X, tr, te)

In [423]:
def linear_regression(output_dict, expt_number):
    # Load the data corresponding to the sequence argument here
    # Perform regression with Tr and Te 
    
    model = sm.OLS(od['y_tr_train'], od['X_train']).fit()
    predictions = model.predict(od['X_test'])
    print_model = model.summary()
    print(print_model)
    

In [424]:
from regressors import stats

In [427]:
alphas = np.linspace(0.01, 0.5)
# lm = RidgeCV(alphas=alphas, cv=10, fit_intercept=True)
lm = LinearRegression()
lm.fit(od['X'], od['y1'])

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [428]:
print(stats.summary(lm, od['X'], od['y1']))

Residuals:
 Min   1Q  Median   3Q  Max
-0.0 -0.0    -0.0  0.0  0.0


Coefficients:
            Estimate  Std. Error                                 t value   p value
_intercept  0.002124          0j            (21610332.3767-835711.4272j)  0.000000
x1         -0.000000         -0j                            (-2.6955-0j)  0.009000
x2         -0.000000          0j                       (-2.8741+0.0006j)  0.005519
x3         -0.000000          0j                            (-0.1923+0j)  0.848146
x4          0.000000          0j                        (0.5416-0.0001j)  0.590023
x5          0.000000         -0j                             (0.5975+0j)  0.552303
x6         -0.000000          0j                       (-1.1532+0.0009j)  0.253201
x7          0.000000          0j                         (0.8756-0.009j)  0.384556
x8          0.000000          0j                        (4.8437-0.0639j)  0.000009
x9         -0.000000          0j                       (-9.8794+0.3766j)  0.000000
x10 

  return (r_squared / p) / ((1 - r_squared) / (n - p - 1))
