In [1]:
# RUS implementation

In [2]:
import numpy as np
import pandas as pd
import os, warnings
warnings.filterwarnings(action='ignore')
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import RidgeClassifierCV, RidgeCV, LassoCV, ElasticNetCV

In [3]:
repeat = 100
alphas = np.logspace(-4, 4, 17)
num_CV = 5

In [4]:
df = pd.read_csv('final_data.csv')

training_df = df.loc[df['Outside'] == 0]
test_df = df.loc[df['Outside'] == 1]

X_train_full = training_df.iloc[:, 5:]
y_train = training_df.iloc[:, 1 ]
X_test_full = test_df.iloc[:, 5:]
y_test = test_df.iloc[:, 1 ]

datasets = ['X_T2', 'X_T1', 'X_T1T2', 'X_adc']
estimators = ['ridge', 'lasso', 'elastic']
for d, dataset in enumerate(datasets):
    if dataset == 'X_adc_T1_T2':
        X = X_train_full
        X_test = X_test_full
    elif dataset == 'X_T1':
        X = X_train_full.iloc[:, :107]
        X_test = X_test_full.iloc[:, :107]
    elif dataset == 'X_T2':
        X = X_train_full.iloc[:,107:214]
        X_test = X_test_full.iloc[:,107:214]
    elif dataset == 'X_adc_T1':
        X = X_train_full.iloc[:, np.r_[0:107, 214:321]]
        X_test = X_test_full.iloc[:, np.r_[0:107, 214:321]]
    elif dataset == 'X_adc_T2':
        X = X_train_full.iloc[:,107:321]
        X_test = X_test_full.iloc[:,107:321]
    elif dataset == 'X_adc':
        X = X_train_full.iloc[:, 214:]
        X_test = X_test_full.iloc[:, 214:]
    elif dataset == 'X_T1_T2':
        X = X_train_full.iloc[:, :214]
        X_test = X_test_full.iloc[:, :214]
    
    for _, estimator in enumerate(estimators):
        if estimator == 'ridge':
            classifier = RidgeCV(alphas = alphas, cv = num_CV)
        elif estimator =='lasso':
            classifier = LassoCV(alphas = alphas, cv = num_CV, random_state=seed)
        elif estimator =='elastic':
            classifier = ElasticNetCV(alphas=alphas, l1_ratio=0.5, cv = num_CV, random_state=seed)     
        print ("[ {} / {} ]".format(d, len(datasets)), "estimator:", estimator, "with dataset : ", dataset)
        AUC_trains = 0
        AUC_tests = 0        
        for seed in range(repeat):    
            rus = RandomUnderSampler(random_state=seed)
            X_sampled, y_sampled = rus.fit_resample(X,y_train)
            classifier.fit(X_sampled, y_sampled)
            preds_train = classifier.predict(X)
            preds_test = classifier.predict(X_test)
            performance_train = roc_auc_score(y_train, preds_train)
            performance_test = roc_auc_score(y_test, preds_test)
            AUC_trains += performance_train
            AUC_tests += performance_test  
        print ('training AUC:', AUC_trains/repeat)
        print ('validation AUC:',AUC_tests/repeat)        

[ 0 / 4 ] estimator: ridge with dataset :  X_T2
training AUC: 0.7114540816326529
validation AUC: 0.570761904761905
[ 0 / 4 ] estimator: lasso with dataset :  X_T2
training AUC: 0.6175680272108844
validation AUC: 0.4861587301587301
[ 0 / 4 ] estimator: elastic with dataset :  X_T2
training AUC: 0.6248129251700679
validation AUC: 0.4934603174603173
[ 1 / 4 ] estimator: ridge with dataset :  X_T1
training AUC: 0.7673894557823128
validation AUC: 0.46879365079365065
[ 1 / 4 ] estimator: lasso with dataset :  X_T1
training AUC: 0.6803741496598641
validation AUC: 0.45720634920634884
[ 1 / 4 ] estimator: elastic with dataset :  X_T1
training AUC: 0.6786989795918369
validation AUC: 0.45790476190476154
[ 2 / 4 ] estimator: ridge with dataset :  X_T1T2
training AUC: 0.7673894557823128
validation AUC: 0.46879365079365065
[ 2 / 4 ] estimator: lasso with dataset :  X_T1T2
training AUC: 0.6803741496598641
validation AUC: 0.45720634920634884
[ 2 / 4 ] estimator: elastic with dataset :  X_T1T2
training