In [41]:
import pandas as pd 
import numpy as np
import string
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import csv
from sklearn.svm import SVC


This notebook runs RF across 4 problem sets across 5 trials. 
The values for Table 2 and Table 3 are recorded at each iteration of the loop. 

### Datasets

### ADULT

In [34]:
ADULT_data = pd.read_csv('adult.data.csv', names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 
                      'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 
                      'native-country', 'target_income'
                     ]) 

ADULT_data['target_income'] = ADULT_data['target_income'].str.strip()

ADULT_data['target_income'] = ADULT_data.target_income.map( {'<=50K':0 , '>50K':1} )

ADULT_one_hot_data = pd.get_dummies(ADULT_data, 
                                    columns = ['workclass', 'education', 'marital-status',
                                            'occupation', 'relationship', 'race', 'sex', 'native-country'],
                                    prefix = ['workclass', 'education', 'marital-status',
                                            'occupation', 'relationship', 'race', 'sex', 'native-country'] )

ADULT_one_hot_data = ADULT_one_hot_data.drop(['workclass_ ?', 
                                              'occupation_ ?', 'native-country_ ?'], axis=1)


ADULT_one_hot_data[['age', 'fnlwgt', 'education-num', 'capital-gain', 
   'capital-loss', 'hours-per-week']] = StandardScaler().fit_transform(ADULT_one_hot_data[['age', 'fnlwgt', 'education-num', 'capital-gain', 
                                                                                           'capital-loss', 'hours-per-week']])


ADULT_one_hot_data


Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week,target_income,workclass_ Federal-gov,workclass_ Local-gov,workclass_ Never-worked,...,native-country_ Portugal,native-country_ Puerto-Rico,native-country_ Scotland,native-country_ South,native-country_ Taiwan,native-country_ Thailand,native-country_ Trinadad&Tobago,native-country_ United-States,native-country_ Vietnam,native-country_ Yugoslavia
0,0.030671,-1.063611,1.134739,0.148453,-0.21666,-0.035429,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0.837109,-1.008707,1.134739,-0.145920,-0.21666,-2.222153,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,-0.042642,0.245079,-0.420060,-0.145920,-0.21666,-0.035429,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,1.057047,0.425801,-1.197459,-0.145920,-0.21666,-0.035429,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,-0.775768,1.408176,1.134739,-0.145920,-0.21666,-0.035429,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,-0.849080,0.639741,0.746039,-0.145920,-0.21666,-0.197409,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
32557,0.103983,-0.335433,-0.420060,-0.145920,-0.21666,-0.035429,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
32558,1.423610,-0.358777,-0.420060,-0.145920,-0.21666,-0.035429,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
32559,-1.215643,0.110960,-0.420060,-0.145920,-0.21666,-1.655225,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


### Balance of dataset

In [32]:
positive_labels = ADULT_data['target_income'].value_counts()[1]/ ADULT_data['target_income'].count() * 100
negative_labels = ADULT_data['target_income'].value_counts()[0]/ ADULT_data['target_income'].count() * 100
print("% of negative labels:", negative_labels)
print("% of positive labels:", positive_labels)

% of negative labels: 75.91904425539757
% of positive labels: 24.080955744602438


Unbalanced dataset

### COV_type data

In [35]:
COV_type_data = pd.read_csv('covtype.data.gz', header = None)

cols = [c for c in COV_type_data.columns]
cols[-1] = 'forest_cover'
COV_type_data.columns = cols



largest_class = COV_type_data['forest_cover'].value_counts().idxmax()

COV_type_data.loc[COV_type_data['forest_cover'] != largest_class, 'forest_cover'] = 0
COV_type_data.loc[COV_type_data['forest_cover'] == largest_class, 'forest_cover'] = 1



COV_type_data.iloc[:, :-1] = StandardScaler().fit_transform(COV_type_data.iloc[:, :-1])

COV_type_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,45,46,47,48,49,50,51,52,53,forest_cover
0,-1.297805,-0.935157,-1.482820,-0.053767,-0.796273,-1.180146,0.330743,0.439143,0.142960,3.246283,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
1,-1.319235,-0.890480,-1.616363,-0.270188,-0.899197,-1.257106,0.293388,0.590899,0.221342,3.205504,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
2,-0.554907,-0.148836,-0.681563,-0.006719,0.318742,0.532212,0.816364,0.742654,-0.196691,3.126965,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,1
3,-0.622768,-0.005869,0.520322,-0.129044,1.227908,0.474492,0.965786,0.742654,-0.536343,3.194931,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,1
4,-1.301377,-0.988770,-1.616363,-0.547771,-0.813427,-1.256464,0.293388,0.540313,0.195215,3.165479,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581007,-2.012130,-0.023740,0.787408,-0.867697,-0.504653,-1.437962,1.040496,0.692069,-0.640851,-0.863386,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
581008,-2.029988,-0.032675,0.653865,-0.952383,-0.590424,-1.446299,1.040496,0.692069,-0.614724,-0.857345,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
581009,-2.047847,0.029873,0.386780,-0.985317,-0.676194,-1.449506,0.891075,0.894409,-0.327327,-0.850548,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0
581010,-2.054990,0.128163,0.119694,-0.985317,-0.710502,-1.449506,0.666942,1.096749,0.012325,-0.842997,...,-0.315238,-0.290284,-0.05273,-0.057143,-0.014313,-0.022653,-0.165956,-0.156014,-0.123654,0


### Balance of dataset

#### Treat largest class as positive class. The rest are negative. 


In [37]:
# positive_labels = len(COV_type_data[COV_type_data['Forest cover'] == 7])/len(COV_type_data['Forest cover']) * 100

positive_labels = COV_type_data['forest_cover'].value_counts().max()/len(COV_type_data['forest_cover']) * 100
negative_labels = len(COV_type_data[COV_type_data['forest_cover'] != COV_type_data['forest_cover'].value_counts().idxmax()])/len(COV_type_data['forest_cover']) * 100

print("% of negative labels:", negative_labels)
print("% of positive labels:", positive_labels)

% of negative labels: 48.75992234239568
% of positive labels: 51.240077657604324


### LETTER

In [5]:
LETTER_p1 = pd.read_csv('letter-recognition.data', header = None)

cols = [c for c in LETTER_p1.columns]
cols[0] = 'letter'
LETTER_p1.columns = cols



LETTER_p1.loc[:, LETTER_p1.columns != 'letter'] = StandardScaler().fit_transform(LETTER_p1.loc[:, LETTER_p1.columns != 'letter'])
LETTER_p1

Unnamed: 0,letter,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,T,-1.057698,0.291877,-1.053277,-0.164704,-1.144013,0.544130,2.365097,-1.714360,0.344994,-0.917071,1.347774,0.034125,-1.305948,-0.219082,-1.438153,0.122911
1,I,0.510385,1.502358,-1.053277,0.719730,-0.687476,1.531305,-1.075326,0.137561,-0.495072,1.895968,-1.312807,0.514764,-0.448492,-0.219082,0.120081,1.359441
2,D,-0.012309,1.199738,0.435910,1.161947,1.138672,1.531305,-0.645273,-0.973591,0.344994,0.690380,-1.312807,-0.446513,-0.019764,-0.865626,-0.269477,0.741176
3,N,1.555774,1.199738,0.435910,0.277513,-0.230939,-0.936631,0.644886,-0.232823,0.344994,-1.720796,-0.932724,0.995402,1.266419,1.074008,-0.659036,0.122911
4,G,-1.057698,-1.826464,-1.053277,-1.933571,-1.144013,0.544130,-0.645273,0.507945,0.344994,-0.917071,-0.552641,0.514764,-0.877220,-0.865626,0.509640,1.359441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,D,-1.057698,-1.523844,-1.053277,-1.049137,-0.687476,0.050543,-0.215220,0.878329,0.344994,-0.917071,-0.172558,-1.888428,-0.448492,-0.219082,-0.269477,-0.495354
19996,C,1.555774,0.897117,1.428701,1.161947,0.225598,-1.430218,0.214833,0.507945,1.605094,1.494105,0.967691,2.437316,-0.448492,0.427463,-0.269477,-0.495354
19997,T,1.033079,0.594497,0.435910,0.719730,0.682135,-0.443044,1.504991,-0.603207,0.765028,1.092242,0.967691,-1.407789,-0.448492,2.367097,-0.659036,-2.350149
19998,S,-1.057698,-1.221224,-0.556881,-1.491354,-1.144013,0.544130,-0.215220,-0.973591,0.344994,0.690380,-0.172558,0.034125,-0.877220,0.427463,0.509640,0.122911


### Letter.p1 - treat O as positive class, rest as negative

#### Unbalanced dataset

In [6]:
O_list = ['O']

# LETTER_p1['letter'] = pd.to_numeric(LETTER_p1['letter'])

LETTER_p1.loc[~LETTER_p1['letter'].isin(O_list), 'letter'] = 0
LETTER_p1.loc[LETTER_p1['letter'].isin(O_list), 'letter'] = 1

LETTER_p1['letter'].value_counts()

0    19247
1      753
Name: letter, dtype: int64

In [7]:
positive_labels = len(LETTER_p1[LETTER_p1['letter'] == 1])/len(LETTER_p1['letter']) * 100
negative_labels = len(LETTER_p1[LETTER_p1['letter'] == 0])/len(LETTER_p1['letter']) * 100

print("% of negative labels:", negative_labels)
print("% of positive labels:", positive_labels)


% of negative labels: 96.235
% of positive labels: 3.765


### Letter.p2 - treat A-M as positive class, rest as negative

In [10]:
LETTER_p2 = pd.read_csv('letter-recognition.data', header = None)

cols = [c for c in LETTER_p2.columns]
cols[0] = 'letter'
LETTER_p2.columns = cols


LETTER_p2.loc[:, LETTER_p2.columns != 'letter'] = StandardScaler().fit_transform(LETTER_p2.loc[:, LETTER_p2.columns != 'letter'])

pos_alphabet_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M']
neg_alphabet_list = sorted(list(set(string.ascii_uppercase) - set(pos_alphabet_list)))




LETTER_p2.loc[LETTER_p2['letter'].isin(pos_alphabet_list), 'letter'] = 1
LETTER_p2.loc[LETTER_p2['letter'].isin(neg_alphabet_list), 'letter'] = 0

LETTER_p1["letter"] = LETTER_p1["letter"].astype(str).astype(int)
LETTER_p2["letter"] = LETTER_p2["letter"].astype(str).astype(int)

LETTER_p2

Unnamed: 0,letter,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0,-1.057698,0.291877,-1.053277,-0.164704,-1.144013,0.544130,2.365097,-1.714360,0.344994,-0.917071,1.347774,0.034125,-1.305948,-0.219082,-1.438153,0.122911
1,1,0.510385,1.502358,-1.053277,0.719730,-0.687476,1.531305,-1.075326,0.137561,-0.495072,1.895968,-1.312807,0.514764,-0.448492,-0.219082,0.120081,1.359441
2,1,-0.012309,1.199738,0.435910,1.161947,1.138672,1.531305,-0.645273,-0.973591,0.344994,0.690380,-1.312807,-0.446513,-0.019764,-0.865626,-0.269477,0.741176
3,0,1.555774,1.199738,0.435910,0.277513,-0.230939,-0.936631,0.644886,-0.232823,0.344994,-1.720796,-0.932724,0.995402,1.266419,1.074008,-0.659036,0.122911
4,1,-1.057698,-1.826464,-1.053277,-1.933571,-1.144013,0.544130,-0.645273,0.507945,0.344994,-0.917071,-0.552641,0.514764,-0.877220,-0.865626,0.509640,1.359441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,1,-1.057698,-1.523844,-1.053277,-1.049137,-0.687476,0.050543,-0.215220,0.878329,0.344994,-0.917071,-0.172558,-1.888428,-0.448492,-0.219082,-0.269477,-0.495354
19996,1,1.555774,0.897117,1.428701,1.161947,0.225598,-1.430218,0.214833,0.507945,1.605094,1.494105,0.967691,2.437316,-0.448492,0.427463,-0.269477,-0.495354
19997,0,1.033079,0.594497,0.435910,0.719730,0.682135,-0.443044,1.504991,-0.603207,0.765028,1.092242,0.967691,-1.407789,-0.448492,2.367097,-0.659036,-2.350149
19998,0,-1.057698,-1.221224,-0.556881,-1.491354,-1.144013,0.544130,-0.215220,-0.973591,0.344994,0.690380,-0.172558,0.034125,-0.877220,0.427463,0.509640,0.122911


### Well-balanced dataset

In [12]:
pos_alphabet_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M']
neg_alphabet_list = sorted(list(set(string.ascii_uppercase) - set(pos_alphabet_list)))


positive_labels = len(LETTER_p2[LETTER_p2['letter'] == 1])/len(LETTER_p2['letter']) * 100
negative_labels = len(LETTER_p2[LETTER_p2['letter'] == 0])/len(LETTER_p2['letter']) * 100

print("% of negative labels:", negative_labels)
print("% of positive labels:", positive_labels)


% of negative labels: 50.3
% of positive labels: 49.7


### Experiments 

Random Forests

In [13]:
def split_data(data, column):

    Y = data[column]
    X = data.drop([column], axis=1)
    
    X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=5000)
    
    return X_train, X_test, y_train, y_test

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

In [36]:

ADULT_metric = []
COV_type_metric = []
LETTER_p1_metric = []
LETTER_p2_metric = []


accuracy_metric = []
f1_score_metric = []
roc_auc_score_metric = []


datalist = [COV_type_data, ADULT_one_hot_data, LETTER_p1, LETTER_p2]


for ind, data in enumerate(datalist):
    for i in range(5):
    
        print('Start of trial', i+1)
        
        # COV_type data
        
        
        if ind == 0:
            print('At COV_type_data')
            dataset = 'COV_type_data'
            X_train, X_test, y_train, y_test = split_data(data, 'forest_cover')
            
        
        # ADULT_data
        
        elif ind == 1:
            print('At ADULT_data')
            dataset = 'ADULT_data'
            X_train, X_test, y_train, y_test = split_data(data, 'target_income')
            
        
        # LETTER.p1 data
        
        if ind == 2:
            print('At LETTER_p1')
            dataset = 'LETTER_p1'
            X_train, X_test, y_train, y_test = split_data(data, 'letter')
            
        
        # LETTER.p2 data 
        if ind == 3:
            print('At LETTER_p2')
            dataset = 'LETTER_p2'
            X_train, X_test, y_train, y_test = split_data(data, 'letter')
          
        
        
        pipe = Pipeline([('classifier', RandomForestClassifier())
                       ])
        

        
        search_space = [
            { 'classifier': [RandomForestClassifier(n_estimators = 1024, class_weight = 'balanced')],
              'classifier__max_features': [1, 2, 4, 6, 8, 12, 16]
            }   
        ]
        
       
    # Create grid search 
        clf = GridSearchCV(pipe, search_space, cv=StratifiedKFold(n_splits=5), 
                   scoring=['accuracy', 'roc_auc', 'f1'], refit='accuracy',
                   verbose=0)
          
        
    # Fit grid search
        best_model = clf.fit(X_train, y_train)
        
    
        
        # print(best_model.cv_results_['rank_test_accuracy'])
    
    
        best_acc_param = best_model.cv_results_['params'][ np.argmin(best_model.cv_results_['rank_test_accuracy'])]
        best_auc_param = best_model.cv_results_['params'][np.argmin(best_model.cv_results_['rank_test_roc_auc'])] 
        best_f1_param = best_model.cv_results_['params'][np.argmin(best_model.cv_results_['rank_test_f1'])]
        
        
        # print(best_acc_param)
        
        
    # Train 3 models using the 5000 samples and each of the 3 best parameter settings (one model per metric)
    
    # Tuned for accuracy
        acc_model = best_acc_param['classifier'].fit(X_train, y_train)
    
    # Tuned for roc-auc score 
        auc_model = best_auc_param['classifier'].fit(X_train, y_train)
    
    # Tuned for f1 score
        f1_model =  best_f1_param['classifier'].fit(X_train, y_train)
        
        
        
        
        # fit a classifier using that best param on the training set, 
        # predict the training set, and record the corresponding training set metric for the appendix tables
        
       # On Training data
    
        y_pred_acc_tr = acc_model.predict(X_train)
        y_pred_auc_tr = auc_model.predict(X_train)
        y_pred_f1_tr = f1_model.predict(X_train)
        
        
        
        print('Trial ', i+1, ' raw training scores for', dataset)
        print(accuracy_score(y_train, y_pred_acc_tr))
        print(roc_auc_score(y_train, y_pred_auc_tr))
        print(f1_score(y_train, y_pred_f1_tr))
        
        
        # accuracy_metric_tr.append(accuracy_score(y_test, y_pred_acc_tr))
        # roc_auc_score_metric_tr.append(roc_auc_score(y_test, y_pred_auc_tr))
        # f1_score_metric_tr.append(f1_score(y_test, y_pred_f1_tr))
    
        
        # On Test data 
        
        y_pred_acc = acc_model.predict(X_test)
        y_pred_auc = auc_model.predict(X_test)
        y_pred_f1 = f1_model.predict(X_test)
        
        
        print('Trial ', i+1, ' raw test scores for', dataset)
        print(accuracy_score(y_test, y_pred_acc))
        print(roc_auc_score(y_test, y_pred_auc))
        print(f1_score(y_test, y_pred_f1))
    
    
        accuracy_metric.append(accuracy_score(y_test, y_pred_acc))
        roc_auc_score_metric.append(roc_auc_score(y_test, y_pred_auc))
        f1_score_metric.append(f1_score(y_test, y_pred_f1))
        
        
        # Table 3 
        if ind == 0:
            COV_type_metric.extend([accuracy_score(y_test, y_pred_acc), roc_auc_score(y_test, y_pred_auc), 
                                  f1_score(y_test, y_pred_f1)])
        elif ind == 1: 
            ADULT_metric.extend([accuracy_score(y_test, y_pred_acc), roc_auc_score(y_test, y_pred_auc), 
                                  f1_score(y_test, y_pred_f1)])
        elif ind == 2:
            LETTER_p1_metric.extend([accuracy_score(y_test, y_pred_acc), roc_auc_score(y_test, y_pred_auc), 
                                  f1_score(y_test, y_pred_f1)])
            
        elif ind == 3:
            LETTER_p2_metric.extend([accuracy_score(y_test, y_pred_acc), roc_auc_score(y_test, y_pred_auc), 
                                  f1_score(y_test, y_pred_f1)])
        
        
        print("End of Trial", i+1)
        print('------------------------------------------')
        print()
                                                               


    


Start of trial 1
At COV_type_data
Trial  1  raw training scores for COV_type_data
1.0
1.0
1.0
Trial  1  raw test scores for COV_type_data
0.8235088852315576
0.8242443630436314
0.8250412610122587
End of Trial 1
------------------------------------------

Start of trial 2
At COV_type_data
Trial  2  raw training scores for COV_type_data
1.0
1.0
1.0
Trial  2  raw test scores for COV_type_data
0.8185610716443408
0.8188204130922623
0.8167814072862484
End of Trial 2
------------------------------------------

Start of trial 3
At COV_type_data
Trial  3  raw training scores for COV_type_data
1.0
1.0
1.0
Trial  3  raw test scores for COV_type_data
0.8249880210828942
0.8252495725808819
0.8232435076719146
End of Trial 3
------------------------------------------

Start of trial 4
At COV_type_data
Trial  4  raw training scores for COV_type_data
1.0
1.0
1.0
Trial  4  raw test scores for COV_type_data
0.8184343381735103
0.8185831637591299
0.8158103295844003
End of Trial 4
----------------------------

### For Table 2

In [37]:
print('Accuracy metric values across all datasets, across 5 trials, for RF:')
print(accuracy_metric)

print()

print()
print('F-score metric values across all datasets, across 5 trials, for RF:')
print(f1_score_metric)

print()

print('ROC_AUC metric values across all datasets, across 5 trials, for RF:')
print(roc_auc_score_metric)

print()

print('Average scores for each metric: ')
print('ACC:', sum(accuracy_metric)/len(accuracy_metric))
print('FSC:', sum(f1_score_metric)/len(f1_score_metric))
print('ROC_AUC:', sum(roc_auc_score_metric)/len(roc_auc_score_metric))

Accuracy metric values across all datasets, across 5 trials, for RF:
[0.8235088852315576, 0.8185610716443408, 0.8249880210828942, 0.8184343381735103, 0.8223439789448831, 0.8524001306193534, 0.8542505714596713, 0.8493886288596205, 0.8493886288596205, 0.8520735822357679, 0.9883333333333333, 0.9876666666666667, 0.9913333333333333, 0.9880666666666666, 0.9894, 0.9462, 0.9444, 0.9478, 0.9465333333333333, 0.9452]


F-score metric values across all datasets, across 5 trials, for RF:
[0.8250412610122587, 0.8167814072862484, 0.8232435076719146, 0.8158103295844003, 0.8220733878595672, 0.6624626617988715, 0.6632011402699757, 0.6646740447532111, 0.6642943792964011, 0.661856183130132, 0.8205128205128205, 0.8198636806231743, 0.87, 0.8186423505572443, 0.8408408408408408, 0.9462179273575473, 0.9435265438786566, 0.9476918965862783, 0.9461383478844863, 0.9451048484039]

ROC_AUC metric values across all datasets, across 5 trials, for RF:
[0.8242443630436314, 0.8188204130922623, 0.8252495725808819, 0.81858

Write the 20 values for each metric to a .csv to do p-test comparisons. 

In [44]:
with open('Table_2_p_test', 'a') as f: 
      
    # using csv.writer method from CSV package 
    write = csv.writer(f) 
    
    write.writerow(accuracy_metric) 

    write.writerow(f1_score_metric)
    
    write.writerow(roc_auc_score_metric)

###  For Table 3

In [46]:
print('COV_type')
print('Metric values across 5 trials, for RF:')
print(COV_type_metric)

print()

print('ADULT')
print('Metric values across 5 trials, for RF:')
print(ADULT_metric)

print()

print('LETTER.p1')
print('Metric values across 5 trials, for RF:')
print(LETTER_p1_metric)

print()

print('LETTER.p2')
print('Metric values across 5 trials, for RF:')
print(LETTER_p2_metric)

COV_type
Metric values across 5 trials, for RF:
[0.8235088852315576, 0.8242443630436314, 0.8250412610122587, 0.8185610716443408, 0.8188204130922623, 0.8167814072862484, 0.8249880210828942, 0.8252495725808819, 0.8232435076719146, 0.8184343381735103, 0.8185831637591299, 0.8158103295844003, 0.8223439789448831, 0.8228222678883678, 0.8220733878595672]

ADULT
Metric values across 5 trials, for RF:
[0.8524001306193534, 0.7660726804913083, 0.6624626617988715, 0.8542505714596713, 0.7645542052930975, 0.6632011402699757, 0.8493886288596205, 0.7713484440554236, 0.6646740447532111, 0.8493886288596205, 0.771186018515124, 0.6642943792964011, 0.8520735822357679, 0.7665547732446949, 0.661856183130132]

LETTER.p1
Metric values across 5 trials, for RF:
[0.9883333333333333, 0.8560214407429669, 0.8205128205128205, 0.9876666666666667, 0.8620295088239514, 0.8198636806231743, 0.9913333333333333, 0.8983907666627527, 0.87, 0.9880666666666666, 0.8636172005691807, 0.8186423505572443, 0.9894, 0.8781960804110016, 0

Write the 15 values for each metric to a .csv to do p-test comparisons. 

In [47]:
with open('Table_3_p_test', 'a') as f: 
      
    # using csv.writer method from CSV package 
    write = csv.writer(f) 
    
    write.writerow(COV_type_metric) 

    write.writerow(ADULT_metric)
    
    write.writerow(LETTER_p1_metric)
    
    write.writerow(LETTER_p2_metric)

In [39]:
print('Average metric scores for each dataset across 5 trials: ')

print()
print('COV_type:', sum(COV_type_metric)/len(COV_type_metric))
print()

print('ADULT:', sum(ADULT_metric)/len(ADULT_metric))
print()

print('LETTER.p1:', sum(LETTER_p1_metric)/len(LETTER_p1_metric))
print()

print('LETTER.p2:', sum(LETTER_p2_metric)/len(LETTER_p2_metric))

Average metric scores for each dataset across 5 trials: 

COV_type: 0.8213670645903897

ADULT: 0.7609137381921515

LETTER.p1: 0.8981943126495955

LETTER.p2: 0.9459354562007432
