In [1]:
import pandas as pd
import numpy as np
import scipy as sc
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from fnc_LR import LR
from fnc_KNN import KNN
from fnc_SVM import SVM
from fnc_RF import RF
from fnc_NB import NB


# Integer column labels 0..36; passing them as `names` makes pandas read the
# first line of the file as data instead of as a header row.
ind = [*range(37)]

# Space-separated satimage file (sat.tst) from the UCI Statlog directory.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/satimage/sat.tst"
df = pd.read_csv(url, sep=' ', names=ind)
samples, features = df.shape

# Split the frame column-wise: everything except the last column is the
# predictor matrix, the last column is the target vector.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

#--------------Change clk to run one model's parameter-tuning routine--------
# clk selects the routine: 1 = LR, 2 = KNN, 3 = SVM, 4 = RF, 5 = NB.
# Any other value (such as the default 0) runs nothing.
# NOTE(review): the routines come from the project's fnc_* modules; what they
# print or return is not visible from this file.
clk = 0
tuners = {1: LR, 2: KNN, 3: SVM, 4: RF, 5: NB}
if clk in tuners:
    tuners[clk](X, y)
#--------------------------------------------------------------------------------------


#--------------------------------Start training the models------------------------------
# Cross-validates five tuned classifiers on the training split and prints, per
# model: mean and std of accuracy over the repeated folds, the weighted
# precision of the out-of-fold predictions, and the mean per-class
# false-negative rate.  Set `train` to anything other than 1 to skip.
train = 1
if train == 1:
    # 80/20 split with a fixed seed.  Only the training part is used below;
    # the validation part is left untouched for a later hold-out evaluation.
    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X, y, test_size=0.20, random_state=1, shuffle=True)

    models = []
    #--------------------------------Change these values according to the tuning parameters------------------------------
    models.append(('NB', GaussianNB(var_smoothing=0.0008111308307896872)))
    # random_state is fixed so the forest is reproducible, in line with the
    # seeded split and seeded fold generators used elsewhere in this block.
    models.append(('RF', RandomForestClassifier(max_features='sqrt', n_estimators=1000,
                                                random_state=1)))
    models.append(('SVM', SVC(C=10, gamma='scale', kernel='rbf')))
    models.append(('KNN', KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')))
    models.append(('LR', LogisticRegression(C=0.01, penalty='l2', solver='newton-cg')))
    #--------------------------------------------------------------------------------------
    results = []
    names = []
    print('\n-------------Training Result----------')
    # 10 folds x 3 repeats for the accuracy estimate; a single shuffled
    # 10-fold pass for the out-of-fold predictions (cross_val_predict needs
    # every sample to be predicted exactly once).
    rkfold = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

    for name, model in models:
        cv_results = cross_val_score(model, X_train, Y_train, cv=rkfold, scoring='accuracy')
        results.append(cv_results)
        names.append(name)

        # Out-of-fold predictions feed both the precision and the confusion matrix.
        y_train_pred = cross_val_predict(model, X_train, Y_train, cv=kfold)
        ps = precision_score(Y_train, y_train_pred, average='weighted')

        # confusion_matrix puts true classes on rows and predictions on
        # columns, so column sums minus the diagonal are false positives and
        # row sums minus the diagonal are false negatives (one value per class).
        cfm = confusion_matrix(Y_train, y_train_pred)
        TP = np.diag(cfm)
        FP = cfm.sum(axis=0) - TP
        FN = cfm.sum(axis=1) - TP
        TN = cfm.sum() - (FP + FN + TP)  # not printed; kept for follow-up metrics
        # Per-class miss rate FN / (TP + FN), averaged over the classes.
        FNR = np.mean(FN / (TP + FN))

        # Labels name what is actually computed: weighted precision of the
        # k-fold predictions and the mean false-negative rate.  The former
        # labels ("K-accuracy", "FAR") described different metrics.
        print('%s: %f (%f) K-precision: %f FNR: %f'
              % (name, cv_results.mean(), cv_results.std(), ps, FNR))


-------------Training Result----------
NB: 0.796042 (0.023853) K-accuracy: 0.816448 FAR: 0.209532
RF: 0.893333 (0.026117) K-accuracy: 0.890459 FAR: 0.128974
SVM: 0.892500 (0.018985) K-accuracy: 0.889103 FAR: 0.124815
KNN: 0.885833 (0.023270) K-accuracy: 0.884871 FAR: 0.130443




LR: 0.848125 (0.020165) K-accuracy: 0.840235 FAR: 0.187561


