In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import normalize
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Support Vector Classifier (SVC)

In [2]:
#=====Data Reading=====#
# The data file is expected in the current working directory. The earlier
# dirname(abspath("__file__")) form used a quoted literal, so it only ever
# resolved to the working directory as well.
dir_path = os.getcwd()
df = pd.read_csv(os.path.join(dir_path, 'ionosphere.data'), header=None)

#=====Data Processing=====#
# Recode the class labels in column 34 as a binary target: "g" -> 1, "b" -> 0.
categ = {"g": 1, "b": 0}
encoded = df[34].map(categ)
if encoded.isna().any():
    # Series.map yields NaN for values missing from the dict; fail loudly
    # (as a plain dict lookup would) instead of training on NaN targets.
    unknown = df.loc[encoded.isna(), 34].unique().tolist()
    raise KeyError("Unexpected class label(s) in column 34: " + str(unknown))
df[34] = encoded
labels = df[34]
inputs = df.drop(columns=[34])

# Separate data into a training/test set
input_train, input_test, output_train, output_test = train_test_split(
    inputs, labels, test_size=0.33, random_state=42
)

# Class balancing: oversample the minority class of the training split only,
# so the test split keeps its original class distribution.
# fit_resample replaces the removed fit_sample alias; to_numpy() replaces the
# deprecated Series.ravel().
sm = SMOTE(random_state=27, sampling_strategy=1.0)
input_train_res, output_train_res = sm.fit_resample(input_train, output_train.to_numpy())

#=====Machine Learning Architecture=====#
model = SVC(kernel='rbf', gamma='scale', C=1.0)
# fit() returns the fitted estimator itself, so `hist` is the same object as `model`.
hist = model.fit(input_train_res, output_train_res)

#=====Validation of ML Model=====#
# NOTE(review): cross_val_score clones the estimator and refits it on folds of the
# un-resampled training split, so these scores describe the classifier WITHOUT the
# SMOTE step. Wrap SMOTE + classifier in an imblearn Pipeline to validate both together.
accuracies = cross_val_score(estimator=model, X=input_train, y=output_train, cv=10, n_jobs=-1)
mean = accuracies.mean()
# NOTE: this is the standard deviation of the fold accuracies, not the variance;
# the variable name and printed label are kept so the recorded output stays comparable.
variance = accuracies.std()
maxacc = accuracies.max()
minacc = accuracies.min()
print("\nAccuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
print("Accuracy max: " + str(maxacc))
print("Accuracy min: " + str(minacc))

#=====Analysis of ML Model=====#
# Predicting with a fitted model is deterministic, so one pass over the test
# split is enough; averaging 25 identical scores only added float rounding noise.
predictions = model.predict(input_test)
acc = accuracy_score(output_test, predictions)
print("\naccuracy_score :", acc)

# Skill Score Matrix
print("\nclassification report :\n", (classification_report(output_test, predictions)))
# Confusion Matrix
print(confusion_matrix(output_test, predictions))



Accuracy mean: 0.923913043478261
Accuracy variance: 0.061658108365746825
Accuracy max: 1.0
Accuracy min: 0.7916666666666666

accuracy_score : 0.9655172413793099

classification report :
               precision    recall  f1-score   support

           0       1.00      0.91      0.95        45
           1       0.95      1.00      0.97        71

    accuracy                           0.97       116
   macro avg       0.97      0.96      0.96       116
weighted avg       0.97      0.97      0.97       116

[[41  4]
 [ 0 71]]


# Random Forest Classifier (RFC)

In [3]:
#=====Data Reading=====#
# The data file is expected in the current working directory. The earlier
# dirname(abspath("__file__")) form used a quoted literal, so it only ever
# resolved to the working directory as well.
dir_path = os.getcwd()
df = pd.read_csv(os.path.join(dir_path, 'ionosphere.data'), header=None)

#=====Data Processing=====#
# Recode the class labels in column 34 as a binary target: "g" -> 1, "b" -> 0.
categ = {"g": 1, "b": 0}
encoded = df[34].map(categ)
if encoded.isna().any():
    # Series.map yields NaN for values missing from the dict; fail loudly
    # (as a plain dict lookup would) instead of training on NaN targets.
    unknown = df.loc[encoded.isna(), 34].unique().tolist()
    raise KeyError("Unexpected class label(s) in column 34: " + str(unknown))
df[34] = encoded
labels = df[34]
inputs = df.drop(columns=[34])

# Separate data into a training/test set
input_train, input_test, output_train, output_test = train_test_split(
    inputs, labels, test_size=0.33, random_state=42
)

# Class balancing: oversample the minority class of the training split only,
# so the test split keeps its original class distribution.
# fit_resample replaces the removed fit_sample alias; to_numpy() replaces the
# deprecated Series.ravel().
sm = SMOTE(random_state=27, sampling_strategy=1.0)
input_train_res, output_train_res = sm.fit_resample(input_train, output_train.to_numpy())

#=====Machine Learning Architecture=====#
# random_state pins the forest's bootstrap/feature sampling so that
# Restart & Run All reproduces the same scores.
model = RandomForestClassifier(n_estimators=200, criterion="entropy", random_state=42)
# fit() returns the fitted estimator itself, so `hist` is the same object as `model`.
hist = model.fit(input_train_res, output_train_res)

#=====Validation of ML Model=====#
# NOTE(review): cross_val_score clones the estimator and refits it on folds of the
# un-resampled training split, so these scores describe the classifier WITHOUT the
# SMOTE step. Wrap SMOTE + classifier in an imblearn Pipeline to validate both together.
accuracies = cross_val_score(estimator=model, X=input_train, y=output_train, cv=10, n_jobs=-1)
mean = accuracies.mean()
# NOTE: this is the standard deviation of the fold accuracies, not the variance;
# the variable name and printed label are kept so the recorded output stays comparable.
variance = accuracies.std()
maxacc = accuracies.max()
minacc = accuracies.min()
print("\nAccuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
print("Accuracy max: " + str(maxacc))
print("Accuracy min: " + str(minacc))

#=====Analysis of ML Model=====#
# Predicting with a fitted model is deterministic, so one pass over the test
# split is enough; averaging 25 identical scores only added float rounding noise.
predictions = model.predict(input_test)
acc = accuracy_score(output_test, predictions)
print("\naccuracy_score :", acc)

# Skill Score Matrix
print("\nclassification report :\n", (classification_report(output_test, predictions)))
# Confusion Matrix
print(confusion_matrix(output_test, predictions))



Accuracy mean: 0.9278985507246377
Accuracy variance: 0.049980304879686514
Accuracy max: 1.0
Accuracy min: 0.8333333333333334

accuracy_score : 0.9482758620689655

classification report :
               precision    recall  f1-score   support

           0       1.00      0.87      0.93        45
           1       0.92      1.00      0.96        71

    accuracy                           0.95       116
   macro avg       0.96      0.93      0.94       116
weighted avg       0.95      0.95      0.95       116

[[39  6]
 [ 0 71]]


# K-Nearest Neighbors Classifier (KNN)

In [4]:
#=====Data Reading=====#
# The data file is expected in the current working directory. The earlier
# dirname(abspath("__file__")) form used a quoted literal, so it only ever
# resolved to the working directory as well.
dir_path = os.getcwd()
df = pd.read_csv(os.path.join(dir_path, 'ionosphere.data'), header=None)

#=====Data Processing=====#
# Recode the class labels in column 34 as a binary target: "g" -> 1, "b" -> 0.
categ = {"g": 1, "b": 0}
encoded = df[34].map(categ)
if encoded.isna().any():
    # Series.map yields NaN for values missing from the dict; fail loudly
    # (as a plain dict lookup would) instead of training on NaN targets.
    unknown = df.loc[encoded.isna(), 34].unique().tolist()
    raise KeyError("Unexpected class label(s) in column 34: " + str(unknown))
df[34] = encoded
labels = df[34]
inputs = df.drop(columns=[34])

# Separate data into a training/test set
input_train, input_test, output_train, output_test = train_test_split(
    inputs, labels, test_size=0.33, random_state=42
)

# Class balancing: oversample the minority class of the training split only,
# so the test split keeps its original class distribution.
# fit_resample replaces the removed fit_sample alias; to_numpy() replaces the
# deprecated Series.ravel().
sm = SMOTE(random_state=27, sampling_strategy=1.0)
input_train_res, output_train_res = sm.fit_resample(input_train, output_train.to_numpy())

#=====Machine Learning Architecture=====#
model = KNeighborsClassifier()
# fit() returns the fitted estimator itself, so `hist` is the same object as `model`.
hist = model.fit(input_train_res, output_train_res)

#=====Validation of ML Model=====#
# NOTE(review): cross_val_score clones the estimator and refits it on folds of the
# un-resampled training split, so these scores describe the classifier WITHOUT the
# SMOTE step. Wrap SMOTE + classifier in an imblearn Pipeline to validate both together.
accuracies = cross_val_score(estimator=model, X=input_train, y=output_train, cv=10, n_jobs=-1)
mean = accuracies.mean()
# NOTE: this is the standard deviation of the fold accuracies, not the variance;
# the variable name and printed label are kept so the recorded output stays comparable.
variance = accuracies.std()
maxacc = accuracies.max()
minacc = accuracies.min()
print("\nAccuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
print("Accuracy max: " + str(maxacc))
print("Accuracy min: " + str(minacc))

#=====Analysis of ML Model=====#
# Predicting with a fitted model is deterministic, so one pass over the test
# split is enough; averaging 25 identical scores only added float rounding noise.
predictions = model.predict(input_test)
acc = accuracy_score(output_test, predictions)
print("\naccuracy_score :", acc)

# Skill Score Matrix
print("\nclassification report :\n", (classification_report(output_test, predictions)))
# Confusion Matrix
print(confusion_matrix(output_test, predictions))



Accuracy mean: 0.8219202898550725
Accuracy variance: 0.05706406716117055
Accuracy max: 0.9130434782608695
Accuracy min: 0.7083333333333334

accuracy_score : 0.8879310344827583

classification report :
               precision    recall  f1-score   support

           0       1.00      0.71      0.83        45
           1       0.85      1.00      0.92        71

    accuracy                           0.89       116
   macro avg       0.92      0.86      0.87       116
weighted avg       0.91      0.89      0.88       116

[[32 13]
 [ 0 71]]


# Decision Tree Classifier (DTC)

In [5]:
#=====Data Reading=====#
# The data file is expected in the current working directory. The earlier
# dirname(abspath("__file__")) form used a quoted literal, so it only ever
# resolved to the working directory as well.
dir_path = os.getcwd()
df = pd.read_csv(os.path.join(dir_path, 'ionosphere.data'), header=None)

#=====Data Processing=====#
# Recode the class labels in column 34 as a binary target: "g" -> 1, "b" -> 0.
categ = {"g": 1, "b": 0}
encoded = df[34].map(categ)
if encoded.isna().any():
    # Series.map yields NaN for values missing from the dict; fail loudly
    # (as a plain dict lookup would) instead of training on NaN targets.
    unknown = df.loc[encoded.isna(), 34].unique().tolist()
    raise KeyError("Unexpected class label(s) in column 34: " + str(unknown))
df[34] = encoded
labels = df[34]
inputs = df.drop(columns=[34])

# Separate data into a training/test set
input_train, input_test, output_train, output_test = train_test_split(
    inputs, labels, test_size=0.33, random_state=42
)

# Class balancing: oversample the minority class of the training split only,
# so the test split keeps its original class distribution.
# fit_resample replaces the removed fit_sample alias; to_numpy() replaces the
# deprecated Series.ravel().
sm = SMOTE(random_state=27, sampling_strategy=1.0)
input_train_res, output_train_res = sm.fit_resample(input_train, output_train.to_numpy())

#=====Machine Learning Architecture=====#
# random_state pins the tree's tie-breaking/feature permutation so that
# Restart & Run All reproduces the same scores.
model = DecisionTreeClassifier(random_state=42)
# fit() returns the fitted estimator itself, so `hist` is the same object as `model`.
hist = model.fit(input_train_res, output_train_res)

#=====Validation of ML Model=====#
# NOTE(review): cross_val_score clones the estimator and refits it on folds of the
# un-resampled training split, so these scores describe the classifier WITHOUT the
# SMOTE step. Wrap SMOTE + classifier in an imblearn Pipeline to validate both together.
accuracies = cross_val_score(estimator=model, X=input_train, y=output_train, cv=10, n_jobs=-1)
mean = accuracies.mean()
# NOTE: this is the standard deviation of the fold accuracies, not the variance;
# the variable name and printed label are kept so the recorded output stays comparable.
variance = accuracies.std()
maxacc = accuracies.max()
minacc = accuracies.min()
print("\nAccuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
print("Accuracy max: " + str(maxacc))
print("Accuracy min: " + str(minacc))

#=====Analysis of ML Model=====#
# Predicting with a fitted model is deterministic, so one pass over the test
# split is enough; averaging 25 identical scores only added float rounding noise.
predictions = model.predict(input_test)
acc = accuracy_score(output_test, predictions)
print("\naccuracy_score :", acc)

# Skill Score Matrix
print("\nclassification report :\n", (classification_report(output_test, predictions)))
# Confusion Matrix
print(confusion_matrix(output_test, predictions))



Accuracy mean: 0.8384057971014492
Accuracy variance: 0.07701640727876186
Accuracy max: 0.9583333333333334
Accuracy min: 0.7083333333333334

accuracy_score : 0.8965517241379313

classification report :
               precision    recall  f1-score   support

           0       0.85      0.89      0.87        45
           1       0.93      0.90      0.91        71

    accuracy                           0.90       116
   macro avg       0.89      0.90      0.89       116
weighted avg       0.90      0.90      0.90       116

[[40  5]
 [ 7 64]]
