<a href="https://colab.research.google.com/github/nisanuro/CNG562-Assignment-2/blob/master/CNG562_Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from mpl_toolkits.mplot3d import Axes3D
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics, datasets, preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC, SVC

from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report

%matplotlib inline

In [0]:
def dataVisualizaion(iris):
    x_index = 0
    y_index = 1

    formatter = plt.FuncFormatter(lambda i, *args: iris.target_names[int(i)])
    
    plt.figure(figsize=(5, 4))
    plt.scatter(iris.data[:, x_index], iris.data[:, y_index], c=iris.target)
    plt.colorbar(ticks=[0, 1, 2], format=formatter)
    plt.xlabel(iris.feature_names[x_index])
    plt.ylabel(iris.feature_names[y_index])

    plt.tight_layout()
    plt.show()

In [0]:
def threeDVisualization(X, y):

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    fig = plt.figure(1, figsize=(16, 9))
    ax = Axes3D(fig, elev=-150, azim=110)
    X_reduced = PCA(n_components=3).fit_transform(X_scaled)
    ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
    ax.set_title("First three PCA directions")
    ax.set_xlabel("1st eigenvector")
    ax.w_xaxis.set_ticklabels([])
    ax.set_ylabel("2nd eigenvector")
    ax.w_yaxis.set_ticklabels([])
    ax.set_zlabel("3rd eigenvector")
    ax.w_zaxis.set_ticklabels([])

    plt.show()
    print("The number of features in the new subspace is ", X_reduced.shape[1])

    return X_reduced

In [0]:
def randomOneHoldout(X_train, Y_train):
    x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test

In [0]:
def stratifiedOneHoldout(X_train, Y_train):
    x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test

In [0]:
def kNN(k: int, metric: str, X_train, Y_train):
  
    #Model
    if metric == "mahalanobis":
      knn = KNeighborsClassifier(n_neighbors=k, weights='distance', metric=metric, algorithm="brute", metric_params={'V': np.cov(X_train)})
    else:
      knn = KNeighborsClassifier(n_neighbors=k, weights='distance', metric=metric)

    #5-Fold
    cv_result_knn_5 = cross_val_score(knn, X_train, Y_train, cv=5, scoring='accuracy')
  
    #10-Fold
    cv_result_knn_10 = cross_val_score(knn, X_train, Y_train, cv=10, scoring='accuracy')
  
    #Random One Holdout
    x_train, x_test, y_train, y_test_random = randomOneHoldout(X_train, Y_train)
    knn.fit(x_train, y_train)

    y_pred_knn_random = knn.predict(x_test)
  
    #Stratified One Holdout
    x_train, x_test, y_train, y_test_stratified = stratifiedOneHoldout(X_train, Y_train)
    knn.fit(x_train, y_train)
    y_pred_knn_stratified = knn.predict(x_test)

    print("5 Fold")
    print("KNN Accuracy: ", cv_result_knn_5.mean())
  
    print("10 Fold")
    print("KNN Accuracy: ", cv_result_knn_10.mean())

    print("Random One Hold Out")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_random, y_pred_knn_random))
  
    print("Stratified One Hold Out Fold")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_stratified, y_pred_knn_stratified))

In [0]:
def svm(X_train, Y_train, kernel, weight, gamma):

    svm = SVC(C=1, kernel=kernel, degree=3, gamma=gamma, coef0=0.0, shrinking=True, 
          probability=False, tol=0.001, cache_size=200, class_weight=weight,
          max_iter=-1, decision_function_shape="ovr", random_state = 0)

    #5-Fold
    cv_result_svm_5 = cross_val_score(svm, X_train, Y_train, cv=5, scoring='accuracy')
  
    #10-Fold
    cv_result_svm_10 = cross_val_score(svm, X_train, Y_train, cv=10, scoring='accuracy')
  
    #Random One Holdout
    x_train, x_test, y_train, y_test_random = randomOneHoldout(X_train, Y_train)
    svm.fit(x_train, y_train)
    y_pred_svm_random = svm.predict(x_test)
  
    #Stratified One Holdout
    x_train, x_test, y_train, y_test_stratified = stratifiedOneHoldout(X_train, Y_train)
    svm.fit(x_train, y_train)
    y_pred_svm_stratified = svm.predict(x_test)

    print("5 Fold")
    print("SVM Accuracy: ", cv_result_svm_5.mean())
  
    #print("10 Fold")
    #print("SVM Accuracy: ", cv_result_svm_10.mean())

    #print("Random One Hold Out")
    #print("SVM Accuracy: ", 1 - metrics.mean_squared_error(y_test_random, y_pred_svm_random))
  
    #print("Stratified One Hold Out Fold")
    #print("SVM Accuracy: ", 1 - metrics.mean_squared_error(y_test_stratified, y_pred_svm_stratified))

In [0]:
def AdaBoost(model, n_estimators, learning_rate, X_train, Y_train, X_test, Y_test):
    clf = AdaBoostClassifier(base_estimator = model, n_estimators= n_estimators, learning_rate=learning_rate, random_state=0, algorithm = 'SAMME.R')
    clf.fit(X_train, Y_train)
    clf.predict(X_test)
    print(clf.score(X_train, Y_train))

In [0]:
def GradientBoost(n_estimators, learning_rate, X_train, Y_train, X_test, Y_test):
    clf = GradientBoostingClassifier(n_estimators= n_estimators, learning_rate=learning_rate, random_state=0)
    clf.fit(X_train, Y_train)
    clf.predict(X_test)
    print(clf.score(X_train, Y_train))

In [0]:
def fourError(X, Y, model):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0, stratify=Y)
    
    Train_x, TrainDev_x, Train_y, TrainDev_y = train_test_split(X_train, Y_train, test_size=0.2, random_state=0, stratify=Y_train)
    Dev_x, Test_x, Dev_y, Test_y = train_test_split(X_test, Y_test, test_size=0.5, random_state=0, stratify=Y_test)

    model.fit(Train_x, Train_y)

    y_true, trainDev_pred = TrainDev_y, model.predict(TrainDev_x)

    print("Train-Train Dev,   e1:", metrics.mean_squared_error(TrainDev_y, trainDev_pred),"\n")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(TrainDev_y, trainDev_pred))
    print( '\nClassification report\n' )
    print(classification_report(y_true, trainDev_pred))

    y_true, dev_pred = Dev_y, model.predict(Dev_x)
    print("Train-Dev,   e2", metrics.mean_squared_error(Dev_y, dev_pred),"\n")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(Dev_y, dev_pred))
    print( '\nClassification report\n' )
    print(classification_report(y_true, dev_pred))

    y_true, test_pred = Test_y, model.predict(Test_x)
    print("Train-Test,   e3: ", metrics.mean_squared_error(Test_y, test_pred),"\n")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(Test_y, test_pred))
    print( '\nClassification report\n' )
    print(classification_report(y_true, test_pred))

    y_true, devTest_pred = Y_test, model.predict(X_test)
    print("Train-(Dev+Test),   e4: ", metrics.mean_squared_error(Y_test, devTest_pred),"\n")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(Y_test, devTest_pred))
    print( '\nClassification report\n' )
    print(classification_report(y_true, devTest_pred))

In [0]:
def displayAccuracy(X, Y):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    k = [3, 5, 7, 9, 11]
    metric = ["euclidean", "manhattan", "chebyshev", "mahalanobis", "minkowski", "wminkowski", "seuclidean"]
    kernel = ["linear", "rbf"]
    weight = [None, "balanced"]
    gamma = ["auto", "scale"]
    '''

    for i in k:
      for j in metric:
          if j != "wminkowski" and j != "seuclidean":
            print("K: {} - Metric: {}".format(i, j))
            kNN(i, j, X_train, Y_train)
          print()
    
    for i in kernel:
        for j in weight:
            for k in gamma:
                if i != "linear":
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, k))
                    svm(X_train, Y_train, i, j, k)
                else:
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, "auto"))
                    svm(X_train, Y_train, i, j, k)
        print()
    '''
    for i in kernel:
        for j in weight:
            for k in gamma:
                if i != "linear":
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, k))
                    svm(X_train, Y_train, i, j, k)
                else:
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, "auto"))
                    svm(X_train, Y_train, i, j, k)

In [27]:
if __name__ == '__main__':

  iris = datasets.load_iris()
  X = iris.data
  Y = iris.target
  
  #threeDVisualization(iris.data[:, :], Y)
  
  # Z-Score
  scaler = StandardScaler()
  scaler.fit(X)
  z_score = scaler.transform(X)

  #Displaying result according to each type of methods and regression model
  print("\nRaw: ")
  displayAccuracy(X,Y)
  #print("\nZ-Score: ")
  #displayAccuracy(z_score,Y)


Raw: 
Kernel: linear - Weight: None - Gamma: auto
5 Fold
SVM Accuracy:  0.9714285714285715
Kernel: linear - Weight: None - Gamma: auto
5 Fold
SVM Accuracy:  0.9714285714285715
Kernel: linear - Weight: balanced - Gamma: auto
5 Fold
SVM Accuracy:  0.980952380952381
Kernel: linear - Weight: balanced - Gamma: auto
5 Fold
SVM Accuracy:  0.980952380952381
Kernel: rbf - Weight: None - Gamma: auto
5 Fold
SVM Accuracy:  0.9714285714285713
Kernel: rbf - Weight: None - Gamma: scale
5 Fold
SVM Accuracy:  0.9523809523809523
Kernel: rbf - Weight: balanced - Gamma: auto
5 Fold
SVM Accuracy:  0.9714285714285713
Kernel: rbf - Weight: balanced - Gamma: scale
5 Fold
SVM Accuracy:  0.9619047619047618


In [38]:
    svm = SVC(C=1, degree=3, gamma="scale", coef0=0.0, shrinking=True, 
          probability=True, tol=0.001, cache_size=200, class_weight="balanced",
          max_iter=-1, decision_function_shape="ovr", random_state = 0)
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    
    #svm.fit(X_train, Y_train)
    #y_true, y_pred = Y_test, svm.predict(X_test)
    #print("SVM Accuracy: ", 1 - metrics.mean_squared_error(Y_test, y_pred))

    fourError(X, Y, svm)

    #AdaBoost(svm, 100, 1, X_train, Y_train, X_test, Y_test)
    #GradientBoost(100, 1, X_train, Y_train, X_test, Y_test)

Train-Train Dev,   e1: 0.09523809523809523 

KNN Accuracy:  0.9047619047619048

Classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.78      1.00      0.88         7
           2       1.00      0.71      0.83         7

    accuracy                           0.90        21
   macro avg       0.93      0.90      0.90        21
weighted avg       0.93      0.90      0.90        21

Train-Dev,   e2 0.0 

KNN Accuracy:  1.0

Classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00         8

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

Train-Test,   e3:  0.0 

KNN Accuracy:  1.0

Classification report

  

In [37]:
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

    knn = KNeighborsClassifier(n_neighbors=7, weights='uniform', metric="chebyshev")
    #knn.fit(X_train, Y_train)

    #y_true, y_pred = Y_test, knn.predict(X_test)
    fourError(X, Y, knn)

Train-Train Dev,   e1: 0.19047619047619047 

KNN Accuracy:  0.8095238095238095

Classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.71      0.71      0.71         7
           2       0.71      0.71      0.71         7

    accuracy                           0.81        21
   macro avg       0.81      0.81      0.81        21
weighted avg       0.81      0.81      0.81        21

Train-Dev,   e2 0.045454545454545456 

KNN Accuracy:  0.9545454545454546

Classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.88      1.00      0.93         7
           2       1.00      0.88      0.93         8

    accuracy                           0.95        22
   macro avg       0.96      0.96      0.96        22
weighted avg       0.96      0.95      0.95        22

Train-Test,   e3:  0.0 

KNN Accuracy: