<a href="https://colab.research.google.com/github/nisanuro/CNG562-Assignment-2/blob/master/CNG562_Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from mpl_toolkits.mplot3d import Axes3D
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics, datasets, preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC, SVC

%matplotlib inline

In [0]:
def dataVisualizaion(iris):
    x_index = 0
    y_index = 1

    formatter = plt.FuncFormatter(lambda i, *args: iris.target_names[int(i)])
    
    plt.figure(figsize=(5, 4))
    plt.scatter(iris.data[:, x_index], iris.data[:, y_index], c=iris.target)
    plt.colorbar(ticks=[0, 1, 2], format=formatter)
    plt.xlabel(iris.feature_names[x_index])
    plt.ylabel(iris.feature_names[y_index])

    plt.tight_layout()
    plt.show()

In [0]:
def threeDVisualization(X, y):

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    fig = plt.figure(1, figsize=(16, 9))
    ax = Axes3D(fig, elev=-150, azim=110)
    X_reduced = PCA(n_components=3).fit_transform(X_scaled)
    ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
    ax.set_title("First three PCA directions")
    ax.set_xlabel("1st eigenvector")
    ax.w_xaxis.set_ticklabels([])
    ax.set_ylabel("2nd eigenvector")
    ax.w_yaxis.set_ticklabels([])
    ax.set_zlabel("3rd eigenvector")
    ax.w_zaxis.set_ticklabels([])

    plt.show()
    print("The number of features in the new subspace is ", X_reduced.shape[1])

    return X_reduced

In [0]:
def randomOneHoldout(X_train, Y_train):
  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.3, random_state=0)
  return x_train, x_test, y_train, y_test

In [0]:
def stratifiedOneHoldout(X_train, Y_train):
  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.3, random_state=0)
  return x_train, x_test, y_train, y_test

In [0]:
def kNN(k: int, metric: str, X_train, Y_train):
  
  #Model
  if metric == "mahalanobis":
    knn = KNeighborsClassifier(n_neighbors=k, weights='uniform', metric=metric, algorithm="brute", metric_params={'V': np.cov(X_train)})
  else:
    knn = KNeighborsClassifier(n_neighbors=k, weights='uniform', metric=metric)

  #5-Fold
  cv_result_knn_5 = cross_val_score(knn, X_train, Y_train, cv=5, scoring='accuracy')
  
  #10-Fold
  cv_result_knn_10 = cross_val_score(knn, X_train, Y_train, cv=10, scoring='accuracy')
  
  #Random One Holdout
  x_train, x_test, y_train, y_test_random = randomOneHoldout(X_train, Y_train)
  knn.fit(x_train, y_train)
  y_pred_knn_random = knn.predict(x_test)
  
  #Stratified One Holdout
  x_train, x_test, y_train, y_test_stratified = stratifiedOneHoldout(X_train, Y_train)
  knn.fit(x_train, y_train)
  y_pred_knn_stratified = knn.predict(x_test)

  print("5 Fold")
  print("KNN Accuracy: ", cv_result_knn_5.mean())
  
  print("10 Fold")
  print("KNN Accuracy: ", cv_result_knn_10.mean())

  print("Random One Hold Out")
  print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_random, y_pred_knn_random))
  
  print("Stratified One Hold Out Fold")
  print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_stratified, y_pred_knn_stratified))

In [0]:
def svm(X_train, Y_train, kernel, weight, gamma):

    svm = SVC(C=1, kernel=kernel, degree=3, gamma=gamma, coef0=0.0, shrinking=True, 
          probability=False, tol=0.001, cache_size=200, class_weight=weight,
          max_iter=-1, decision_function_shape="ovr", random_state = 0)

    #5-Fold
    cv_result_svm_5 = cross_val_score(svm, X_train, Y_train, cv=5, scoring='accuracy')
  
    #10-Fold
    cv_result_svm_10 = cross_val_score(svm, X_train, Y_train, cv=10, scoring='accuracy')
  
    #Random One Holdout
    x_train, x_test, y_train, y_test_random = randomOneHoldout(X_train, Y_train)
    svm.fit(x_train, y_train)
    y_pred_svm_random = svm.predict(x_test)
  
    #Stratified One Holdout
    x_train, x_test, y_train, y_test_stratified = stratifiedOneHoldout(X_train, Y_train)
    svm.fit(x_train, y_train)
    y_pred_svm_stratified = svm.predict(x_test)

    print("5 Fold")
    print("KNN Accuracy: ", cv_result_svm_5.mean())
  
    print("10 Fold")
    print("KNN Accuracy: ", cv_result_svm_10.mean())

    print("Random One Hold Out")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_random, y_pred_svm_random))
  
    print("Stratified One Hold Out Fold")
    print("KNN Accuracy: ", 1 - metrics.mean_squared_error(y_test_stratified, y_pred_svm_stratified))

In [0]:
def displayAccuracy(X, Y):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    k = [3, 5, 7, 9, 11]
    metric = ["euclidean", "manhattan", "chebyshev", "mahalanobis", "minkowski", "wminkowski", "seuclidean"]
    kernel = ["linear", "poly", "rbf", "sigmoid"]
    weight = [None, "balanced"]
    gamma = ["auto", "scale"]
    '''
    for i in k:
      for j in metric:
          if j != "wminkowski" and j != "seuclidean":
            print("K: {} - Metric: {}".format(i, j))
            kNN(i, j, X_train, Y_train)
          print()
    '''
    for i in kernel:
        for j in weight:
            for k in gamma:
                if j != "linear":
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, k))
                    svm(X_train, Y_train, i, j, k)
                else:
                    print("Kernel: {} - Weight: {} - Gamma: {}".format(str(i), j, "auto"))
                    svm(X_train, Y_train, i, j, k)
        print()

In [32]:
if __name__ == '__main__':

  iris = datasets.load_iris()
  X = iris.data
  Y = iris.target
  
  #threeDVisualization(iris.data[:, :], Y)
  
  # Z-Score
  scaler = StandardScaler()
  scaler.fit(X)
  z_score = scaler.transform(X)

  #Displaying result according to each type of methods and regression model
  print("\nRaw: ")
  displayAccuracy(X,Y)
  print("\nZ-Score: ")
  displayAccuracy(z_score,Y)


Raw: 
Kernel: linear - Weight: None - Gamma: auto
5 Fold
KNN Accuracy:  0.9714285714285715
10 Fold
KNN Accuracy:  0.9800000000000001
Random One Hold Out
KNN Accuracy:  1.0
Stratified One Hold Out Fold
KNN Accuracy:  1.0
Kernel: linear - Weight: None - Gamma: scale
5 Fold
KNN Accuracy:  0.9714285714285715
10 Fold
KNN Accuracy:  0.9800000000000001
Random One Hold Out
KNN Accuracy:  1.0
Stratified One Hold Out Fold
KNN Accuracy:  1.0
Kernel: linear - Weight: balanced - Gamma: auto
5 Fold
KNN Accuracy:  0.980952380952381
10 Fold
KNN Accuracy:  0.9800000000000001
Random One Hold Out
KNN Accuracy:  1.0
Stratified One Hold Out Fold
KNN Accuracy:  1.0
Kernel: linear - Weight: balanced - Gamma: scale
5 Fold
KNN Accuracy:  0.980952380952381
10 Fold
KNN Accuracy:  0.9800000000000001
Random One Hold Out
KNN Accuracy:  1.0
Stratified One Hold Out Fold
KNN Accuracy:  1.0

Kernel: poly - Weight: None - Gamma: auto
5 Fold
KNN Accuracy:  0.9619047619047618
10 Fold
KNN Accuracy:  0.9518181818181818
Ran

In [0]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0, stratify=Y)

In [0]:
Train_x, TrainDev_x, Train_y, TrainDev_y = train_test_split(X_train, Y_train, test_size=0.2, random_state=0, stratify=Y_train)
 Dev_x, Test_x, Dev_y, Test_y = train_test_split(X_test, Y_test, test_size=0.5, random_state=0, stratify=Y_test)