# Baseline Experiments for Classifier

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.ensemble import RandomForestClassifier as rf
from sklearn.neural_network import MLPClassifier as mlp
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression as lr 
import json

In [None]:
# Root of the local iNaturalist 2017 working copy; every other location used
# by this notebook is derived from it.
main_dir = 'C://Users/Declan/iNaturalist_2017/'

# Sub-directories under the root (each keeps its trailing slash so later code
# can append file names by plain concatenation).
features_dir = f"{main_dir}Features/"
test_dir = f"{main_dir}test/"
report_dir = f"{main_dir}reports/"

In [None]:
# Load the pre-extracted features for a single n-shot split.
#
# `n` (number of training images per class, as a string) used to be read here
# without ever being assigned in an earlier cell, so running the notebook
# top-to-bottom raised NameError. Reuse a value left behind by a previous run
# of the sweep cell if there is one; otherwise fall back to "1", the first
# value of that sweep.
# NOTE(review): confirm which shot count this cell is meant to inspect.
n = globals().get("n", "1")

training_features_path = os.path.join(features_dir, "training_features_" + n + "shot.npz")
test_features_path = os.path.join(features_dir, "test_features_" + n + "shot.npz")

# np.load on an .npz archive returns a lazily-read NpzFile that keeps the file
# open; the `with` blocks close it once the arrays have been read into memory.

# training features
with np.load(training_features_path) as training_features_data:
    X_train = training_features_data['features']
    y_train = training_features_data['labels']

# Get test imgs ready
with np.load(test_features_path) as test_features_data:
    X_test = test_features_data['features']
    y_test = test_features_data['labels']

In [None]:

def _fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit `model` on the training split and return its accuracy on the test split."""
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return metrics.accuracy_score(y_test, y_pred)


# Maps "<report_dir><n>_shot_<model>_<hyper-parameters>" -> test accuracy.
# NOTE: the results are only kept in memory; nothing in this cell writes
# `dic` to disk.
dic = {}

# Sweep every baseline classifier over each n-shot feature split.
for n in ["1", "3", "5", "10", "20"]:
    ## Load in features
    # np.load on an .npz returns a lazily-read NpzFile that keeps the archive
    # open; the `with` blocks close it once the arrays are in memory.
    with np.load(os.path.join(features_dir, "training_features_" + n + "shot.npz")) as training_features_data:
        ## training features
        X_train = training_features_data['features']
        y_train = training_features_data['labels']
    with np.load(os.path.join(features_dir, "test_features_" + n + "shot.npz")) as test_features_data:
        ## Get test imgs ready
        X_test = test_features_data['features']
        y_test = test_features_data['labels']

    ## Support Vector Machines: kernel (k) x regularisation strength C (m)
    for k in ['linear', 'poly', 'rbf', 'sigmoid']:
        for m in [0.1, 1, 2, 5]:
            svclassifier = SVC(kernel=k, C=m)
            name = report_dir + n + '_shot_' + '_SVM_' + k + str(m)
            acc = _fit_and_score(svclassifier, X_train, y_train, X_test, y_test)
            print(name, "  :  ", acc)
            dic[name] = acc

    ## K Nearest Neighbour: weighting scheme (k) x number of neighbours (m)
    for k in ['uniform', 'distance']:
        for m in [5, 10, 20, 100]:
            # The weighting scheme must come from the loop variable; it was
            # previously hard-coded to 'uniform', so the "distance" entries
            # were just repeats of the uniform runs under a different key.
            knn_fn = knn(weights=k, n_neighbors=m)
            name = report_dir + n + '_shot_' + '_knn_' + k + str(m)
            acc = _fit_and_score(knn_fn, X_train, y_train, X_test, y_test)
            print(name, "  :  ", acc)
            dic[name] = acc

    ## Random Forest: number of trees (k) x split criterion (m)
    for k in [100, 200, 400]:
        for m in ['gini', 'entropy']:
            rfc = rf(n_estimators=k, criterion=m)
            name = report_dir + n + '_shot_' + '_rf_' + str(k) + "_" + m
            acc = _fit_and_score(rfc, X_train, y_train, X_test, y_test)
            print(name, "  :  ", acc)
            dic[name] = acc

    ## Multi Layer Perceptron: initial learning rate (k) x hidden layer sizes (m)
    for k in [0.001, 0.002, 0.005, 0.01, 0.1]:
        for m in [(100,), (200,), (50,)]:
            # hidden_layer_sizes must be passed explicitly; it was previously
            # omitted, so every `m` trained the same default architecture.
            mlpc = mlp(max_iter=500, solver="adam", learning_rate_init=k,
                       hidden_layer_sizes=m)
            name = report_dir + n + '_shot' + '_mlp_' + "lr_" + str(k) + str(m)
            acc = _fit_and_score(mlpc, X_train, y_train, X_test, y_test)
            print(name, "  :  ", acc)
            print("N layers: ", mlpc.n_layers_)
            dic[name] = acc

    ## Logistic Regression: inverse regularisation strength C (k)
    for k in [0.01, 0.025, 0.05, 0.1, 1, 2, 5]:
        lrc = lr(max_iter=1000, C=k)
        name = report_dir + n + '_shot' + '_C_' + str(k)
        acc = _fit_and_score(lrc, X_train, y_train, X_test, y_test)
        print(name, "  :  ", acc)
        dic[name] = acc

In [None]:
# Load the accuracies JSON file from the reports directory into `accs`.
accuracies_path = f"{report_dir}accuracies.json"
with open(accuracies_path, 'r') as handle:
    accs = json.load(handle)

In [None]:
# Bar chart of the per-key 'f1-score' stored in `cls_report`.
# NOTE(review): `cls_report` is not defined in this part of the notebook;
# presumably it is a classification_report(..., output_dict=True) result —
# confirm where it comes from.

# Collect (key, f1-score) pairs for every key that precedes the 'accuracy'
# entry; iteration stops as soon as that entry is reached.
per_class = []
for label in cls_report:
    label_text = str(label)
    if label_text == 'accuracy':
        break
    per_class.append((label_text, cls_report[label_text]['f1-score']))

# x-axis values: the keys as strings; y-axis values: their f1-scores.
x_val = [label_text for label_text, _ in per_class]
acc = [score for _, score in per_class]

ax = plt.gca()
ax.bar(x_val, acc)
ax.set_xlabel('ClassID: Number of images within class')
ax.set_ylabel('F1 Accuracy for each class')
# Only place ticks at a handful of bar positions rather than at every bar.
ax.set_xticks([20, 40, 60, 80, 100, 120, 140, 160, 180, 199])
ax.set_title('Baseline Accuracy')