In [1]:
# Short tag for the estimator family; it is interpolated into the names of the
# saved model file (model_<tag>.joblib) and the Tune results CSV.
model_type = 'SVM'
# Estimator class instantiated for every trial in train_SVM.
from sklearn.svm import SVC

In [2]:
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

import ray
from ray import tune
from ray.tune.schedulers import HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
import ConfigSpace as CS
from functools import partial

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from joblib import dump, load

In [3]:
# Device configuration: everything in this notebook runs on the CPU.
device = torch.device('cpu')

# Human-readable class names. The position in this list is the integer class
# id used by the labels and predictions, so the order must not be changed.
classes = [
    'Airplane',  # 0
    'Car',       # 1
    'Bird',      # 2
    'Cat',       # 3
    'Deer',      # 4
    'Dog',       # 5
    'Frog',      # 6
    'Horse',     # 7
    'Ship',      # 8
    'Truck',     # 9
]

In [5]:
def model_eval(features, labels, model, dataset):
    """Score ``model`` on one dataset split and write its evaluation artifacts.

    The following files are written to the current working directory, with
    ``dataset`` used as the file-name suffix:

    * ``performance_report_<dataset>.json`` - per-class classification report
    * ``class_pred_<dataset>.csv`` - actual vs. predicted class id per sample
    * ``labeled_confusion_matrix_<dataset>.png`` - annotated confusion matrix

    Parameters
    ----------
    features : array-like
        Inputs handed unchanged to ``model.predict``.
    labels : array-like of int
        True class ids, one per row of ``features``. This argument is never
        modified. For ``dataset == 'gen'`` the values are folder-order indices
        (0-3) and are translated to the real class ids first.
    model : fitted estimator
        Any object exposing ``predict(features)`` that returns integer class ids.
    dataset : str
        Name of the split (e.g. ``'train'``, ``'val'``, ``'gen'``).

    Returns
    -------
    float
        Overall accuracy on the split.

    Raises
    ------
    KeyError
        If ``dataset == 'gen'`` and a label is not one of 0, 1, 2, 3.
    """
    # One id per entry of `classes`; passed explicitly to sklearn so that the
    # report and the confusion matrix always cover all classes in this order.
    class_ids = list(range(len(classes)))

    predicted_classes = np.asarray(model.predict(features))

    if dataset == 'gen':
        # Since labels are read in based on ordering in the folder, translate
        # them so they reflect the correct classes:
        # 0 -> 3 (Cat), 1 -> 5 (Dog), 2 -> 7 (Horse), 3 -> 8 (Ship).
        labels_dict = {0: 3, 1: 5, 2: 7, 3: 8}
        # Build a new array instead of overwriting `labels`, so the caller's
        # data stays intact and calling this function twice is safe. int()
        # makes the lookup work for numpy / tensor scalars as well.
        actual_classes = np.array(
            [labels_dict[int(item)] for item in labels], dtype=int
        )
    else:
        actual_classes = np.asarray(labels)

    performance_report = classification_report(
        actual_classes,
        predicted_classes,
        labels=class_ids,
        target_names=classes,
        output_dict=True,
    )
    with open(f'performance_report_{dataset}.json', 'w') as f:
        json.dump(performance_report, f, indent=0)

    overall_accuracy = accuracy_score(actual_classes, predicted_classes)

    # Header row followed by one (actual, predicted) row per sample.
    comparison_list = [['Actual', 'Predicted']]
    comparison_list.extend(
        [actual, predicted]
        for actual, predicted in zip(actual_classes, predicted_classes)
    )
    np.savetxt(f'class_pred_{dataset}.csv', comparison_list, delimiter=',', fmt='%s')

    # Passing `labels` pins the row/column order to `classes`. Without it
    # sklearn orders the matrix by sorted label value and omits classes that do
    # not occur, so the axis names would not line up with the counts.
    c_matrix = confusion_matrix(actual_classes, predicted_classes, labels=class_ids)
    c_df = pd.DataFrame(c_matrix, index=classes, columns=classes)

    # Draw on a dedicated figure and close it afterwards so repeated calls do
    # not clobber the caller's current figure or accumulate open figures.
    fig, ax = plt.subplots(figsize=(13, 13))
    sns.heatmap(c_df, annot=True, fmt='g', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('Actual Class')
    ax.set_xlabel('Predicted Class')
    fig.savefig(f'labeled_confusion_matrix_{dataset}.png', bbox_inches='tight')
    plt.show()
    plt.close(fig)

    return overall_accuracy

In [7]:
def train_SVM(config, checkpoint_dir=None):
    """Ray Tune trainable: fit one SVC, save it, evaluate it and report accuracy.

    Parameters
    ----------
    config : dict
        Hyperparameters sampled by Tune; the keys ``'C'``, ``'g'`` (gamma) and
        ``'tol'`` are read.
    checkpoint_dir : str, optional
        Not used by this function.

    Side effects: writes ``model_<model_type>.joblib`` plus the files produced
    by ``model_eval`` for the ``'train'`` and ``'val'`` splits, then reports
    ``train_ACC`` and ``val_ACC`` to Tune.
    """
    # Every tensor file is mapped onto the CPU when loaded.
    load_on_cpu = partial(torch.load, map_location=torch.device('cpu'))

    xtrain = load_on_cpu('C:/Users/s_kal/Desktop/9039-ML/Final Project/Code/train_extracted_features.pt')
    ytrain = load_on_cpu('C:/Users/s_kal/Desktop/9039-ML/Final Project/Code/train_extracted_labels.pt')
    xtest = load_on_cpu('C:/Users/s_kal/Desktop/9039-ML/Final Project/Code/val_extracted_features.pt')
    ytest = load_on_cpu('C:/Users/s_kal/Desktop/9039-ML/Final Project/Code/val_extracted_labels.pt')

    # Initial tuning: fit an SVC with the sampled hyperparameters.
    svm = SVC(C=config['C'], gamma=config['g'], tol=config['tol'])
    svm.fit(xtrain, ytrain)

    # Optional learning curve of the best model (left disabled):
    #     plt.clf()
    #     plot_learning_curves(xtrain, ytrain, xtest, ytest, svm)
    #     plt.savefig('learning_curve.png')

    dump(svm, f'model_{model_type}.joblib')

    # Evaluate the training split first, then the validation split.
    accuracies = {
        'train_ACC': model_eval(xtrain, ytrain, svm, 'train'),
        'val_ACC': model_eval(xtest, ytest, svm, 'val'),
    }
    tune.report(**accuracies)

In [8]:
# Reference gamma around which the hyperparameter search grid is centred:
# 1 / (length of the first feature row * variance over all feature values).
# This is the same formula scikit-learn documents for SVC(gamma='scale').
# NOTE(review): assumes xtrain is laid out as (samples, features) - confirm.
xtrain = torch.load(
    'C:/Users/s_kal/Desktop/9039-ML/Final Project/Code/train_extracted_features.pt',
    map_location=torch.device('cpu'),
)
scale = 1/(len(xtrain[0])*np.var(np.array(xtrain)))

In [9]:
def main(num_samples=15, metric='train_ACC', cpus_per_trial=8):
    """Run the BOHB hyperparameter search for the RBF SVC and save the results.

    Parameters
    ----------
    num_samples : int, default 15
        Number of hyperparameter configurations Tune is asked to sample.
    metric : {'train_ACC', 'val_ACC'}, default 'train_ACC'
        Reported value that the searcher and the scheduler maximise.
        NOTE(review): the default is kept for backward compatibility, but
        maximising training accuracy steers the search towards configurations
        that fit the training split; 'val_ACC' is usually the better target
        for model selection.
    cpus_per_trial : int, default 8
        CPUs reserved for each trial. Lower this on machines with fewer cores,
        otherwise no trial can be scheduled.

    Returns
    -------
    The analysis object returned by ``tune.run``. Its ``results_df`` is also
    written to ``results_df_<model_type>_rbf.csv``.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the values reported by ``train_SVM``.
    """
    # Fail before any trial is launched: each trial takes minutes, and a
    # misspelled metric would otherwise only surface once results come in.
    reported_metrics = ('train_ACC', 'val_ACC')
    if metric not in reported_metrics:
        raise ValueError(
            f'metric must be one of {reported_metrics}, got {metric!r}'
        )

    # Discrete grid; gamma is expressed relative to the module-level `scale`.
    config = {
        'C': tune.choice([1e-2, 1e-1, 1, 1e1, 1e2]),
        'g': tune.choice([scale/10, scale, scale*10]),
        'tol': tune.choice([1e-4, 1e-3, 1e-2])
    }

    algo = TuneBOHB(metric=metric,
                    mode='max'
                    )

    # max_t=1 because train_SVM reports exactly one result per trial.
    bohb = HyperBandForBOHB(time_attr="training_iteration",
                            metric=metric,
                            mode="max",
                            max_t=1
                            )

    result = tune.run(
        tune.with_parameters(train_SVM),
        resources_per_trial={"cpu": cpus_per_trial, "gpu": 0},
        config=config,
        num_samples=num_samples,
        scheduler=bohb,
        search_alg=algo,
        progress_reporter=tune.JupyterNotebookReporter(overwrite=True, print_intermediate_tables=True),
        fail_fast=False,
        sync_config=tune.SyncConfig(
            syncer=None  # Disable syncing
        )
    )

    result.results_df.to_csv(f'results_df_{model_type}_rbf.csv')
    return result
# BOHB - https://arxiv.org/abs/1807.01774
# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-bohb

In [10]:
# Launch the search with the default settings of main(); the returned Tune
# analysis object is kept for later inspection.
result_rbf = main()

Trial name,status,loc,C,g,tol,iter,total time (s),train_ACC,val_ACC
train_SVM_24303f52,TERMINATED,127.0.0.1:17428,0.1,0.00658728,0.001,1,193.637,0.998311,0.9996
train_SVM_244b2d88,TERMINATED,127.0.0.1:7224,0.01,0.00658728,0.01,1,567.459,0.9966,0.999
train_SVM_9a68e026,TERMINATED,127.0.0.1:17824,1.0,0.00658728,0.01,1,100.584,0.999689,0.9994
train_SVM_ef4922c9,TERMINATED,127.0.0.1:20024,10.0,0.000658728,0.001,1,54.5925,0.999578,0.9992
train_SVM_2deecbd8,TERMINATED,127.0.0.1:13076,0.1,0.00658728,0.0001,1,185.323,0.998311,0.9996
train_SVM_50da02f5,TERMINATED,127.0.0.1:9932,100.0,0.00658728,0.0001,1,106.197,1.0,0.9996
train_SVM_c207144d,TERMINATED,127.0.0.1:15708,0.1,0.00658728,0.01,1,185.4,0.998311,0.9996
train_SVM_043c81fd,TERMINATED,127.0.0.1:2704,0.01,0.000658728,0.001,1,1155.71,0.996733,0.9994
train_SVM_755a8ed2,TERMINATED,127.0.0.1:21260,10.0,0.000658728,0.001,1,56.3212,0.999578,0.9992
train_SVM_29328cd9,TERMINATED,127.0.0.1:14624,1.0,0.000658728,0.01,1,103.699,0.9984,0.9996


2022-07-22 01:55:14,819	INFO tune.py:639 -- Total run time: 9756.24 seconds (9756.04 seconds for the tuning loop).


[2m[36m(train_SVM pid=15332)[0m Figure(640x480)
[2m[36m(train_SVM pid=15332)[0m Figure(1300x1300)




BOHB Example: https://docs.ray.io/en/latest/tune/examples/includes/bohb_example.html