In [None]:
model_type = 'LogReg'
from sklearn.linear_model import LogisticRegression
data_dir = '/content/drive/MyDrive/ECE9039-Project/Code/ConvNext/Retrained/'
tune_dir = '/content/drive/My Drive/ECE9039-Project/Code/Tune-Train/'
classes = ['Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
!pip install ray
!pip install hpbandster ConfigSpace

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

import ray
from ray import tune
from ray.tune.schedulers import HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
import ConfigSpace as CS
from functools import partial

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from joblib import dump, load
from google.colab import drive

# Mount Google Drive; data_dir and tune_dir both live under this mount point.
drive.mount('/content/drive')

# CPU device handle.
device = torch.device('cpu')

  import pandas.util.testing as tm


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# @article{scikit-learn,
#  title={Scikit-learn: Machine Learning in {P}ython},
#  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
#          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
#          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
#          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
#  journal={Journal of Machine Learning Research},
#  volume={12},
#  pages={2825--2830},
#  year={2011}
# }

# @inproceedings{sklearn_api,
#   author    = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and
#                Fabian Pedregosa and Andreas Mueller and Olivier Grisel and
#                Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort
#                and Jaques Grobler and Robert Layton and Jake VanderPlas and
#                Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux},
#   title     = {{API} design for machine learning software: experiences from the scikit-learn
#                project},
#   booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning},
#   year      = {2013},
#   pages = {108--122},
# }

def model_eval(features, labels, model, dataset):
    """Evaluate a fitted classifier and write its reports to the working directory.

    Three files are written, each suffixed with ``dataset``:
    ``performance_report_<dataset>.json`` (per-class metrics),
    ``class_pred_<dataset>.csv`` (actual vs. predicted ids) and
    ``labeled_confusion_matrix_<dataset>.png``. The heatmap is also shown.

    Args:
        features: Inputs passed unchanged to ``model.predict``.
        labels: Integer class ids, one per sample. The caller's object is
            never modified.
        model: Fitted estimator exposing ``predict``.
        dataset: Tag used in the output file names. The value ``'gen'``
            additionally remaps folder-order ids 0-3 to class ids 3, 5, 7, 8.

    Returns:
        The overall accuracy computed by ``accuracy_score``.

    Raises:
        KeyError: If ``dataset == 'gen'`` and a label is not one of 0-3.
    """
    class_ids = list(range(len(classes)))

    # Work on a private copy: remapping the caller's labels in place would
    # corrupt them and make a second call remap already-remapped values.
    actual_classes = np.array(labels)
    predicted_classes = np.asarray(model.predict(features))

    # Since labels are read in based on ordering in the folder,
    # this corrects the labels so they reflect the correct classes.
    if dataset == 'gen':
        labels_dict = {0: 3, 1: 5, 2: 7, 3: 8}
        # int(...) makes the lookup independent of the element type.
        actual_classes = np.array(
            [labels_dict[int(item)] for item in actual_classes], dtype=int
        )

    performance_report = classification_report(
        actual_classes,
        predicted_classes,
        labels=class_ids,
        target_names=classes,
        output_dict=True,
    )
    with open(f'performance_report_{dataset}.json', 'w') as f:
        json.dump(performance_report, f, indent=0)

    overall_accuracy = accuracy_score(actual_classes, predicted_classes)

    # Header row followed by one "actual,predicted" row per sample.
    comparison_list = [['Actual', 'Predicted']]
    comparison_list.extend(
        [int(actual), int(predicted)]
        for actual, predicted in zip(actual_classes, predicted_classes)
    )
    np.savetxt(f'class_pred_{dataset}.csv', comparison_list, delimiter=',', fmt='%s')

    # Passing `labels` fixes both the size (always one row/column per class,
    # even when a class is absent) and the order of the matrix, so it lines up
    # with `classes`. Without it the matrix follows the sorted label values,
    # which for the class-name strings is alphabetical and does not match
    # the order of `classes`.
    c_matrix = confusion_matrix(actual_classes, predicted_classes, labels=class_ids)
    c_df = pd.DataFrame(c_matrix, index=classes, columns=classes)

    fig, ax = plt.subplots(figsize=(13, 13))
    sns.heatmap(c_df, annot=True, fmt='g', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('Actual Class')
    ax.set_xlabel('Predicted Class')
    fig.savefig(f'labeled_confusion_matrix_{dataset}.png', bbox_inches='tight')
    plt.show()
    # Release the figure so repeated evaluations do not accumulate open figures.
    plt.close(fig)

    return overall_accuracy

In [None]:
def train_LogReg(config, checkpoint_dir=None):
    """Ray Tune trainable: fit one elastic-net logistic regression and report accuracy.

    Loads the pre-extracted train/validation features and labels from
    ``data_dir``, fits the model, saves it as ``model_<model_type>.joblib`` in
    the current working directory, evaluates it on both splits with
    ``model_eval`` and reports the two accuracies to Tune exactly once.

    Args:
        config: Trial hyperparameters. Must contain ``'C'``, ``'l1_ratio'`` and
            ``'max_iter'``; an optional ``'random_state'`` overrides the
            default seed of 0.
        checkpoint_dir: Accepted for compatibility with the trainable
            signature; not used.
    """
    cpu = torch.device('cpu')
    xtrain = torch.load(f'{data_dir}train_extracted_features.pt', map_location=cpu)
    ytrain = torch.load(f'{data_dir}train_extracted_labels.pt', map_location=cpu)
    xtest = torch.load(f'{data_dir}val_extracted_features.pt', map_location=cpu)
    ytest = torch.load(f'{data_dir}val_extracted_labels.pt', map_location=cpu)

    # For initial tuning:
    model = LogisticRegression(
        C=config['C'],
        penalty='elasticnet',
        max_iter=config['max_iter'],
        solver='saga',
        l1_ratio=config['l1_ratio'],
        # saga shuffles the data; a fixed seed makes trials reproducible and
        # keeps the comparison between configurations fair.
        random_state=config.get('random_state', 0),
    )
    model.fit(xtrain, ytrain)

    dump(model, f'model_{model_type}.joblib')

    train_acc = model_eval(xtrain, ytrain, model, 'train')
    val_acc = model_eval(xtest, ytest, model, 'val')

    tune.report(
        train_ACC=train_acc,
        val_ACC=val_acc,
    )

In [None]:
def main(num_samples=15, metric='val_ACC', mode='max'):
    """Run a BOHB hyperparameter search over ``train_LogReg`` with Ray Tune.

    Trial data is stored under ``<tune_dir>tuning_data`` and the per-trial
    results table is written to ``<tune_dir>results_df_<model_type>.csv``.

    Args:
        num_samples: Number of configurations passed to ``tune.run``.
        metric: Reported metric the search optimises. Defaults to the
            validation accuracy, so hyperparameters are not selected on
            training accuracy; pass ``'train_ACC'`` to optimise that instead.
        mode: ``'max'`` or ``'min'``, the direction in which ``metric`` is
            optimised.

    Returns:
        The analysis object returned by ``tune.run``.
    """
    config = {
        'C': tune.choice([1e-2, 1e-1, 1, 1e1, 1e2]),
        'l1_ratio': tune.choice([0, 5e-1, 1]),
        'max_iter': tune.choice([50, 100, 150])
    }

    # The searcher and the scheduler must agree on metric and mode, so both
    # are taken from the same arguments.
    algo = TuneBOHB(metric=metric, mode=mode)

    bohb = HyperBandForBOHB(
        time_attr="training_iteration",
        metric=metric,
        mode=mode,
        # train_LogReg reports once per trial, so one iteration is the full budget.
        max_t=1,
    )

    result = tune.run(
        tune.with_parameters(train_LogReg),
        resources_per_trial={"cpu": 2, "gpu": 0},
        config=config,
        num_samples=num_samples,
        scheduler=bohb,
        search_alg=algo,
        progress_reporter=tune.JupyterNotebookReporter(overwrite=True, print_intermediate_tables=True),
        fail_fast=False,
        local_dir=f'{tune_dir}tuning_data',
        sync_config=tune.SyncConfig(
            syncer=None  # Disable syncing
        )
    )

    result.results_df.to_csv(f'{tune_dir}results_df_{model_type}.csv')
    return result
# References:
#   BOHB paper (Falkner, Klein & Hutter, 2018): https://arxiv.org/abs/1807.01774
#   Ray Tune BOHB scheduler docs: https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-bohb

In [None]:
# Launch the hyperparameter search (the recorded run took about 5400 s in total).
result_lr = main(num_samples=15)

Trial name,status,loc,C,l1_ratio,max_iter,iter,total time (s),train_ACC,val_ACC
train_LogReg_56b8b8d6,TERMINATED,172.28.0.2:1244,100.0,0.0,100,1,235.637,1.0,0.9994
train_LogReg_58f6c156,TERMINATED,172.28.0.2:1244,10.0,0.5,150,1,982.237,1.0,0.9996
train_LogReg_e573c642,TERMINATED,172.28.0.2:1244,1.0,0.0,50,1,121.192,1.0,0.9994
train_LogReg_2eedf1a6,TERMINATED,172.28.0.2:1244,1.0,1.0,150,1,566.45,0.999889,0.9996
train_LogReg_7735e824,TERMINATED,172.28.0.2:1244,10.0,0.5,50,1,347.994,1.0,0.9994
train_LogReg_c8ddb476,TERMINATED,172.28.0.2:1244,0.01,0.0,100,1,212.953,0.998822,0.9998
train_LogReg_984e7c54,TERMINATED,172.28.0.2:1244,0.01,1.0,50,1,77.7567,0.994022,0.9968
train_LogReg_173f94c6,TERMINATED,172.28.0.2:1244,0.1,0.5,50,1,189.681,0.998689,0.9996
train_LogReg_459e141e,TERMINATED,172.28.0.2:1244,100.0,0.5,150,1,933.067,1.0,0.9994
train_LogReg_b6b24dbe,TERMINATED,172.28.0.2:1244,1.0,0.0,150,1,356.158,1.0,0.9996


2022-07-22 04:22:14,265	INFO tune.py:748 -- Total run time: 5379.34 seconds (5379.09 seconds for the tuning loop).


BOHB Example: https://docs.ray.io/en/latest/tune/examples/includes/bohb_example.html