In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
import time
import plotly
from datetime import datetime
import optuna
import sklearn
from sklearnex import patch_sklearn
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
# Seed constant for the run — presumably meant for stochastic components;
# NOTE(review): confirm where it is actually consumed.
SEED = 10
# Selects which dataset variant load_csv reads: None -> the "full" files,
# any other value -> the "semi" files whose name carries limit_rows/1000 + "k".
limit_rows = None

In [3]:
# Log the wall-clock time at which this cell runs (formatted YYYY-MM-DD HH:MM:SS).
print(f"Execution started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}.")

Execution started at 2022-03-02 14:38:09.


In [4]:
def load_csv(name):
    """Read one headerless ``iot23_combined`` CSV and return it as a NumPy array.

    The module-level ``limit_rows`` decides which file is opened: ``None``
    reads from the ``full`` directory, any other value reads the ``semi``
    file whose name embeds ``limit_rows`` expressed in thousands.

    Parameters
    ----------
    name : str
        Split identifier inserted into the file name, e.g. ``'X_train'``.

    Returns
    -------
    numpy.ndarray
        A flattened 1-D array when the file has exactly one column,
        otherwise the 2-D array as read.
    """
    if limit_rows is not None:
        csv_path = f'../Data Preprocessing/sklearn/semi/iot23_combined_{int(limit_rows/1000)}k_{name}.csv'
    else:
        csv_path = f'../Data Preprocessing/sklearn/full/iot23_combined_{name}.csv'

    # The files carry no header row, so every line is data.
    frame = pd.read_csv(filepath_or_buffer=csv_path, header=None, sep=',')
    values = frame.infer_objects().to_numpy()

    # Single-column files (the label vectors) are returned flat.
    if values.shape[1] == 1:
        return values.ravel()
    return values

In [5]:
# Load the four pre-split arrays; each call reads one CSV via load_csv.
X_train = load_csv('X_train')
X_test = load_csv('X_test')
y_train = load_csv('y_train')
y_test = load_csv('y_test')

# Fail fast on inconsistent files instead of discovering the mismatch later
# inside fit()/accuracy_score() after a long load.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train row counts differ"
assert X_test.shape[0] == y_test.shape[0], "X_test / y_test row counts differ"
assert X_train.shape[1] == X_test.shape[1], "train / test feature counts differ"

print('X_train',X_train.shape,'\ny_train',y_train.shape)
print('X_test',X_test.shape,'\ny_test',y_test.shape)

X_train (7142855, 15) 
y_train (7142855,)
X_test (1785714, 15) 
y_test (1785714,)


In [6]:
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import ComplementNB

def objective(trial):
    """Optuna objective: fit a ComplementNB with sampled settings and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``alpha`` (0.1 to 1.0 in steps of 0.1),
        ``fit_prior`` and ``norm``.

    Returns
    -------
    float
        Accuracy of the fitted classifier's predictions for ``X_test``
        against ``y_test``.

    Notes
    -----
    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    NOTE(review): the score is computed on the test split, so the selected
    hyperparameters are tuned against the same data used to report accuracy;
    consider a separate validation split.
    """
    # suggest_float(..., step=q) is the supported replacement for the
    # deprecated suggest_discrete_uniform(name, low, high, q).
    alpha     = trial.suggest_float("alpha", 0.1, 1.0, step=0.1)
    fit_prior = trial.suggest_categorical('fit_prior', [False, True])
    norm      = trial.suggest_categorical('norm', [False, True])

    classifier_obj = ComplementNB(alpha=alpha, fit_prior=fit_prior, norm=norm)
    classifier_obj.fit(X_train, y_train)

    y_pred = classifier_obj.predict(X_test)

    # Imported explicitly above: a bare `import sklearn` does not guarantee
    # that the `sklearn.metrics` submodule has been loaded.
    return accuracy_score(y_test, y_pred)

In [7]:
# Log the wall-clock time immediately before the study is created and run.
print(f"Optimization started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}.")

Optimization started at 2022-03-02 14:38:16.


In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is tied to SEED.
# TPESampler is the sampler create_study uses by default, so only the seeding
# changes. With n_jobs > 1 trials run concurrently, so runs are still not
# guaranteed to be bit-for-bit reproducible; use n_jobs=1 if that is required.
sampler = optuna.samplers.TPESampler(seed=SEED)
study = optuna.create_study(direction="maximize", sampler=sampler)

# Trials that raise NotImplementedError are recorded as failed instead of
# aborting the whole optimization.
study.optimize(objective, n_trials=100, n_jobs=8, catch=(NotImplementedError,))

[I 2022-03-02 14:38:16,846] A new study created in memory with name: no-name-d296b862-0c8e-412e-b12e-9ff1bfed9f2c


In [None]:
# Log the wall-clock time once the optimization cell above has completed.
print(f"Optimization finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}.")

In [None]:
def format_trial(trial):
    """Serialize a trial's number, first objective value and params as JSON.

    Parameters
    ----------
    trial : object
        Any object exposing ``number``, ``values`` (indexable) and ``params``.

    Returns
    -------
    str
        JSON text indented by four spaces with the keys ``number``, ``score``
        and ``params``; values JSON cannot encode are rendered via ``str``.
    """
    summary = {}
    summary['number'] = trial.number
    summary['score'] = trial.values[0]
    summary['params'] = trial.params
    return json.dumps(summary, default=str, indent=4)

In [None]:
# Show the study's best trial (number, score, params) as indented JSON.
print(format_trial(study.best_trial))

In [None]:
# Print every trial returned by study.best_trials in the same JSON layout.
for trial in study.best_trials:
    print(format_trial(trial))

In [None]:
# Build Optuna's optimization-history figure for the study and display it.
optuna.visualization.plot_optimization_history(study).show()