# Tutorial for quante_carlo
- <b>quante_carlo</b> is a multiprocess hyperparameter tuning module. 
- This notebook demonstrates how to use this module to determine the number of neurons to use in a pytorch neural network.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from quante_carlo import hp_tune
import time
import numpy as np
import multiprocessing as mp
from torch import nn
import neural_network

## This tutorial uses the MNIST dataset, available <a href="https://www.kaggle.com/code/imdevskp/digits-mnist-classification-using-cnn">here</a>

### Multiprocessing
This uses multiprocessing to train. Each process interacts with the Bayesian Optimization API individually.

Included with this repository is a file named neural_network.py. It's better to put the objective function you want to optimize in a file.
<br> In this example, the neural_network.py file defines an evaluation function called instance.





- The function takes the parameters for each instance from a field defined by the key 'hparameters'.
- The function also returns 1 - loss because the NeuralNetwork is minimizing loss and the Optimizer is set to 'maximize' the function.


### Main 
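- The network has 3 hidden layers, and the size ranges of the layers are as follows: [[32, 512], [32, 1024], [32, 512]]. `hp_ranges` in the code below also contains a fourth range, [.0001, .01], which is tuned alongside the layer sizes.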
- Notice that there are 4 gpr processors (Bayesian Optimization step, `g_procs`) and 2 GPU processors (training step, `n_procs`)
- Because the output layer is a vector of length 10 of ones and zeros we are using BCEWithLogitsLoss.
- I am not using a softmax final function because for prediction, it seems to perform better without one.

In [None]:

# Settings for the tuning session. All of these are handed to hp_tune.session
# in the cell that runs the tuning.
gbatch_size = 200                      # number of elements in the gaussian optimization batch
g_procs = 4                            # cpus, number of batches for the gaussian optimization phase
hp_ranges = [[32, 512], [32, 1024],    # the user defined function has to determine what to do with
             [32, 512], [.0001, .01]]  # numbers randomly generated from these [low, high] ranges
n_procs = 2                            # number of gpus, need to be careful if using all 4, weird things can happen
n_iter = 20                            # bayes optimization iterations
logfile_name = 'logfile_2b.txt'        # log file written by the tuning session


# Extra settings forwarded unchanged (as other_parameters) to the objective function.
oparameters = {'input_layer_size': 28*28,
               'output_layer_size': 10,
               'train_iterations': 10,   # neural network training iterations
               'n_batches': 2,           # number of mini batches of batch_size for training
               'batch_size': .01,        # size of mini batch, percent of total dataset
               'train_test_files': {'x_train': 'nn_datasets/X_train.csv', # location of train, test files
                                    # Test features must come from the test split so they
                                    # line up with y_test (this previously pointed at X_train.csv).
                                    'x_test': 'nn_datasets/X_test.csv',
                                    'y_train': 'nn_datasets/y_train.csv',
                                    'y_test': 'nn_datasets/y_test.csv'},
               'device': 'cuda'}


In [None]:
if __name__ == '__main__':

    # Build the tuning session around the objective function
    # neural_network.instance, using the settings from the configuration cell.
    mnist_session = hp_tune.session(neural_network.instance,
                                    hp_ranges=hp_ranges,
                                    batch_sz=gbatch_size, n_gpr_processors=g_procs,
                                    n_processors=n_procs, n_iter=n_iter,
                                    other_parameters = oparameters, log_file=logfile_name, use_qc=False, bo_url='https://boaz.onrender.com')

    # The pool is handed to the session, which uses it to run the tuning.
    p = mp.Pool()
    try:
        start = time.time()
        tuning_results  = mnist_session.tune(p)

        print("{} total seconds".format(round(time.time() - start,2)))
    except BaseException:
        # On failure (including a keyboard interrupt) stop the workers right
        # away so they do not linger and keep holding resources.
        p.terminate()
        raise
    else:
        # Normal completion: no more work will be submitted to the pool.
        p.close()
    finally:
        # join() is valid after either close() or terminate(); it waits for
        # the worker processes to exit.
        p.join()


In [None]:
# Fetch the results table from the tuning session.
summary = mnist_session.summary()
# Display only the rows whose score is above 0.92.
summary.loc[summary['score'].gt(.92)]

In [None]:
# Plot tuning progress on one explicit Axes: the best score recorded within
# each iteration, and the running best over all iterations up to each point.
fig, ax = plt.subplots()
summary.groupby('iteration')['score'].max().plot(ax=ax, label='best score within iteration')

# Running best for i = 0..n_iter. Series.max() returns NaN when no row has
# iteration <= i (the builtin max() would raise ValueError on an empty
# selection), so a missing iteration shows up as a gap instead of an error.
best = [summary.loc[summary['iteration'] <= i, 'score'].max() for i in range(n_iter + 1)]
ax.plot(best, label='best score so far')

ax.set(title='Hyperparameter tuning progress', xlabel='iteration', ylabel='score')
ax.legend()
fig.savefig('hptune4.png')

In [None]:
# Plot tuning progress on one explicit Axes: the best score recorded within
# each iteration, and the running best over all iterations up to each point.
fig, ax = plt.subplots()
summary.groupby('iteration')['score'].max().plot(ax=ax, label='best score within iteration')

# Running best for i = 0..n_iter. Series.max() returns NaN when no row has
# iteration <= i (the builtin max() would raise ValueError on an empty
# selection), so a missing iteration shows up as a gap instead of an error.
best = [summary.loc[summary['iteration'] <= i, 'score'].max() for i in range(n_iter + 1)]
ax.plot(best, label='best score so far')

ax.set(title='Hyperparameter tuning progress', xlabel='iteration', ylabel='score')
ax.legend()
fig.savefig('hptune_11152024.png')

In [None]:
# Plot tuning progress on one explicit Axes: the best score recorded within
# each iteration, and the running best over all iterations up to each point.
fig, ax = plt.subplots()
summary.groupby('iteration')['score'].max().plot(ax=ax, label='best score within iteration')

# Running best for i = 0..n_iter. Series.max() returns NaN when no row has
# iteration <= i (the builtin max() would raise ValueError on an empty
# selection), so a missing iteration shows up as a gap instead of an error.
best = [summary.loc[summary['iteration'] <= i, 'score'].max() for i in range(n_iter + 1)]
ax.plot(best, label='best score so far')

ax.set(title='Hyperparameter tuning progress', xlabel='iteration', ylabel='score')
ax.legend()
fig.savefig('hptune.png')

In [None]:
import os

import neptune

# Log this tuning run to Neptune.
# The API token is a credential and must never be stored in the notebook:
# export it as NEPTUNE_API_TOKEN before starting the kernel. A missing
# variable fails here with a KeyError instead of part-way through logging.
# NOTE: any token that was ever committed to this notebook should be revoked.
run = neptune.init_run(
    project="mshipman/HPTune",
    api_token=os.environ["NEPTUNE_API_TOKEN"],
)

try:
    # Full score history, uploaded as a CSV file.
    summary.to_csv("summary.csv", index=False)
    run["summary/score_history.csv"].upload("summary.csv")

    # Tuning configuration; "limits" renders hp_ranges as "(low,high) (low,high) ...".
    params = {"limits": '('+') ('.join(["{},{}".format(x[0], x[1]) for x in hp_ranges])+')',
              "gpr_batch_size": gbatch_size,
              "n_gpr_processors": g_procs, "n_processors": n_procs,
              "n_iterations": n_iter, "other_parameters": oparameters}
    run["parameters"] = params

    # Figure written by the plotting cell, the running-best scores as a
    # comma-separated string, and the tuning log file.
    run["summary/best_by_iteration.png"].upload("hptune.png")
    run['historical/best'] = ','.join([str(x) for x in best])
    run["log"].upload(logfile_name)
finally:
    # Always stop the run, even if an upload above fails.
    run.stop()
