In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dataLoad import PulsarData
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# For pretty plotting.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so prefer the new name when this
# installation offers it and otherwise keep the original one.
paper_style = 'seaborn-v0_8-paper' if 'seaborn-v0_8-paper' in plt.style.available else 'seaborn-paper'
plt.style.use(paper_style)
plt.rcParams["font.family"] = "serif"

In [3]:
class NeuralN(nn.Module):
    """Feed-forward classifier with a single hidden layer.

    The input is mapped to ``hiddensize`` units, passed through a ReLU, and
    then mapped to two output scores. No softmax is applied in ``forward``;
    the raw scores are returned.
    """

    def __init__(self, inputsize, hiddensize):
        super().__init__()
        self.inputsize, self.hiddensize = inputsize, hiddensize
        # Affine layers (y = Wx + b): input -> hidden, then hidden -> 2 scores.
        self.fcinput = nn.Linear(inputsize, hiddensize)
        self.fcoutput = nn.Linear(hiddensize, 2)

    def forward(self, x: torch.Tensor):
        """Return the two raw output scores computed from ``x``."""
        hidden = F.relu(self.fcinput(x))
        return self.fcoutput(hidden)

In [11]:
# Number of passes over the training samples made by the training loop.
epochs = 10

In [4]:
# Build the dataset wrapper once and read both attributes from that single
# instance, instead of constructing PulsarData('HTRU_2') twice. This also
# guarantees that features and targets come from the same object.
pulsar_data = PulsarData('HTRU_2')
raw_features = pulsar_data.features
raw_targets = pulsar_data.targets

In [5]:
# Seed torch's global RNG so the random weight initialisation is repeatable
# from run to run (the train/test split already uses a fixed seed of 42).
torch.manual_seed(42)
# One input unit per feature column, 12 hidden units.
net = NeuralN(raw_features.shape[1], 12)

In [6]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split repeatable.
(
    train_features_data,
    test_features_data,
    train_targets_data,
    test_targets_data,
) = train_test_split(
    raw_features,
    raw_targets,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Converting into torch structure
def _to_tensor(data, dtype):
    """Return ``data`` as a torch tensor of ``dtype``.

    Accepts either an object exposing ``to_numpy()`` or an existing tensor,
    so re-running this cell does not fail with an AttributeError on the
    already-converted tensors.
    """
    if isinstance(data, torch.Tensor):
        return data.to(dtype)
    return torch.from_numpy(data.to_numpy()).to(dtype)


# Features become float32 and targets int64 -- the same dtypes that
# .float() and .long() produce.
train_features_data, test_features_data = (
    _to_tensor(train_features_data, torch.float32),
    _to_tensor(test_features_data, torch.float32),
)
train_targets_data, test_targets_data = (
    _to_tensor(train_targets_data, torch.int64),
    _to_tensor(test_targets_data, torch.int64),
)

In [12]:
# Cross-entropy loss on the network's two output scores, minimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-4)
# Switch to training mode; as the cell's last expression this also echoes the module.
net.train()

NeuralN(
  (fcinput): Linear(in_features=8, out_features=12, bias=True)
  (fcoutput): Linear(in_features=12, out_features=2, bias=True)
)

In [13]:
# Per-sample stochastic training: one optimiser step for every training row.
# Training mode is enabled here as well, so the cell can be re-run after the
# trailing net.eval() without depending on an earlier cell.
net.train()
for e in range(epochs):
    epoch_losses = []
    for sample_features, sample_target in zip(train_features_data, train_targets_data):
        # The optimiser was built from net.parameters(), so a single
        # zero_grad() clears every gradient; a separate net.zero_grad()
        # call is redundant.
        optimizer.zero_grad()
        # Add a leading batch axis of size 1 to both the scores and the target.
        prediction = net(sample_features).unsqueeze(0)
        target = sample_target.unsqueeze(0)
        # Calculating the loss function
        loss = criterion(prediction, target)
        epoch_losses.append(loss.item())
        # Calculating the gradient
        loss.backward()
        optimizer.step()
    # Mean training loss over the epoch.
    print(e, np.mean(epoch_losses))

# Switch to evaluation mode once training is finished.
net.eval()

0 1.1014875446688606
1 0.10554584375737733
2 0.09324232910350413
3 0.09023606683957107
4 0.08852528330455149
5 0.08732796615019854
6 0.08650611924790122
7 0.08579910656781625
8 0.08526118259591524
9 0.08480712671037732


NeuralN(
  (fcinput): Linear(in_features=8, out_features=12, bias=True)
  (fcoutput): Linear(in_features=12, out_features=2, bias=True)
)

In [14]:
# Accuracy on the training and held-out sets. This is inference only, so
# gradient tracking is switched off to avoid building autograd graphs for the
# full datasets.
with torch.no_grad():
    # The predicted class is the index of the larger of the two output scores.
    train_prediction = torch.argmax(net(train_features_data), dim=1)
    acc_train = torch.mean((train_prediction == train_targets_data).float())
    test_prediction = torch.argmax(net(test_features_data), dim=1)
    acc_test = torch.mean((test_prediction == test_targets_data).float())

print(acc_train, acc_test)

tensor(0.9753) tensor(0.9750)


In [8]:
def GP_CrossValidation(max_iter_predict,n_restarts_optimizer, data, targets):
    """Gaussian Process classifier cross validation.

    Builds a GaussianProcessClassifier (random_state=42) with the given
    max_iter_predict and n_restarts_optimizer, scores it on data/targets
    with 5-fold cross validation using accuracy, and returns the mean of
    the fold scores.
    """
    gp_classifier = GaussianProcessClassifier(
        max_iter_predict=max_iter_predict,
        n_restarts_optimizer=n_restarts_optimizer,
        random_state=42,
    )
    fold_scores = cross_val_score(gp_classifier, data, targets, cv=5, scoring='accuracy')
    return fold_scores.mean()

In [9]:
def optimize_GP(data, targets, pars, n_iter=5):
    """Apply Bayesian Optimization to Gaussian Process classifier parameters.

    Args:
        data: Feature matrix handed to GP_CrossValidation.
        targets: Class labels handed to GP_CrossValidation.
        pars: Dict mapping "max_iter_predict" and "n_restarts_optimizer" to
            (lower, upper) search bounds.
        n_iter: Number of optimisation steps run after the 4 initial points.

    Returns:
        The BayesianOptimization object after maximize() has been called.
    """

    def crossval_wrapper(max_iter_predict, n_restarts_optimizer):
        """Wrapper of the Gaussian Process cross validation.

        The optimizer proposes floats, so both values are cast to int before
        they are passed along. Truncation can produce 0 for max_iter_predict,
        which scikit-learn rejects (it requires >= 1), so the values are
        clamped to their smallest valid setting.
        """
        return GP_CrossValidation(max_iter_predict=max(1, int(max_iter_predict)),
                                  n_restarts_optimizer=max(0, int(n_restarts_optimizer)),
                                  data=data,
                                  targets=targets)

    optimizer = BayesianOptimization(f=crossval_wrapper,
                                     pbounds=pars,
                                     random_state=42,
                                     verbose=2)
    optimizer.maximize(init_points=4, n_iter=n_iter)

    return optimizer


In [10]:
# Search bounds for the Gaussian Process hyper-parameters. The sampled values
# are truncated to int before use and scikit-learn requires
# max_iter_predict >= 1, so the lower bound starts at 1 rather than 0.
parameters_BayesianOptimization = {"max_iter_predict": (1, 10),
                                   "n_restarts_optimizer": (50, 150),
                                  }

# Keep the result under its own name: binding it to `BayesianOptimization`
# would shadow the imported class and make optimize_GP fail on any later call.
# np.asarray hands scikit-learn plain arrays rather than torch tensors.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt;
# presumably fitting the Gaussian Process on the full training set with
# 50-150 restarts is too costly -- consider a subsample.
gp_optimization = optimize_GP(np.asarray(train_features_data),
                              np.asarray(train_targets_data),
                              parameters_BayesianOptimization,
                              n_iter=5)
print(gp_optimization.max)

|   iter    |  target   | max_it... | n_rest... |
-------------------------------------------------


KeyboardInterrupt: 