# Motivation 

- Train a deep ensemble on MNIST, framed as a regression problem, to build a reward distribution that k-of-n will sample from later.

- Convert MNIST labels into a reward vector using the following equations:

    - $R(label) = label+1$ if right (reward)
    
    - $R(label) = -\frac{1}{9} \cdot label$ if wrong (risk)

## import lib

In [None]:
import torch
import time
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms as transforms
from IPython import display

## Load dataset

In [None]:
# Convert every image to a torch tensor as it is read from the dataset.
transform = transforms.ToTensor()

# MNIST training split, stored under the 'datasets' directory (download=True).
mnist_train = torchvision.datasets.MNIST(
    root='datasets',
    train=True,
    download=True,
    transform=transform,
)

# No batch size is passed, so this relies on the DataLoader default;
# samples are served in dataset order (shuffle=False).
trainloader = torch.utils.data.DataLoader(dataset=mnist_train, shuffle=False)

## Hyper-parameters

In [None]:
# Ensemble / optimisation settings read by the training cell.
n_esambles = 10         # how many models make up the ensemble
n_epochs = 100          # training epochs per model
batch_size = 512        # samples per mini-batch
learning_rate = 0.0016  # learning rate handed to the optimiser
output_models_dir = ""  # path prefix prepended to every saved file name

## Data pre-processing

In [None]:
def action_to_reward(a):
    '''
    Convert MNIST labels into normalised reward vectors.

    For each sample, entry j of the reward vector is -(j/9) (the risk of
    choosing the wrong digit j), except the entry at the true label, which
    is label + 1 (the reward). Every vector is finally divided by 10.

    Args:
    a: (tensor) has shape (number of samples, ) integer MNIST labels in 0..9
    return:
    new_y: (float tensor) has shape (number of samples, 10) reward vectors
    '''
    n_samples = a.shape[0]
    # One penalty row shared by all samples: 0, -1/9, -2/9, ..., -1.
    penalties = -(1/9)*torch.arange(10)
    new_y = penalties.repeat(n_samples, 1)
    # Cast to long so the labels are always used as positions (never as a mask),
    # then overwrite the true-label entry of every row in one indexed assignment
    # instead of looping over the samples in Python.
    labels = torch.as_tensor(a, dtype=torch.long, device=new_y.device)
    rows = torch.arange(n_samples)
    new_y[rows, labels] = (labels + 1).to(new_y.dtype)
    return new_y/10 # divide by 10 just to normalize the reward vector

# Run on the GPU when torch reports one as available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# One row per loader step: columns 0..783 hold the flattened image,
# columns 784..793 hold its reward vector.
# len(trainloader) is used as the row count, so the loader is expected to
# yield exactly one sample per step.
training_set = np.zeros((len(trainloader), 794))
for row, (img, label) in enumerate(trainloader):
    training_set[row, :784] = img.view(-1).numpy()
    training_set[row, 784:] = action_to_reward(label).numpy()

## re-weight parameters ###
# Per-output loss weights 1/(k+1)^2 for k = 0..9, so later outputs weigh less.
re_weight_vector = torch.tensor([1/(k+1)**2 for k in range(10)], device=device)

## Train Ensemble

In [None]:
training_loss = np.zeros((n_esambles, n_epochs)) # mean mini-batch loss per (model, epoch)
times = [] # seconds spent training each model
# Element-wise squared error: the reduction is deferred so that each of the 10
# outputs can be re-weighted before averaging. reduction="none" is the supported
# spelling of the deprecated size_average=False / reduce=False pair.
loss_fun = nn.MSELoss(reduction="none")

for m in range (n_esambles):
    t = time.perf_counter() # reset timer for each model
    # A fresh network is built for every ensemble member.
    # Spatial size: 28 -> conv 4x4 -> 25 -> pool -> 12 -> conv 4x4 -> 9 -> pool -> 4,
    # so the flattened feature vector has 16 * 4 * 4 = 256 entries.
    model = nn.Sequential(
        nn.Conv2d(1, 64, (4,4)),
        nn.MaxPool2d((2,2)),
        nn.ReLU(),

        nn.Conv2d(64, 16, (4,4)),
        nn.MaxPool2d((2,2)),
        nn.ReLU(),

        nn.Flatten(),

        nn.Linear(256,50),
        nn.ReLU(),

        nn.Linear(50,15),
        nn.ReLU(),

        nn.Linear(15,10),
    ).to(device)
    opt = torch.optim.Adam(params = model.parameters(), lr=learning_rate)

    for ep in range (n_epochs):
        epoch_loss = 0.0 # sum of the mini-batch losses of this epoch
        n_batches = 0    # number of mini-batches seen in this epoch
        # In-place row shuffle; image and reward share a row, so pairs stay aligned.
        np.random.shuffle(training_set)
        for batch in range (0, training_set.shape[0] , batch_size):
            n_batches += 1
            chunk = training_set[batch : batch + batch_size]
            x = torch.tensor(chunk[:, :784], device=device, dtype=torch.float)
            x = x.view(x.shape[0], 1, 28, 28)
            y = torch.tensor(chunk[:, 784:], device=device, dtype=torch.float)
            # Weight the per-output squared errors, then average over batch and outputs.
            loss = torch.mean(loss_fun(model(x), y)*re_weight_vector)
            epoch_loss += loss.item()

            opt.zero_grad()
            loss.backward()
            opt.step()
        print("model: %i , [EPOCH]: %i, [training LOSS]: %.6f" % (m, ep+1, epoch_loss/n_batches))
        # wait=True keeps the line above visible until the next output replaces it.
        display.clear_output(wait=True)
        training_loss[m, ep] = epoch_loss/n_batches

    # The whole module object is serialised (not just its state_dict).
    torch.save(model, output_models_dir+"ensemble_model_{}".format(m)) # save each model by model number
    times.append( time.perf_counter() - t)
np.save(output_models_dir+"training_loss", training_loss) # save training loss for all models in the ensemble

## Calculate training time

In [None]:
# Mean per-model training time, converted from seconds to minutes and rounded to 3 decimals.
mean_minutes = np.round(np.mean(times)/60, 3)
print("average training time for a single model is {} min".format(mean_minutes))

## Calculate accuracy of the last model 

In [None]:
# Accuracy of `model` (the last ensemble member that was trained) on the training set.
# A prediction is correct when the arg-max of the network output equals the arg-max
# of the stored reward vector; the only positive reward entry sits at the true digit.
y_pre = np.zeros(training_set.shape[0])
y_i = np.argmax(training_set[:, 784:], axis=1)
# Inference only: disable autograd so no graph is recorded for the forward passes.
with torch.no_grad():
    for b in range (0, training_set.shape[0], batch_size):
        x_i = torch.tensor(training_set[b : b + batch_size, :784], device=device, dtype=torch.float)
        y_pre[b:batch_size+b] = np.argmax(model(x_i.view(x_i.shape[0], 1, 28, 28)).cpu().numpy(), axis=1)

# Count the matches with one vectorised comparison instead of a Python loop.
acc = int(np.sum(y_pre == y_i))
print("model accuracy: {}".format(np.round(100*acc/training_set.shape[0], 3)))

## Plot training loss for all models

In [None]:
import matplotlib.pyplot as plt
from matplotlib import style
# Overlay one loss-versus-epoch curve per ensemble member on a single figure.
style.use("ggplot")
for model_idx in range(n_esambles):
    plt.plot(training_loss[model_idx])
plt.ylabel("Training Loss")
plt.xlabel("epoch")
plt.show()