In [28]:
import os
import typing

import numpy as np
import torch
import torch.optim
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score
from torch import nn
from torch.nn import functional as F
from tqdm import trange
import tqdm
from torch.distributions import Poisson
from collections import deque
import copy

from util import ece, ParameterDistribution, draw_reliability_diagram, draw_confidence_histogram, SGLD
from enum import Enum

from torch.utils.data import DataLoader
import json

In [29]:
class Framework(object):
    def __init__(self, dataset_train:torch.utils.data.Dataset, *args, **kwargs):
        """
        Basic Framework for your bayesian neural network.
        Other solutions like MC Dropout, Ensemble learning will based upon this.
        """
        self.train_set = dataset_train
        self.print_interval = 100 # number of batches until updated metrics are displayed during training

    def train(self):
        raise NotImplementedError()

    def predict(self, data_loader: torch.utils.data.DataLoader) -> np.ndarray:
        """
        Predict the class probabilities using your trained model.
        This method should return an (num_samples, 10) NumPy float array
        such that the second dimension sums up to 1 for each row.

        :param data_loader: Data loader yielding the samples to predict on
        :return: (num_samples, 10) NumPy float array where the second dimension sums up to 1 for each row
        """
        probability_batches = []
        
        for batch_x, _ in tqdm.tqdm(data_loader):
            current_probabilities = self.predict_probabilities(batch_x).detach().numpy()
            probability_batches.append(current_probabilities)

        output = np.concatenate(probability_batches, axis=0)
        assert isinstance(output, np.ndarray)
        assert output.ndim == 2 and output.shape[1] == 10
        assert np.allclose(np.sum(output, axis=1), 1.0)
        return output

    def predict_probabilities(self, x: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError()

In [30]:
def evaluate(model:Framework, eval_loader: torch.utils.data.DataLoader, data_dir: str, output_dir: str):
    """
    Evaluate your model.
    :param model: Trained model to evaluate
    :param eval_loader: Data loader containing the training set for evaluation
    :param data_dir: Data directory from which additional datasets are loaded
    :param output_dir: Directory into which plots are saved
    """
    print("evaulating")
    # Predict class probabilities on test data
    predicted_probabilities = model.predict(eval_loader)
    

    # Calculate evaluation metrics
    predicted_classes = np.argmax(predicted_probabilities, axis=1)
    actual_classes = eval_loader.dataset.tensors[1].detach().numpy()
    accuracy = np.mean((predicted_classes == actual_classes)) 
    ece_score = ece(predicted_probabilities, actual_classes)
    print(f'Accuracy: {accuracy.item():.3f}, ECE score: {ece_score:.3f}')
    score = accuracy.item()+3*(0.5-ece_score)
    print(f'score: {score:.3f}')

In [31]:
class MNISTNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.LazyConv2d(16, 3, 1)
        self.conv2 = nn.LazyConv2d(32, 3, 1)
        self.fc1 = nn.LazyLinear(128)
        self.fc2 = nn.LazyLinear(10)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.dropout(x,training=True,p=0.5)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x,training=True,p=0.5)
        x = self.fc2(x)
        # output = self.softmax(x)
        return x
#     def __init__(self,
#                 in_features: int, 
#                 out_features: int,
#                 dropout_p=0,
#                 dropout_at_eval=False
#                 ):
#         super().__init__()
#         # TODO General_2: Play around with the network structure.
#         # You could change the depth or width of the model
#         self.layer1 = nn.Linear(in_features,200)
#         self.layer2 = nn.Linear(200, 200)
#         self.layer3 = nn.Linear(200, out_features)
#         self.dropout_p = dropout_p
#         self.dropout_at_eval = dropout_at_eval

#     def forward(self, x):
#         # TODO General_2: Play around with the network structure
#         # You might add different modules like Pooling 
#         x = F.dropout(
#                 F.relu(self.layer1(x)),
#                 p=self.dropout_p,
#                 training=self.training or self.dropout_at_eval
#         )
#         x = F.dropout(
#                 F.relu(self.layer2(x)),
#                 p=self.dropout_p,
#                 training=self.training or self.dropout_at_eval
#         )

#         class_probs = self.layer3(x)
#         return class_probs

In [32]:

class DropoutTrainer(Framework):
    def __init__(self, dataset_train,
                 *args, **kwargs):
        super().__init__(dataset_train, *args, **kwargs)

        # Hyperparameters and general parameters
        # TODO: MC_Dropout_4. Do experiments and tune hyperparameters
        self.batch_size = 128
        self.learning_rate = 1e-3
        self.num_epochs = 50
        torch.manual_seed(0) # set seed for reproducibility
        
        # TODO: MC_Dropout_1. Initialize the MC_Dropout network and optimizer here
        # You can check the Dummy Trainer above for intuition about what to do
        self.network = MNISTNet()
        self.train_loader = DataLoader(
            dataset_train, batch_size=self.batch_size, shuffle=True, drop_last=True
            )
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.learning_rate) 
        

    def train(self):
        self.network.train()
        self.network  = self.network.to("cuda")
        # self.train_loader  = self.train_loader.to("cuda")
        progress_bar = trange(self.num_epochs)
        for _ in progress_bar:
            for batch_idx, (batch_x, batch_y) in enumerate(self.train_loader):
                # batch_x are of shape (batch_size, 784), batch_y are of shape (batch_size,)

                batch_x = batch_x.to("cuda").view(-1,1,28,28)
                batch_y = batch_y.to("cuda")

                self.network.zero_grad()
                # TODO: MC_Dropout_2. Implement MCDropout training here
                # You need to calculate the loss based on the literature
                preds = F.softmax(self.network(batch_x),dim=-1)
                loss = F.nll_loss(preds,batch_y)
                # preds = preds.cpu()
                # batch_y = batch_y.cpu()
                # ece_ = ece(preds,batch_y)
                # ece_ = ece_.to("cuda")

                # loss = loss + ece_
                # loss = F.nll_loss(preds,batch_y)

                # Backpropagate to get the gradients
                loss.backward()

                self.optimizer.step()
                # Update progress bar with accuracy occasionally
                if batch_idx % self.print_interval == 0:
                    current_logits = self.network(batch_x)
                    current_accuracy = (current_logits.argmax(axis=1) == batch_y).float().mean()
                    progress_bar.set_postfix(loss=loss.item(), acc=current_accuracy.item())
          

    def predict_probabilities(self, x: torch.Tensor, num_sample=100) -> torch.Tensor:
        assert x.shape[1] == 28 ** 2
        self.network.eval()

        x = x.to("cuda").view(-1,1,28,28)
        # TODO: MC_Dropout_3. Implement your MC_dropout prediction here
        # You need to sample from your trained model here multiple times
        # in order to implement Monte Carlo integration
        preds = []
        for i in range(10):
            pred = self.network(x)
            pred = F.softmax(pred,dim=-1)
            pred = pred.detach().cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)

        # print("preds shape")
        # print(preds.shape)
        # print(preds)

        estimated_probability = preds.mean(axis=0)

        # print("estimated_probability shape")
        # print(estimated_probability.shape)
        # print(estimated_probability[0])

        estimated_probability = torch.from_numpy(estimated_probability)
        
        assert estimated_probability.shape == (x.shape[0], 10)  
        return estimated_probability


In [33]:
def run_solution(dataset_train: torch.utils.data.Dataset, data_dir: str = os.curdir, output_dir: str = '/results/'):
    """
    Run your task 2 solution.
    This method should train your model, evaluate it, and return the trained model at the end.
    Make sure to preserve the method signature and to return your trained model,
    else the checker will fail!

    :param dataset_train: Training dataset
    :param data_dir: Directory containing the datasets
    :return: Your trained model
    """


    trainer = DropoutTrainer(dataset_train=dataset_train)


    # Train the model
    print('Training model')
    trainer.train()

    # Predict using the trained model
    print('Evaluating model on training data')
    eval_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=64, shuffle=False, drop_last=False
    )
    evaluate(trainer, eval_loader, data_dir, output_dir)

    # IMPORTANT: return your model here!
    return trainer

In [34]:
# Load training data
data_dir = os.curdir
output_dir = os.curdir
raw_train_data = np.load(os.path.join(data_dir, 'train_data.npz'))
print(raw_train_data['train_x'].shape)
x_train = torch.from_numpy(raw_train_data['train_x']).reshape([-1, 784])
y_train = torch.from_numpy(raw_train_data['train_y']).long()
dataset_train = torch.utils.data.TensorDataset(x_train, y_train)

# Run actual solution
trainer = run_solution(dataset_train, data_dir=data_dir, output_dir=output_dir)

(20000, 28, 28)
Training model


100%|██████████| 50/50 [00:28<00:00,  1.77it/s, acc=0.984, loss=-.981]


Evaluating model on training data
evaulating


100%|██████████| 313/313 [00:02<00:00, 106.51it/s]

Accuracy: 0.991, ECE score: 0.009
score: 2.465





In [35]:
trainer.network = trainer.network.cpu()
torch.save(trainer.network.state_dict(),"trainer.pth")
# torch.save(trainer.state_dict(), 'trainer.pth')