## Setting up 

In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import nn
from torch.utils.data import random_split
from torch.utils.data import DataLoader
#from torchvision.transforms import ToTensor
#from torchvision.datasets import MNIST
from torch.optim import Adam
#from torch._C import device
#from torch.functional import broadcast_shapes

from lenet import Bayesian_LeNet_R, LeNet_R


# commented out dependencies will be deleted when this notebook is done

## Setting up our datasets and hyperparameters

In [2]:
'''
this dataset contains 1000 images of galsim galaxies with the image itself as
X with and with the sersic index used to generate each one as Y
(the galaxies in the images are comprised of a single sersic profile
with 15 arcsec half light radius and varying n)

the clean dataset has no noise
and the noisy dataset has noise

the datasets come in from the .pt files as pytorch Datasets
'''
clean_galaxies = torch.load('clean_single_component_galaxies_10_values_of_n.pt')
#noisy_galaxies = torch.load('noisy_single_component_galaxies_1000_values_of_n.pt')


In [3]:
#clean_galaxies[0] <- a tuple (x, y)

In [4]:
# !!! - dataset and model hardcoded here
data = clean_galaxies
MODEL = Bayesian_LeNet_R

In [5]:
# setting hyperparameters
learning_rate = 1e-3
batch_size = 50
epochs = 10
N = len(data)

In [6]:
# setting the ratio of train:val:test data
train_portion = 0.6
val_portion = 0.2
test_portion = 0.2

In [7]:
# splitting the dataset in to training, validation and testing
n_train = int(train_portion * N)
n_val = int(val_portion * N)
n_test = int(test_portion * N)

In [8]:
if n_train + n_val + n_test != N: print("Warning: some datapoints were excluded.")

In [9]:
(train_data, val_data, test_data) = random_split(data, [n_train, n_val, n_test], generator=torch.Generator().manual_seed(84))

In [10]:
# initializing dataloaders
train_data_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_data_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_data_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

In [11]:
# calculating steps per epoch for training+val set
# these numbers are only used for keeping record of the 
# average training + val loss
# they aren't directly used in training
training_steps = len(train_data) // batch_size
validation_steps = len(val_data) // batch_size

In [12]:
len(train_data)

6

## Initializing our model and training

In [13]:
# initializing the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MODEL(n_channels=1).to(device)
optimizer = Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()

In [14]:
# tracking training history
hist = {
    "train_loss": [],
    "train_error": [],
    "val_loss": [],
    "val_error": []
}

In [20]:
%%time
for i in range(epochs):
        # set model to training mode
        model.train()
        # initializing total training, val loss
        total_train_loss = 0
        total_val_loss = 0
        # initializing total error (in units matching the sersic index)
        total_train_error = 0
        total_val_error = 0

        # training step
        for (x, y) in train_data_loader:
            # send input to device
            (x, y) = (x.to(device), y.to(device))

            # perform a forward pass and calc the training loss
            prediction = model(x)
            loss = loss_fn(prediction, y)
            error = abs(prediction - y)
            print("X shape: ", x.shape)
            print("Y shape: ", y.shape)
            print("Prediction shape: ", prediction.shape)
            print("Error shape: ", error.shape)

            # setting gradients to zero, performing backprop and updating the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # tracking training loss and num correct
            total_train_loss += loss
            total_train_error += error

        # validation step
        with torch.no_grad(): # turning off autograd for evaluation
            # setting model to evaluation mode
            model.eval()

            for (x, y) in val_data_loader:
                # send input to device
                (x, y) = (x.to(device), y.to(device))

                # perform a forward pass and calc the training loss
                prediction = model(x)
                loss = loss_fn(prediction, y)
                error = abs(prediction - y)
                total_val_loss += loss
                total_val_error += error

        # adding stats to the history object
        
        avg_train_loss = total_train_loss #/ training_steps
        avg_val_loss = total_val_loss #/ validation_steps
        avg_train_error = total_train_error #/ len(train_data_loader.dataset)
        avg_val_error = total_val_error #/ len(val_data_loader.dataset)

        hist['train_loss'].append(avg_train_loss.cpu().detach().numpy())
        hist['train_error'].append(avg_train_error)
        hist['val_loss'].append(avg_val_loss.cpu().detach().numpy())
        hist['val_error'].append(avg_val_error)

        # printing
        print("Epoch {}/{}".format(i+1, epochs))
        print("Training Loss: {}, Training Error: {}".format(avg_train_loss, avg_train_error))
        print("Validation Loss: {}, Validation Error: {}".format(avg_val_loss, avg_val_error))



Shape of input to forward method:  torch.Size([6, 1, 495, 495])
Shape of x before 2nd convolution:  torch.Size([6, 20, 245, 245])
Shape of x before flatten layer:  torch.Size([6, 50, 120, 120])
Shape of x after flatten layer / before FC 1:  torch.Size([6, 720000])
Shape of x before passing to final FC layer:  torch.Size([6, 6])
Shape of output:  torch.Size([6])
X shape:  torch.Size([6, 1, 495, 495])
Y shape:  torch.Size([6])
Prediction shape:  torch.Size([6])
Error shape:  torch.Size([6])
Shape of input to forward method:  torch.Size([2, 1, 495, 495])
Shape of x before 2nd convolution:  torch.Size([2, 20, 245, 245])
Shape of x before flatten layer:  torch.Size([2, 50, 120, 120])
Shape of x after flatten layer / before FC 1:  torch.Size([2, 720000])
Shape of x before passing to final FC layer:  torch.Size([2, 6])
Shape of output:  torch.Size([2])
Epoch 1/10
Training Loss: 139.583740234375, Training Error: tensor([13.9808, 12.2031, 11.3143, 11.7587, 10.4255, 10.8699],
       grad_fn=<Add

Shape of x before flatten layer:  torch.Size([2, 50, 120, 120])
Shape of x after flatten layer / before FC 1:  torch.Size([2, 720000])
Shape of x before passing to final FC layer:  torch.Size([2, 6])
Shape of output:  torch.Size([2])
Epoch 8/10
Training Loss: 4.9018073081970215, Training Error: tensor([2.3378, 1.4490, 4.1156, 0.5601, 1.8934, 1.0045],
       grad_fn=<AddBackward0>)
Validation Loss: 19.572399139404297, Validation Error: tensor([5.8290, 2.2733])
Shape of input to forward method:  torch.Size([6, 1, 495, 495])
Shape of x before 2nd convolution:  torch.Size([6, 20, 245, 245])
Shape of x before flatten layer:  torch.Size([6, 50, 120, 120])
Shape of x after flatten layer / before FC 1:  torch.Size([6, 720000])
Shape of x before passing to final FC layer:  torch.Size([6, 6])
Shape of output:  torch.Size([6])
X shape:  torch.Size([6, 1, 495, 495])
Y shape:  torch.Size([6])
Prediction shape:  torch.Size([6])
Error shape:  torch.Size([6])
Shape of input to forward method:  torch.S

In [19]:
torch.save(model, "Bayesian_LeNet_Regression_Mar29.pth")

In [17]:
# note to self, when cleaning up this notebook into scripts, change the 'unknown number' in the bayesian lenet R class, 
# figure out what it is, and in a cell above, call the network with the params inserted