In [None]:
import os
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import sys
sys.path.insert(1, '/home/kseuro/Kai/deeplearnphysics/pytorch/particle_generator/')

# My stuff
import ae
import utils
from dataloader import LArCV_loader

In [None]:
# Select the GPU that the model and the loss function are moved to.
# NOTE(review): the previous comment called GPU 1 the best option, yet ordinal 2
#               is what is actually selected -- confirm which card is intended.
gpu_ordinal = 2
device = torch.device(gpu_ordinal)

In [None]:
# Set the root path of the AutoEncoder experiments folder.
# Every entry directly under this folder is listed as one experiment.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
exp_root = "/media/hdd1/kai/particle_generator/experiments/larcv_ae/"

## Get the names of all the experiments in the exp_root folder

In [None]:
# Full path of every entry found directly under exp_root.
# NOTE: os.listdir() returns entries in arbitrary order, so the indices printed
#       below are only meaningful for this particular listing.
exp_paths = [os.path.join(exp_root, entry) for entry in os.listdir(exp_root)]

# Print an indexed menu of the experiments
separator = "-"*60
print(separator)
for exp_idx, exp_entry in enumerate(exp_paths):
    print("\n Exp_{}:".format(str(exp_idx)), exp_entry, '\n')
    print(separator)

In [None]:
# Set the dir of the particular experiment to be deployed.
# The index refers to the "Exp_<i>" entries printed from exp_paths.
exp_dir = exp_paths[2]

In [None]:
# Create the full path to the experiment, terminated by a "/".
# exp_dir was already built by joining exp_root with the entry name; because
# exp_root is an absolute path, os.path.join() returns exp_dir unchanged here.
exp_path = "{}/".format(os.path.join(exp_root, exp_dir))
print("Experiment path set as: \n{}".format(exp_path))

In [None]:
# Load the experiment's config csv into a pandas DataFrame
# (exp_path already ends with "/", so plain concatenation yields the file path)
config_csv = exp_path + "config.csv"
config_df = pd.read_csv(config_csv, delimiter = ",")

In [None]:
def _config_int(key):
    """Return, as an int, the value in column '0' of the config row whose
    'Unnamed: 0' label contains `key`.

    `.item()` raises unless exactly one row matches.
    """
    # `== True` turns missing (NaN) match results into False
    label_matches = config_df['Unnamed: 0'].str.contains(key) == True
    return int(config_df[label_matches]['0'].item())

# Get the model architecture from config df
n_layers = _config_int("n_layers")
l_dim    = _config_int("l_dim")

# The "dataset" entry is squared to obtain im_size; im_dim is the square root
# of im_size and is used to pick the matching png folders
im_size  = _config_int("dataset")**2
im_dim   = int(np.sqrt(im_size))

In [None]:
# Path to model weights: sub-folder name that is appended to the experiment path
weights_dir = "weights/"

## Path to the test data

In [None]:
# Folder holding the test images for the configured image dimension
test_data = "/media/hdd1/kai/particle_generator/larcv_data/test/larcv_png_{}/".format(im_dim)

# Count every file found anywhere below the test folder
num_test_ex = sum(len(file_names) for _, _, file_names in os.walk(test_data))
print("{} test data will be loaded from: \n{}".format(num_test_ex, test_data))

## Path to the training data

In [None]:
# Path to the training data (folder name is completed with the image dimension im_dim)
train_data = "/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_{}/".format(im_dim)
print("Training data will be loaded from: \n{}".format(train_data))

## Set up the torch dataloader

In [None]:
# Keyword arguments shared by the test and train DataLoaders
loader_kwargs = {'num_workers' : 2, 'batch_size': 1, 'shuffle': True}

# Convert images to tensors, then normalise with mean 0.5 and std 0.5.
# The pipeline gets its own name: binding it to `transforms` would shadow the
# imported torchvision.transforms module and make this cell fail when re-run.
image_transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5],[0.5])])

# Datasets over the test and train image folders, sharing the same pipeline
test_dataset  = LArCV_loader(root = test_data,  transforms = image_transforms)
train_dataset = LArCV_loader(root = train_data, transforms = image_transforms)

In [None]:
# Wrap both datasets in DataLoaders that share the same loader_kwargs
test_loader = DataLoader(test_dataset, **loader_kwargs)
train_loader = DataLoader(train_dataset, **loader_kwargs)

## Set up the AE layers

In [None]:
# Size of the smallest encoder layer; each additional layer doubles it
base = [256]

# Compute encoder sizes, largest first:
#   [base * 2**(n_layers - 1), ..., base * 2, base]
# A plain comprehension is used because `yield` inside a list comprehension
# is a SyntaxError from Python 3.8 onwards.
enc_sizes = [base[0] * 2**i for i in range(n_layers)][::-1]

# Update kwarg dicts
# Decoder is the reverse of the encoder
ae_kwargs = {'enc_sizes' : enc_sizes, 'l_dim' : l_dim, 'im_size' : im_size, 'dec_sizes' : enc_sizes[::-1]}

## Set up the model loss function

In [None]:
# Mean-squared-error criterion used to compare model output with its input image
loss_fn = nn.MSELoss().to(device)

## Load model checkpoint

In [None]:
# Keep track of MSE results for train and test set for each checkpoint.
# Re-running this cell empties the list and discards earlier results.
MSE_results = []

In [None]:
# Get checkpoint name(s): full path of every entry in the experiment's weights folder.
# NOTE: os.listdir() returns entries in arbitrary order.
checkpoint_path  = exp_path + weights_dir
checkpoint_names = [os.path.join(checkpoint_path, entry) for entry in os.listdir(checkpoint_path)]

In [None]:
# Print an indexed menu of the available checkpoints
separator = "-"*60
print(separator)
for ckpt_idx, ckpt_file in enumerate(checkpoint_names):
    print("\n{} :".format(str(ckpt_idx)), ckpt_file, '\n')
    print(separator)

In [None]:
# Pick the checkpoint to evaluate; the index refers to the checkpoint_names listing
current_checkpoint = checkpoint_names[6]

In [None]:
# Load the model checkpoint
# Keys: ['state_dict', 'epoch', 'optimizer']
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# written from a different GPU is not restored onto the device it was saved from.
# NOTE: torch.load unpickles the file -- only load checkpoints from trusted sources.
checkpoint = torch.load(current_checkpoint, map_location=device)

In [None]:
# Build the AutoEncoder from the architecture kwargs and load the model on GPU
model = ae.AutoEncoder(**ae_kwargs).to(device)

In [None]:
# Load the model's state dictionary (the 'state_dict' entry of the checkpoint)
# Note: The IncompatibleKeys(missing_keys=[], unexpected_keys=[]) message indicates that
#       there were no problems in loading the state dictionary. Bit confusing...
model.load_state_dict(checkpoint['state_dict'])

In [None]:
# Put the model in evaluation mode: this notebook only measures reconstruction
# loss, so layers whose behaviour depends on the mode (e.g. dropout, batch-norm)
# must use their inference behaviour rather than their training behaviour
model.eval()

## Loop over the test data and record the average loss for the checkpoint

In [None]:
# Per-batch loss values collected while looping over the test loader
test_losses = []

In [None]:
# Score every test batch. no_grad() skips autograd bookkeeping, which is not
# needed because no backward pass is performed here.
with torch.no_grad():
    for image in test_loader:
        # Flatten image and copy to gpu
        # (the batch's own size is used so a short final batch cannot break the view)
        image = image.view(image.size(0), -1).to(device)

        # Forward pass
        output = model(image)

        # Get the loss value for the batch
        loss = loss_fn(output, image)

        # Append loss value (.item() already returns a Python float)
        test_losses.append(loss.item())

In [None]:
# Mean of the per-batch test losses
avg_test_loss = np.mean(test_losses)

In [None]:
# Report the average test loss, rounded to 3 decimals
print("Average test loss: {} ".format( round(avg_test_loss, 3) ) )

## Loop over the training data and record the average loss for the checkpoint
- Here, we only loop over the same number of training examples as there are test examples

In [None]:
# Per-batch loss values collected while looping over the train loader
train_losses = []

In [None]:
# Score training batches. no_grad() skips autograd bookkeeping, which is not
# needed because no backward pass is performed here.
with torch.no_grad():
    for idx, image in enumerate(train_loader):
        # Stop once as many training batches as there are test examples have been
        # scored (with batch_size 1 a batch is a single example). Checking before
        # the forward pass also covers num_test_ex == 0, where an equality test
        # made after the pass would never fire.
        if idx >= num_test_ex:
            break

        # Flatten image and copy to gpu
        # (the batch's own size is used so a short final batch cannot break the view)
        image = image.view(image.size(0), -1).to(device)

        # Forward pass
        output = model(image)

        # Get the loss value for the batch
        loss = loss_fn(output, image)

        # Append loss value (.item() already returns a Python float)
        train_losses.append(loss.item())

In [None]:
# Mean of the per-batch train losses
avg_train_loss = np.mean(train_losses)

In [None]:
# Report the average train loss, rounded to 3 decimals
print("Average train loss: {} ".format( round(avg_train_loss, 3) ) )

## Find the difference between the test loss and train loss

In [None]:
# Absolute gap between the average train loss and the average test loss
delta_MSE = abs( avg_train_loss - avg_test_loss )

In [None]:
# Report the train/test gap (rounded to 3 decimals) together with the checkpoint it belongs to
print("delta_MSE: {}".format( round(delta_MSE, 3)))
print('For checkpoint: \n{}'.format(current_checkpoint))

## Append the results to the list

In [None]:
# Record this checkpoint as [test loss, train loss, train/test gap, checkpoint path].
# Re-running this cell appends the same record again.
MSE_results.append([avg_test_loss, avg_train_loss, delta_MSE, current_checkpoint])

In [None]:
## Checkpoint Analysis
# Show the train/test gap (entry 2 of each record) for every recorded checkpoint
for record in MSE_results:
    print("delta_MSE: {}".format(round(record[2], 5)))

## Save evaluation results to experiment folder

In [None]:
# Text file, inside the experiment folder, that receives the evaluation results
eval_file = exp_path + "checkpoint_evaluation.txt"

In [None]:
# Write one two-line record per evaluated checkpoint.
# 'w' truncates the file, so it always mirrors the current MSE_results; read
# access ('w+') is not needed.
# The loop variables use their own names so that the notebook-level `checkpoint`
# (the loaded checkpoint dict) and `delta_MSE` are not overwritten by this cell.
with open(eval_file, 'w') as file_object:
    for test_loss, train_loss, mse_gap, ckpt_name in MSE_results:
        line1 = "Checkpoint: {}\n".format(ckpt_name)
        line2 = "test_loss: {}, train_loss: {}, delta_MSE: {}".format(test_loss, train_loss, mse_gap)
        # Terminating each record with a newline character is a Unix best practice
        file_object.write(line1 + line2 + "\n")