### This notebook generates code-vector targets from the encoder branch of a trained AutoEncoder

In [None]:
import os
import torch
import numpy as np
import cv2 as cv
import pandas as pd
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import sys
sys.path.insert(1, '/home/kseuro/Kai/deeplearnphysics/pytorch/particle_generator/')

# My stuff
import ae
import utils
from dataloader import LArCV_loader

In [None]:
# Set the root path of the particle generator experiments folder.
# Every sub-directory of this folder is treated as one AutoEncoder experiment.
exp_root = "/media/hdd1/kai/particle_generator/experiments/larcv_ae/"

## Get the names of all the experiments in the exp_root folder

In [None]:
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the indices printed below stable across runs and machines. The index
# chosen in the next cell refers to this sorted order.
exp_paths = [os.path.join(exp_root, name) for name in sorted(os.listdir(exp_root))]

# Print an indexed menu of the available experiments
print("-"*60)
for i, exp in enumerate(exp_paths):
    print("\n Exp_{}:".format(i), exp, '\n')
    print("-"*60)

In [None]:
# Set the dir of the particular experiment for which to load a model.
# The index refers to the list printed by the previous cell. Entries of
# exp_paths already include exp_root, so exp_dir is a full path with a
# trailing slash appended.
exp_dir = exp_paths[2] + "/"

In [None]:
# Create the full path to the experiment.
# exp_dir is already an absolute path (it was built from exp_root), so
# os.path.join discards exp_root here and returns exp_dir unchanged.
exp_path = os.path.join(exp_root, exp_dir)
print("Experiment path set as: \n{}".format(exp_path))

In [None]:
# Path to model weights, relative to the experiment folder (exp_path).
# The trailing slash matters: the path is later built by string concatenation.
weights_dir = "weights/"

In [None]:
# Load the experiment's config.csv into a pandas DataFrame.
# exp_path ends with "/", so plain concatenation yields a valid file path.
config_csv = exp_path + "config.csv"
config_df = pd.read_csv(config_csv, delimiter = ",")

In [None]:
# Pull the model architecture out of the config DataFrame.
# Column 'Unnamed: 0' holds the setting names and column '0' holds their values;
# each setting is located by a substring match on its name.
cfg_keys = config_df['Unnamed: 0']
cfg_vals = config_df['0']

n_layers = int(cfg_vals[cfg_keys.str.contains("n_layers", na=False)].item())
l_dim    = int(cfg_vals[cfg_keys.str.contains("l_dim", na=False)].item())

# The "dataset" entry is read as the image side length; im_size is the number
# of pixels in the flattened (square) image.
im_size  = int(cfg_vals[cfg_keys.str.contains("dataset", na=False)].item())**2
im_dim   = int(np.sqrt(im_size))

In [None]:
# Sanity check: image side length and latent (code) dimension read from the config
print("im_dim: {}, l_dim: {}".format(im_dim, l_dim))

In [None]:
# Set up AE layer sizes
# Width of the narrowest encoder layer; every additional layer doubles it.
base = [256]

# Compute encoder sizes: base * 2**i for i in [0, n_layers), widest layer first.
# e.g. n_layers = 3 -> [1024, 512, 256]
# (The previous version used `yield` inside a list comprehension, which is a
#  SyntaxError on Python >= 3.8; this comprehension produces the same list.)
enc_sizes = [base[0] * 2**i for i in range(n_layers)][::-1]

# Update kwarg dicts
# Decoder is the reverse of the encoder
ae_kwargs = {'enc_sizes' : enc_sizes, 'l_dim' : l_dim, 'im_size' : im_size, 'dec_sizes' : enc_sizes[::-1]}

## Load model checkpoint

In [None]:
# Get checkpoint name(s)
# exp_path and weights_dir both end with "/", so concatenation gives the
# weights folder of the selected experiment.
checkpoint_path  = exp_path + weights_dir

# os.listdir returns entries in arbitrary order; sorting keeps the indices
# printed in the next cell stable across runs and machines.
checkpoint_names = [os.path.join(checkpoint_path, name)
                    for name in sorted(os.listdir(checkpoint_path))]

In [None]:
# Print an indexed menu of the checkpoint files found for this experiment
separator = "-" * 60
print(separator)
for position, checkpoint_file in enumerate(checkpoint_names):
    print("\n{} :".format(position), checkpoint_file, '\n')
    print(separator)

In [None]:
# Select the desired checkpoint from the list.
# The index refers to the numbered listing printed by the previous cell.
best_checkpoint = checkpoint_names[1]

In [None]:
# Load the model checkpoint
# Keys: ['state_dict', 'epoch', 'optimizer']
# map_location="cpu" keeps loading independent of the GPU the checkpoint was
# saved from (that device may not exist on this machine). load_state_dict later
# copies the weights onto whatever device the model lives on.
checkpoint = torch.load(best_checkpoint, map_location="cpu")

In [None]:
# Load the model on GPU when the requested device exists, otherwise on CPU.
# NOTE(review): the original note called GPU 1 the best option, but the code
# has always selected index 2 — confirm which device is intended.
gpu_index = 2
if torch.cuda.is_available() and torch.cuda.device_count() > gpu_index:
    device = torch.device("cuda", gpu_index)
else:
    # Fall back to CPU so the notebook still runs on machines with fewer GPUs
    device = torch.device("cpu")

# Build the AutoEncoder with the architecture recovered from the config
model = ae.AutoEncoder(**ae_kwargs).to(device)

In [None]:
# Load the model's state dictionary (the trained weights) from the checkpoint.
# Note: load_state_dict returns a report of missing/unexpected keys, which the
#       notebook displays as the cell output. A report such as
#       IncompatibleKeys(missing_keys=[], unexpected_keys=[]) has both lists
#       empty, i.e. there were no problems in loading the state dictionary.
model.load_state_dict(checkpoint['state_dict'])

In [None]:
# Put the model in evaluation mode (inference only; no training happens in this notebook).
# As the last expression of the cell, the returned model is also displayed.
model.eval()

### Create targets from the output of the encoder branch

[PyTorch forums discussion on layerwise viz](https://discuss.pytorch.org/t/how-to-visualize-fully-connected-layers-as-images/13626/2)
- In order to generate a variety of targets, we wish to save the output of the encoder's last layer
- Since the output activations will vary depending on the input data image, we will generate a latent vector for each training example in the training dataset

## Set up LArCV1 dataloader

In [None]:
# Path to the training data, selected by image side length (im_dim).
# NOTE: despite the `test_` prefix of the variable name, both the path and
#       the message below refer to the *training* split.
test_data = "/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_{}/".format(im_dim)
print("Training data will be loaded from: \n{}".format(test_data))

In [None]:
# Dataloader settings: one image per batch, because the encoder loop below
# flattens each batch with view(1, -1).
# NOTE(review): shuffle=True means the index of a saved code vector does not
#               identify its source image — confirm this is acceptable downstream.
loader_kwargs = dict(num_workers=2, batch_size=1, shuffle=True)

# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5
normalize       = transforms.Normalize([0.5], [0.5])
test_transforms = transforms.Compose([transforms.ToTensor(), normalize])
test_dataset    = LArCV_loader(root=test_data, transforms=test_transforms)

In [None]:
# Wrap the dataset in a torch DataLoader using the settings from loader_kwargs
dataloader = DataLoader(test_dataset, **loader_kwargs)

## Create directory for saving code layer output targets

In [None]:
# Directory where the code vectors should be saved: a common root plus one
# sub-folder per (image side length, code dimension) combination.
deploy_root = "/media/hdd1/kai/particle_generator/code_vectors/"
deploy_dir = deploy_root + "code_vectors_{}_{}/".format(im_dim, l_dim)

In [None]:
# Create the save directory, if it doesn't already exist.
# os.makedirs with exist_ok=True does not raise when the directory is already
# there (os.mkdir did) and also creates any missing parent folders.
os.makedirs(deploy_dir, exist_ok=True)

In [None]:
# Descend into a nested sub-directory that has the same name as its parent.
# The guard keeps this cell idempotent: re-running it does not append the
# suffix again once the path already ends with the doubled folder name.
# NOTE(review): the nesting looks intentional (e.g. a folder-per-class layout
#               for a downstream dataset) — confirm.
code_subdir = "code_vectors_{}_{}/".format(im_dim, l_dim)
if not deploy_dir.endswith(code_subdir * 2):
    deploy_dir += code_subdir

In [None]:
# Create the nested save directory; exist_ok=True makes re-running this cell
# safe when the folder is already present (os.mkdir raised FileExistsError).
os.makedirs(deploy_dir, exist_ok=True)

## Generate deploy targets by looping over the dataloader using only the encoder

In [None]:
# Codes will be a list of numpy arrays, one per encoded image.
# Presumably 32-bit floats (the torch default dtype) — confirm against the model.
codes = []

In [None]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every code vector.
codes = []

# Inference only: no_grad stops autograd from recording a graph for every
# forward pass, which saves memory and time over the whole dataset.
with torch.no_grad():
    for image in dataloader:
        # Flatten image into a single row vector (relies on batch_size == 1)
        image = image.view(1, -1).to(device)

        # Get the output of just the encoder
        code = model.encoder(image)

        # Move the result to host memory and store it as a numpy array
        codes.append(code.detach().cpu().numpy())

In [None]:
# Report how many code vectors were produced. The image count is taken from
# the underlying dataset, because len(dataloader) counts batches and only
# equals the number of images when batch_size is 1.
print("Generated {} code vectors from {} training images".format(len(codes), len(dataloader.dataset)))

In [None]:
# Sanity check: shape of a single code vector.
# Expected to be (1, l_dim), since each image is fed as a one-row batch — confirm.
codes[0].shape

## Save the list of code vectors to disk
- Each code vector is written to its own `.npy` file using NumPy's `np.save` function
- The numpy array of floats can be read back losslessly using np.load("float_file.npy")
- This process will require the downstream creation of a [custom dataset](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

In [None]:
# Write every code vector to its own file, named after its position in `codes`
for position, vector in enumerate(codes):
    target_path = deploy_dir + "target_{}.npy".format(position)
    np.save(target_path, vector)

In [None]:
# Read the first saved vector back from disk to confirm the round trip works
sample_path = deploy_dir + "target_0.npy"
x = np.load(sample_path)

# Show the loaded object's type, then its contents
print(type(x))
print(x)