### This notebook is meant for the generation of code_vector targets from the encoder branch of a trained AutoEncoder

In [1]:
import os
import torch
import numpy as np
import cv2 as cv
import pandas as pd
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import sys
sys.path.insert(1, '/home/kseuro/Kai/deeplearnphysics/pytorch/particle_generator/')

# My stuff
import ae
import utils
from dataloader import LArCV_loader

In [3]:
# Root folder of the particle generator AutoEncoder experiments.
# The trailing "/" matters: later cells build paths by string concatenation.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
exp_root = "/media/hdd1/kai/particle_generator/experiments/larcv_ae/"

## Get the names of all the experiments in the exp_root folder

In [5]:
# Full path of every entry found directly under exp_root
exp_paths = [os.path.join(exp_root, entry) for entry in os.listdir(exp_root)]

# Print each experiment next to its list index so one can be picked by number.
# NOTE(review): os.listdir gives no ordering guarantee, so check the printed
# index before selecting an experiment in the next cell.
divider = "-"*60
print(divider)
for i, path in enumerate(exp_paths):
    print("\n Exp_{}:".format(str(i)), path, '\n')
    print(divider)

------------------------------------------------------------

 Exp_0: /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_20_7500-epochs 

------------------------------------------------------------

 Exp_1: /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_64_20_7500-epochs 

------------------------------------------------------------

 Exp_2: /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs 

------------------------------------------------------------

 Exp_3: /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_64_256_1000-epochs 

------------------------------------------------------------


In [6]:
# Pick the experiment to load by its index in the listing printed above.
# The trailing "/" is appended because later cells build file paths from this
# value by plain string concatenation.
exp_dir = exp_paths[2] + "/"

In [7]:
# Full path to the selected experiment.
# exp_dir is derived from exp_paths, whose entries already start with exp_root,
# so it is used as-is. Joining it onto exp_root a second time only worked
# because os.path.join discards earlier segments when a later one is absolute;
# with a relative exp_root the root would have appeared twice in the path.
exp_path = exp_dir
print("Experiment path set as: \n{}".format(exp_path))

Experiment path set as: 
/media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/


In [8]:
# Name of the sub-folder holding the model weights; it is appended to the
# experiment path when the checkpoint files are listed.
weights_dir = "weights/"

In [9]:
# Read the experiment's config.csv into a pandas DataFrame
# (comma-separated, which is the read_csv default)
config_csv = os.path.join(exp_path, "config.csv")
config_df = pd.read_csv(config_csv)

In [10]:
# Get the model architecture from config df
def _config_entry(key):
    """Return the value in column '0' of the single row whose name contains `key`.

    The row name is read from the 'Unnamed: 0' column. `.item()` raises a
    ValueError unless exactly one row matches.
    """
    row_mask = config_df['Unnamed: 0'].str.contains(key, na=False)
    return config_df.loc[row_mask, '0'].item()

n_layers = int(_config_entry("n_layers"))
l_dim    = int(_config_entry("l_dim"))
# NOTE(review): the "dataset" entry is treated as the image side length in
# pixels (it is squared to get the flattened image size) -- confirm.
im_size  = int(_config_entry("dataset"))**2
im_dim   = int(np.sqrt(im_size))

In [11]:
# Sanity check: show the image side length and l_dim read from the config
print("im_dim: {}, l_dim: {}".format(im_dim, l_dim))

im_dim: 128, l_dim: 256


In [12]:
# Set up AE layer sizes
base = [256]

# Compute encoder sizes: base * 2**i for i = 0 .. n_layers-1, widest layer first.
# The widths are computed directly because the previous form, a `yield` inside
# a list comprehension, is a SyntaxError on Python 3.8 and later.
enc_sizes = [base[0] * 2**i for i in range(n_layers)][::-1]

# Keyword arguments passed to the AutoEncoder constructor
# Decoder is the reverse of the encoder
ae_kwargs = {'enc_sizes' : enc_sizes, 'l_dim' : l_dim, 'im_size' : im_size, 'dec_sizes' : enc_sizes[::-1]}

## Load model checkpoint

In [16]:
# Folder that holds this experiment's checkpoints
checkpoint_path  = exp_path + weights_dir
# Full path of every file found in that folder
checkpoint_names = [os.path.join(checkpoint_path, entry) for entry in os.listdir(checkpoint_path)]

In [17]:
# List the checkpoints next to their index so one can be selected by number
divider = "-"*60
print(divider)
for i, name in enumerate(checkpoint_names):
    print("\n{} :".format(str(i)), name, '\n')
    print(divider)

------------------------------------------------------------

0 : /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/weights/best_ae_ep_600.tar 

------------------------------------------------------------

1 : /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/weights/best_ae_ep_700.tar 

------------------------------------------------------------

2 : /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/weights/best_ae_ep_900.tar 

------------------------------------------------------------

3 : /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/weights/best_ae_ep_650.tar 

------------------------------------------------------------

4 : /media/hdd1/kai/particle_generator/experiments/larcv_ae/larcv_ae_128_256_1000-epochs/weights/best_ae_ep_950.tar 

------------------------------------------------------------

5 : /media/hdd1/kai/particle_generator

In [18]:
# Select the desired checkpoint by its index in the listing printed above.
# NOTE(review): the listing order comes from os.listdir, so confirm the index
# still points at the intended file before loading.
best_checkpoint = checkpoint_names[1]

In [19]:
# Load the model checkpoint
# Keys: ['state_dict', 'epoch', 'optimizer']
# map_location="cpu" makes the load independent of the GPU the checkpoint was
# saved from (that device may not exist here); load_state_dict later copies the
# weights into the model on whatever device the model lives on.
checkpoint = torch.load(best_checkpoint, map_location="cpu")

In [20]:
# Build the AutoEncoder from ae_kwargs and move it to the GPU.
# NOTE(review): the original note here said "GPU 1 is the best option", but the
# code selects device index 2 -- confirm which GPU is actually intended.
device = torch.device(2)
model = ae.AutoEncoder(**ae_kwargs).to(device)

In [21]:
# Load the trained weights from the checkpoint into the model.
# Note: an output of IncompatibleKeys(missing_keys=[], unexpected_keys=[]) means
#       success: both lists are empty, so every key in the state dictionary
#       matched a parameter of the model.
model.load_state_dict(checkpoint['state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [22]:
# Put the model in evaluation mode: from here on it is only used to encode
# images, not to train. As the last expression in the cell, the call also
# displays the module structure.
model.eval()

AutoEncoder(
  (encoder): Encoder(
    (fc_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=16384, out_features=2048, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (1): Sequential(
        (0): Linear(in_features=2048, out_features=1024, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (2): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (3): Sequential(
        (0): Linear(in_features=512, out_features=256, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
    )
    (last): Linear(in_features=256, out_features=256, bias=True)
  )
  (decoder): Decoder(
    (fc_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=256, out_features=256, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (1): Sequential(
        (0): Linear(in_features=256, out_features=512, bias=True)
        (1): Le

### Create targets from the output of the encoder branch

[PyTorch forums discussion on layerwise viz](https://discuss.pytorch.org/t/how-to-visualize-fully-connected-layers-as-images/13626/2)
- In order to generate a variety of targets, we wish to save the output of the encoder's last layer
- Since the output activations will vary depending on the input data image, we will generate a latent vector for each training example in the training dataset

## Set up LArCV1 dataloader

In [23]:
# Path to the training images for the selected image size (im_dim).
# NOTE: despite the `test_` prefix of the variable name, this points at the
# train split -- code vectors are generated for the training images.
test_data = "/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_{}/".format(im_dim)
print("Training data will be loaded from: \n{}".format(test_data))

Training data will be loaded from: 
/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_128/


In [24]:
# Set up the torch dataloader
# batch_size is 1 so that each batch yields exactly one code vector.
# shuffle is off: this is a single encode-only pass, and a fixed order keeps
# the saved target_<idx>.npy files reproducible across runs.
loader_kwargs = {'num_workers' : 2, 'batch_size': 1, 'shuffle': False}
# Convert each image to a tensor and normalize with mean 0.5 and std 0.5
test_transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5],[0.5])])
test_dataset    = LArCV_loader(root = test_data, transforms = test_transforms)

Image conversion flag is: L
Images will be loaded from subfolder of: /media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_128/


In [25]:
# Wrap the dataset in a DataLoader configured by loader_kwargs
dataloader = DataLoader(test_dataset, **loader_kwargs)

## Create directory for saving code layer output targets

In [26]:
# Directory where the code vectors are saved: a sub-folder of the common
# code_vectors root, named after im_dim and l_dim
code_vectors_root = "/media/hdd1/kai/particle_generator/code_vectors/"
deploy_dir = code_vectors_root + "code_vectors_{}_{}/".format(im_dim, l_dim)

In [27]:
# Create the save directory (and any missing parent folders).
# exist_ok=True makes the cell safe to re-run when the folder already exists.
os.makedirs(deploy_dir, exist_ok=True)

In [28]:
# Descend one more level: the code vectors go into a sub-folder that has the
# same name as its parent.
# NOTE(review): presumably nested so that a folder-based dataset loader finds a
# sub-folder to read from -- confirm. Re-running this cell appends the
# sub-folder again, so run it only once per kernel session.
deploy_dir += "code_vectors_{}_{}/".format(im_dim, l_dim)

In [29]:
# Create the nested save directory; exist_ok=True avoids a FileExistsError
# when the folder is already there (e.g. on a re-run of the notebook).
os.makedirs(deploy_dir, exist_ok=True)

## Generate deploy targets by looping over the dataloader using only the encoder

In [30]:
# Codes will be a list of numpy arrays of 32-bit floats, one array per image.
# Re-run this cell before re-running the encoding loop, otherwise the loop
# appends a second copy of every code vector.
codes = []

In [31]:
# Encode every image with the encoder branch only.
# Gradients are never needed here, so the loop runs under torch.no_grad() to
# avoid building an autograd graph for each of the images.
with torch.no_grad():
    for image in dataloader:
        # Flatten the single-image batch into one row vector
        image = image.view(1, -1).to(device)

        # Get the output of just the encoder
        code = model.encoder(image)

        # Move the code vector to host memory and keep it as a numpy array
        codes.append(code.cpu().numpy())

In [32]:
# len(dataloader) counts batches; with batch_size = 1 that equals the number of
# images, so the two numbers printed here should match.
print("Generated {} code vectors from {} training images".format(len(codes), len(dataloader)))

Generated 50975 code vectors from 50975 training images


In [33]:
# Shape of a single code vector: (1, l_dim)
codes[0].shape

(1, 256)

## Save the list of code vectors to disk
- We use NumPy's np.save function to store each code vector (a NumPy array) as its own .npy file
- The numpy array of floats can be read back losslessly using np.load("float_file.npy")
- This process will require the downstream creation of a [custom dataset](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

In [34]:
# Write each code vector to its own file, target_<index>.npy, inside deploy_dir
for code_idx, code_arr in enumerate(codes):
    np.save(os.path.join(deploy_dir, "target_{}.npy".format(code_idx)), code_arr)

In [35]:
# Read the first saved code vector back from disk, then show its type and values
x = np.load(os.path.join(deploy_dir, "target_0.npy"))
print(type(x), x, sep="\n")

<class 'numpy.ndarray'>
[[  3.672798     5.910793    -5.899435     8.146846    -8.017069
   -0.11487905   2.6790698   -2.6881564    5.1323256   -1.6279917
   -6.1684184   -7.695417    -1.3086205   -1.9265165   -0.7399793
    3.9810693   -7.8196726    4.668637    -6.622833     7.8188305
    4.584268    -5.0726523    4.5567794    5.069749    -4.948919
    5.145309    -2.3652625    5.226916     1.8919929   10.40465
   -0.30209237   0.9712619    8.325179    -2.4420173   -3.239091
    1.5912156    7.9583426   -5.7852426  -10.070942    -1.546155
   -9.659725     4.2528563   -4.572234    -2.053901    -2.406771
    7.606579     7.891351    -0.4976655   -5.938695    -6.5565534
   -5.948125     3.3551672    1.0804181   -0.6388806   -7.303273
    8.18918      9.536243    -9.971        4.2790594    2.7648807
    4.6839604    0.08254389   4.201904    -9.390796    -1.7352256
   -0.21342005   1.9241855   -1.1673998   10.179223    -3.8673122
   -5.958336     4.2275815   -4.873247    11.547461     0.62