### This notebook is meant for the visualization of the fully connected layers

In [1]:
import os
import torch
import numpy as np
import cv2 as cv
import pandas as pd
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import sys
sys.path.insert(1, '/home/kseuro/Kai/deeplearnphysics/pytorch/particle_generator/')

# My stuff
import ae
import utils
from dataloader import LArCV_loader

In [3]:
# Set the root path of the particle generator experiments folder
exp_root = "/media/hdd1/kai/particle_generator/experiments/"

In [4]:
# Get the names of all the experiments in the exp_root folder
exp_paths = []
for path in os.listdir(exp_root):
    exp_paths.append(os.path.join(exp_root, path))

print("-"*60)
for i in range(len(exp_paths)):
    print("\n Exp_{}:".format(str(i)), exp_paths[i], '\n')
    print("-"*60)

------------------------------------------------------------

 Exp_0: /media/hdd1/kai/particle_generator/experiments/01-07-2020_15-28-38_ae_7500_epochs_LArCV_64_dataset 

------------------------------------------------------------

 Exp_1: /media/hdd1/kai/particle_generator/experiments/01-09-2020_14-54-41_ewm_1_epochs_Code_Vectors_64_20 

------------------------------------------------------------

 Exp_2: /media/hdd1/kai/particle_generator/experiments/01-06-2020_11-29-26_ae_7500_epochs_LArCV_64_dataset 

------------------------------------------------------------

 Exp_3: /media/hdd1/kai/particle_generator/experiments/01-07-2020_11-06-54_ae_15000_epochs_LArCV_64_dataset 

------------------------------------------------------------

 Exp_4: /media/hdd1/kai/particle_generator/experiments/01-06-2020_11-28-04_ae_7500_epochs_LArCV_128_dataset 

------------------------------------------------------------


In [5]:
# Set the dir of the particular experiment for which to load a model
exp_dir = exp_paths[2] + "/"

In [6]:
# Create the full path to the experiment
exp_path = os.path.join(exp_root, exp_dir)
print("Experiment path set as: \n{}".format(exp_path))

Experiment path set as: 
/media/hdd1/kai/particle_generator/experiments/01-06-2020_11-29-26_ae_7500_epochs_LArCV_64_dataset/


In [7]:
# Path to model weights
weights_dir = "weights/"

In [8]:
# Load the config csv as a dict
config_csv = exp_path + "config.csv"
config_df = pd.read_csv(config_csv, delimiter = ",")

In [9]:
# Get the model architecture from config df
n_layers = int(config_df[config_df['Unnamed: 0'].str.contains("n_layers")==True]['0'].item())
l_dim    = int(config_df[config_df['Unnamed: 0'].str.contains("l_dim")==True]['0'].item())
im_size  = int(config_df[config_df['Unnamed: 0'].str.contains("dataset")==True]['0'].item())**2
im_dim   = int(np.sqrt(im_size))

In [10]:
# Set up AE layer sizes
base = [256] 

# Compute encoder sizes
sizes = lambda: [ (yield 2**i) for i in range(n_layers) ]
enc_sizes = base * n_layers
enc_sizes = [a*b for a,b in zip(enc_sizes, [*sizes()])][::-1]

# Update kwarg dicts
# Decoder is the reverse of the encoder
ae_kwargs = {'enc_sizes' : enc_sizes, 'l_dim' : l_dim, 'im_size' : im_size, 'dec_sizes' : enc_sizes[::-1]}

### Select a checkpoint file and load the model state dict

In [11]:
# Get checkpoint name(s)
checkpoint_path  = exp_path + weights_dir
checkpoint_names = []
for file in os.listdir(checkpoint_path):
    checkpoint_names.append(os.path.join(checkpoint_path, file))

In [12]:
print("-"*60)
for i in range(len(checkpoint_names)):
    print("\n Chkpt_{} :".format(str(i)), checkpoint_names[i], '\n')
    print("-"*60)

------------------------------------------------------------

 Chkpt_0 : /media/hdd1/kai/particle_generator/experiments/01-06-2020_11-29-26_ae_7500_epochs_LArCV_64_dataset/weights/best_ae_ep_7000.tar 

------------------------------------------------------------

 Chkpt_1 : /media/hdd1/kai/particle_generator/experiments/01-06-2020_11-29-26_ae_7500_epochs_LArCV_64_dataset/weights/best_ae_ep_7499.tar 

------------------------------------------------------------

 Chkpt_2 : /media/hdd1/kai/particle_generator/experiments/01-06-2020_11-29-26_ae_7500_epochs_LArCV_64_dataset/weights/best_ae_ep_5250.tar 

------------------------------------------------------------


In [13]:
# Select the desired checkpoint from the list
best_checkpoint = checkpoint_names[1]

In [14]:
# Load the model checkpoint
# Keys: ['state_dict', 'epoch', 'optimizer']
checkpoint = torch.load(best_checkpoint)

In [15]:
# Load the model on GPU (GPU 1 is the best option)
device = torch.device(2)
model = ae.AutoEncoder(**ae_kwargs).to(device)

In [16]:
# Load the model's state dictionary
# Note: The IncompatibleKeys(missing_keys=[], unexpected_keys=[]) message indicates that
#       there were no problems in loading the state dictionary. Bit confusing...
model.load_state_dict(checkpoint['state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [17]:
# Put the model in evaluation mode
model.eval()

AutoEncoder(
  (encoder): Encoder(
    (fc_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=4096, out_features=2048, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (1): Sequential(
        (0): Linear(in_features=2048, out_features=1024, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (2): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (3): Sequential(
        (0): Linear(in_features=512, out_features=256, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
    )
    (last): Linear(in_features=256, out_features=20, bias=True)
  )
  (decoder): Decoder(
    (fc_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=20, out_features=256, bias=True)
        (1): LeakyReLU(negative_slope=0.2)
      )
      (1): Sequential(
        (0): Linear(in_features=256, out_features=512, bias=True)
        (1): Leaky

### Create targets from the output of the encoder branch

[PyTorch forums discussion on layerwise viz](https://discuss.pytorch.org/t/how-to-visualize-fully-connected-layers-as-images/13626/2)
- In order to generate a variety of targets, we wish to save the output of the encoder's last layer
- Since the output activations will vary depending on the input data image, we will generate a latent vector for each training example in the training dataset

#### Set up LArCV1 dataloader

In [18]:
# Path to the training data
test_data = "/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_{}/".format(im_dim)
print("Training data will be loaded from: \n{}".format(test_data))

Training data will be loaded from: 
/media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_64/


In [19]:
# Set up the torch dataloader
loader_kwargs = {'num_workers' : 2, 'batch_size': 1, 'shuffle': True}
test_transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5],[0.5])])
test_dataset    = LArCV_loader(root = test_data, transforms = test_transforms)

Image conversion flag is: L
Images will be loaded from subfolder of: /media/hdd1/kai/particle_generator/larcv_data/train/larcv_png_64/


In [20]:
dataloader = DataLoader(test_dataset, **loader_kwargs)

#### Create directory for saving code layer output targets

In [23]:
# Specify the directory where the code vectors should be saved
deploy_dir = "/media/hdd1/kai/particle_generator/code_vectors/"
deploy_dir += "code_vectors_{}_{}/code_vectors_{}_{}/".format(im_dim, l_dim, im_dim, l_dim)

In [21]:
# Create the save directory, if it doesn't already exist
os.mkdir(deploy_dir)

FileExistsError: [Errno 17] File exists: '/media/hdd1/kai/particle_generator/code_vectors/code_vectors_64_20/code_vectors_64_20/'

#### Generate deploy targets by looping over the dataloader using only the encoder

In [21]:
# Codes will be a list of numpy arrays of 32-bit floats
codes = []
for idx, image in enumerate(dataloader):

    # Flatten image into a vector
    image = image.view(1, -1).to(device)
    
    # Get the output of just the encoder
    code = model.encoder(image)
    
    # Save the output tensor to a list
    codes.append(code.detach().cpu().numpy())
print("Generated {} code vectors from {} training images".format(len(codes), len(dataloader)))

Generated 53943 code vectors from 53943 training images


#### Save the list of code vectors to disk
- The torch tensors can be converted to numpy arrays
- We then use the built-in np.save function to store the np array as a .npy file
- The numpy array of floats can be read back losslessly using np.load("float_file.npy")
- This process will require the downstream creation of a [custom dataset](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

In [25]:
for idx, arr in enumerate(codes):
    file_name = deploy_dir + "target_{}.npy".format(idx)
    np.save(file_name, arr)

In [27]:
# Load a test vector
x = np.load(deploy_dir + "target_0.npy")
print(type(x))
print(x)

<class 'numpy.ndarray'>
[[ 41.3999      -2.0707474   11.15646     75.178604   -71.26059
    7.078363    58.097836    -9.79913     20.420868   127.18924
  -22.877512   -10.997973   -30.48444     10.107766   -25.841742
  -93.63965      0.42487815 -30.668907    72.617386    62.44259   ]]
