## Notebook to load weights and extract activations

Note: Keep the file **architecture.py** and the extracted **Data_Subset2** folder in the same folder as this notebook. The weights of the pre-trained model, **cnn_weights.pt**, are available <a href="https://www.dropbox.com/s/q51rgk69cz90jn0/cnn_weights.pt?dl=0" target="_blank">here</a>; save them in that same folder as well. If you store any of these elsewhere, adjust the paths in this notebook accordingly.

#### Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import datetime

import torch
import torch.nn.functional as F
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

#### Load trained model

In [None]:
from architecture import model

#### Test for CUDA

In [None]:
## True when PyTorch reports a usable CUDA device
train_on_gpu = torch.cuda.is_available()

## report which kind of device was detected
if train_on_gpu:
    print('GPU found, training on GPU')
else:
    print('No GPU, training on CPU')

In [None]:
## use the CUDA device when one is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#### Move model to GPU if available

In [None]:
## send the imported model to the selected device
## NOTE(review): assumes `model` is a torch.nn.Module, so .to() moves its parameters onto `device` -- confirm in architecture.py
model.to(device)

#### Load dataset

In [None]:
## ImageFolder variant whose samples also carry the path of the image file
class ImageFolderWithPaths(datasets.ImageFolder):
    """``torchvision.datasets.ImageFolder`` that appends the image file path to every sample."""

    def __getitem__(self, index):
        """Return the parent class's sample tuple for ``index`` with the file path added at the end."""
        # whatever ImageFolder itself yields for this index
        base_sample = super().__getitem__(index)
        # the first field of the matching ``imgs`` entry is the file path
        image_path = self.imgs[index][0]
        return base_sample + (image_path,)

In [None]:
## root folder of the extracted dataset and its three split sub-folders
data_dir = 'Data_Subset2'
train_dir, valid_dir, test_dir = (
    '/'.join((data_dir, split_name))
    for split_name in ('Train', 'Validation', 'Test')
)

## data-loading settings: number of worker subprocesses, and samples per batch
num_workers, batch_size = 0, 20

In [None]:
## the only preprocessing step: ToTensor turns each loaded image into a torch tensor
transform = transforms.Compose([transforms.ToTensor()])

## one dataset per split; every sample is (image, label, file path)
train_data = ImageFolderWithPaths(root=train_dir, transform=transform)
valid_data = ImageFolderWithPaths(root=valid_dir, transform=transform)
test_data = ImageFolderWithPaths(root=test_dir, transform=transform)

## one loader per split; shuffle=False keeps the samples in dataset order
train_loader, valid_loader, test_loader = (
    DataLoader(split_data, shuffle=False, num_workers=num_workers, batch_size=batch_size)
    for split_data in (train_data, valid_data, test_data)
)

## names of the image classes
classes = ['cortex', 'hypothalamus', 'striatum', 'vp', 'wm', 'zi']

#### Collect activations 

In [None]:
## note: results are accumulated batch by batch, so the batch size does not need to
## divide the number of samples and no padding rows are produced

def _parse_patch_location(path):
    """Return the two integers encoded at the end of a file name shaped ``..._<r>_<c>.<ext>``.

    Raises ValueError when the file name does not contain two underscore-separated
    integer fields before its extension.
    """
    import os  # local import: keeps this cell independent of the notebook's import cell

    # look at the file name only, so '_' or '.' in directory names cannot confuse the parse
    file_name = os.path.basename(str(path))
    pieces = file_name.rsplit('_', 2)
    if len(pieces) < 3:
        raise ValueError("cannot read '_<r>_<c>.<ext>' from file name: {!r}".format(path))
    # the second integer stops at the first '.' that follows the last underscore
    return int(pieces[1]), int(pieces[2].split('.', 1)[0])


def collect_activations(loader,trained_model=model,bs=batch_size,reps_dim=64):
    """Run ``trained_model`` over ``loader`` and gather latents, labels and locations.

    Parameters
    ----------
    loader : iterable yielding ``(data, target, paths)`` batches, e.g. a DataLoader
        built on ``ImageFolderWithPaths``.
    trained_model : model whose forward pass returns an ``(output, latents)`` pair;
        it is switched to eval mode.
    bs : unused; kept so existing calls that pass it keep working. The number of
        samples is now taken from the batches themselves.
    reps_dim : expected number of latent values per sample.

    Returns
    -------
    reps_mat : float64 array, shape (num_samples, reps_dim), one latent vector per sample.
    labels_list : float64 array, shape (num_samples,), the targets.
    locs_list : float64 array, shape (num_samples, 2), the two integers parsed from
        each file name (``..._<r>_<c>.<ext>``).

    Raises
    ------
    ValueError
        If a file name cannot be parsed, or the latents do not have ``reps_dim``
        values per sample.
    """
    # feed inputs to the device the model actually lives on, rather than relying on
    # a notebook-level flag that may not match the model that was passed in
    try:
        model_device = next(trained_model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    reps_chunks, label_chunks, locations = [], [], []

    trained_model.eval()

    # inference only: skip building the autograd graph
    with torch.no_grad():
        for data, target, paths in loader:
            # forward pass; only the latent representations are kept
            _, latents = trained_model(data.to(model_device))

            n_batch = len(paths)
            # convert once per batch instead of once per sample
            batch_reps = latents.detach().cpu().numpy().reshape(n_batch, -1)
            if batch_reps.shape[1] != reps_dim:
                raise ValueError(
                    "expected {} latent values per sample, got {}".format(
                        reps_dim, batch_reps.shape[1]))

            reps_chunks.append(batch_reps)
            label_chunks.append(target.detach().cpu().numpy().reshape(n_batch))
            locations.extend(_parse_patch_location(p) for p in paths)

    # an empty loader still returns correctly shaped (empty) arrays
    if not reps_chunks:
        return np.zeros((0, reps_dim)), np.zeros(0), np.zeros((0, 2))

    # float64 matches the dtype of the arrays this function has always returned
    reps_mat = np.concatenate(reps_chunks).astype(np.float64)
    labels_list = np.concatenate(label_chunks).astype(np.float64)
    locs_list = np.asarray(locations, dtype=np.float64).reshape(-1, 2)

    return reps_mat, labels_list, locs_list

In [None]:
## latent activations, labels and parsed file-name locations for each of the three splits
activations_train, labels_train, locs_train = collect_activations(loader=train_loader)
activations_valid, labels_valid, locs_valid = collect_activations(loader=valid_loader)
activations_test, labels_test, locs_test = collect_activations(loader=test_loader)