# Description
Train and retrain the weights of optical neural network encoders, using the input images or activations of hidden layers as inputs.

# Load Libraries

In [1]:
from __future__ import print_function
import os, sys
import math
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.swa_utils import AveragedModel, SWALR
from torch.optim.lr_scheduler import CosineAnnealingLR
import optuna
import ray

import wandb
from torchvision import datasets, transforms

In [2]:
""" Training and hyperparameter search configurations """

parser = argparse.ArgumentParser(description='PyTorch QuickDraw Example')
parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--wandb', action='store_true', default=True, 
                    help='enables wandb logger')  
parser.add_argument('--epochs', type=int, default=5000, metavar='N',
                    help='number of epochs to train (default: 100)')
args = parser.parse_args("")
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Set random seeds to reproduce results
torch.manual_seed(42)
if args.cuda:
    torch.cuda.manual_seed(42)

# Dataloaders

In [3]:
""" Construct a dataset object from data. """

class EBI_Cell_Dataset(torch.utils.data.Dataset):
    def __init__(self, data_import, label_import, targets, data_transforms=None):
        self.physical_ground_truth = torch.tensor(data_import, dtype = torch.float32)
        self.labels = torch.tensor(label_import, dtype = torch.long)
        self.digital_ground_truth = torch.tensor(targets, dtype = torch.float32)
        self.class_dict = {}
        self.transform = data_transforms
                
    def __len__(self):
        return len(self.labels)
        
    def __getitem__(self, idx):
        data = self.physical_ground_truth[idx]
        
        if self.transform:
             data = self.transform(self.physical_ground_truth[idx].unsqueeze(0))
            
        return data, self.labels[idx]#, self.digital_ground_truth[idx,:]

In [4]:
""" Dataloaders for input images """

train_GT = np.load('./Cell_GT_images.npz')['train_data']
train_labels = np.load('./Cell_GT_images.npz')['train_labels']
test_GT = np.load('./Cell_GT_images.npz')['test_data']
test_labels = np.load('./Cell_GT_images.npz')['test_labels']
train_grey = np.load('./EBI_Cells.npz')['train_data_grey']
test_grey = np.load('./EBI_Cells.npz')['test_data_grey']

train_idx = []
val_idx = []
np.random.seed(32)
for l in np.unique(train_labels):
    class_idx = (train_labels == l).nonzero()[0]
    np.random.shuffle(class_idx)
    train_idx.append(class_idx[:160])
    val_idx.append(class_idx[160:])
train_idx = np.array(train_idx).flatten()
val_idx = np.array(val_idx).flatten()

transforms_input = transforms.Compose([transforms.Lambda(lambda x: x.view(-1)), \
                                       transforms.Lambda(lambda x: x/200)])

transforms_input_distort = transforms.Compose([transforms.RandomAffine(0, translate=(0.05, 0.05), scale=(0.96, 1.04)), \
                                               transforms.Lambda(lambda x: x.view(-1)), \
                                               transforms.Lambda(lambda x: x/200)])

# transforms_digi = transforms.Compose([transforms.ToTensor(), \
#                                       transforms.Lambda(lambda x: x.view(-1))])

# transforms_digi_distort = transforms.Compose([transforms.ToTensor(), \
#                                               transforms.RandomAffine(0, translate=(0.05, 0.05), scale=(0.96, 1.04)), \
#                                               transforms.Lambda(lambda x: x.view(-1))])
kwargs = {}

""" input image loaders """
train_loader_phys = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(train_GT[train_idx,:], train_labels[train_idx], train_grey[train_idx,:], transforms_input), \
    batch_size=1000, shuffle=False, **kwargs)

val_loader_phys = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(train_GT[val_idx,:], train_labels[val_idx], train_grey[val_idx,:], transforms_input), \
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

test_loader_phys = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(test_GT, test_labels, test_grey, transforms_input), \
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

""" input image loader w/. customerized samplers """
train_sampler = torch.utils.data.BatchSampler(\
                                              torch.utils.data.RandomSampler(range(len(train_idx))), \
                                              batch_size=args.batch_size, drop_last=False)
val_sampler = torch.utils.data.BatchSampler(\
                                              torch.utils.data.RandomSampler(range(len(val_idx))), \
                                              batch_size=args.test_batch_size, drop_last=False)
test_sampler = torch.utils.data.BatchSampler(\
                                             torch.utils.data.RandomSampler(range(test_GT.shape[0])), \
                                             batch_size=args.test_batch_size, drop_last=False)

# train_loader_phys = torch.utils.data.DataLoader( \
#     EBI_Cell_Dataset(train_GT, train_labels, train_grey, transforms_input_distort), \
#     batch_sampler = train_sampler, **kwargs)

# test_loader_phys = torch.utils.data.DataLoader( \
#     EBI_Cell_Dataset(test_GT, test_labels, test_grey, transforms_input), \
#     batch_sampler = test_sampler, **kwargs)

In [5]:
""" Dataloaders for feeding act1 """

train_data_exp = np.load('./Train_Data_Cell_linear.npz')
train_fc1 = torch.tensor(train_data_exp['train_data_fc1'][train_idx,:], dtype=torch.float32)
train_labels = torch.tensor(train_data_exp['train_labels'][train_idx])

val_fc1 = torch.tensor(train_data_exp['train_data_fc1'][val_idx,:], dtype=torch.float32)
val_labels = torch.tensor(train_data_exp['train_labels'][val_idx])

test_data_exp = np.load('./Test_Data_Cell_linear.npz')
test_fc1 = torch.tensor(test_data_exp['test_data_fc1'], dtype=torch.float32)
test_labels = torch.tensor(test_data_exp['test_labels'])

val_grey = train_grey[val_idx,:]
train_grey = train_grey[train_idx,:]

transforms_fc1 = transforms.Compose([transforms.Lambda(lambda x: x.view(-1)), \
                                      transforms.Lambda(lambda x: x)])

transforms_act1 = transforms.Compose([transforms.Lambda(lambda x: x.view(-1)), \
                                      transforms.Lambda(lambda x: x/6000)])

transforms_fc2 = transforms.Compose([transforms.Lambda(lambda x: x.view(-1)), \
                                      transforms.Lambda(lambda x: x/10000)])

""" fc1 loader w/. built-in samplers """
train_loader_fc1 = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(train_fc1, train_labels, train_grey, transforms_fc1), \
    batch_size=args.batch_size, shuffle=False, **kwargs)

val_loader_fc1 = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(val_fc1, val_labels, val_grey, transforms_fc1), \
    batch_size=args.test_batch_size, shuffle=False, **kwargs)

test_loader_fc1 = torch.utils.data.DataLoader( \
    EBI_Cell_Dataset(test_fc1, test_labels, test_grey, transforms_fc1), \
    batch_size=args.test_batch_size, shuffle=False, **kwargs)


  """
  


In [6]:
print(next(enumerate(train_loader_fc1))[1][0].shape)


torch.Size([100, 4])


# NN Definitions

In [7]:
class ONNLinear(nn.Module):
    def __init__(self, in_features, out_features, weight_noise=0.00, zeros_init=False):
        super(ONNLinear, self).__init__()
        self.W_Opt = \
            nn.Parameter(torch.randn(out_features, in_features, requires_grad = True) / math.sqrt(in_features)) \
            if not zeros_init else \
            nn.Parameter(torch.zeros(out_features, in_features, requires_grad = True)) 
        self.noise = weight_noise
        
    def forward(self, x):
        self.W_Opt.data = self.W_Opt.data.clamp_(0,1)
        out = F.linear(x + self.noise*torch.rand(x.shape, requires_grad=False, device=x.device), self.W_Opt)
        return out

def exp(Y, params):
    return torch.exp(-1.*torch.mul(Y, params))

def NL_func(Y, nonlinear_paramters):
    return torch.add(-1.*torch.mul(exp(Y, nonlinear_paramters[:, 1]), nonlinear_paramters[:, 0]) + \
                      -1.*torch.mul(exp(Y, nonlinear_paramters[:, 3]), nonlinear_paramters[:, 2]), \
                      nonlinear_paramters[:, 0]+nonlinear_paramters[:, 2])/50

class ONNIntensifier(nn.Module):
    def __init__(self, func, coeffs):
        super(ONNIntensifier, self).__init__()
        self.func = func
        self.params = coeffs
        
    def forward(self, x):
        out = self.func(x, self.params)
        return out

In [8]:
""" Definition of autoencoder structure """

coeffs = np.load('./Nonlinearity_Curves_April_8_data_refit.npz')['coeffs']

class OpticalClassifier(nn.Module):
    def __init__(self, hidden_size, compressed_size, **kwargs):
        super().__init__()
        self.coeffs = nn.Parameter(torch.tensor(coeffs, dtype=torch.float32), requires_grad = False)
        self.fc1 = ONNLinear(1600, hidden_size, zeros_init=False)
        self.nonlinear = ONNIntensifier(NL_func, self.coeffs)
        self.fc2 = ONNLinear(hidden_size, compressed_size)
        self.digifc = nn.Linear(compressed_size, 5, bias=True)
            
    def forward(self, x):
        x = self.fc1(x)
        x = self.nonlinear(x)
        x = self.fc2(x)
        y = self.digifc(x)
        return y
    
class OpticalClassifier2(nn.Module):
    def __init__(self, hidden_size, compressed_size, **kwargs):
        super().__init__()
        self.coeffs = nn.Parameter(torch.tensor(coeffs, dtype=torch.float32), requires_grad = False)
        self.fc1 = ONNLinear(1600, hidden_size, zeros_init=False)
        self.nonlinear = ONNIntensifier(NL_func, self.coeffs)
        self.fc2 = ONNLinear(hidden_size, compressed_size)
        self.digifc = nn.Linear(compressed_size, 5, bias=True)
            
    def forward(self, x):
       # x = self.fc1(x)
       # x = self.nonlinear(x)
        x = self.fc2(x)
        y = self.digifc(x)
        return y
    
class OpticalClassifier3(nn.Module):
    def __init__(self, hidden_size, compressed_size, **kwargs):
        super().__init__()
        self.coeffs = nn.Parameter(torch.tensor(coeffs, dtype=torch.float32), requires_grad = False)
        self.fc1 = ONNLinear(1600, hidden_size, zeros_init=False)
        self.nonlinear = ONNIntensifier(NL_func, self.coeffs)
        self.fc2 = ONNLinear(hidden_size, compressed_size)
        self.digifc = nn.Linear(compressed_size, 5, bias=True)
            
    def forward(self, x):
       # x = self.fc1(x)
       # x = self.nonlinear(x)
#         x = self.fc2(x)
        y = self.digifc(x)
        return y

class digitalClassifier(nn.Module):
    def __init__(self, Nunits, **kwargs):
        super().__init__()
        self.fcs = nn.ModuleList([nn.Linear(i,j,**kwargs) for i, j in zip(Nunits[:-1], Nunits[1:])])

    def forward(self, X):
        X = X.view(X.size(0), -1)
        for i, fc in enumerate(self.fcs):
            X = fc(X)
            if fc is not self.fcs[-1]:
                X = F.relu(X)
        return X

In [9]:
 """ helper functions and classes """
    
# A manager for dynamical book-keeping of the top k accuracies and model checkpoints during training
class top_k_manager(object):
    def __init__(self, k=10):
        self.k_best = k
        self.top_k_metric =[0.0]*self.k_best
        self.top_k_paths = [""]*self.k_best
    
    # Compare the new_metric to the top k metrics in the past, and find its place.
    def update_rank(self, new_metric, path_keeping):
        for rank, record_metric in enumerate(self.top_k_metric):
            if record_metric <= new_metric:    
                if os.path.exists(self.top_k_paths[-1]):
                    os.remove(self.top_k_paths[-1])
                if rank < self.k_best - 1:
                    self.top_k_metric[rank+1:] = self.top_k_metric[rank:-1]
                    self.top_k_paths[rank+1:] = self.top_k_paths[rank:-1]  
                self.top_k_metric[rank] = new_metric
                self.top_k_paths[rank] = path_keeping
                return True # the top k list has been updated
        return False
    
# A simple hook class that returns the input and output of a layer during forward/backward pass
class Hook():
    def __init__(self, module, backward=False):
        if backward==False:
            self.hook = module.register_forward_hook(self.hook_fn)
        else:
            self.hook = module.register_backward_hook(self.hook_fn)
    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output
    def close(self):
        self.hook.remove()

In [8]:
model = OpticalClassifier(36,4)
model_ckpt = torch.load("./fc2_in_fc2_fine_AdamW_lr_0.017330_bs_128_lrf_0.0173_b1_0.9979_b2_0.9919_swalr_0.002_swastart_1000_v_5/ep2120.pt", map_location=torch.device('cpu')) 
model_state_dict = model_ckpt["model_state_dict"]
model.load_state_dict(model_state_dict)

<All keys matched successfully>

In [15]:
np.savez("fc2_retrain_Apr_13_v5_ep2120.npz", \
         fc1_W_Opt = model.fc1.W_Opt.detach().numpy(),\
         fc2_W_Opt = model.fc2.W_Opt.detach().numpy())

In [13]:
model = OpticalClassifier(36,4)
model(list(train_loader_input)[0][0][0:2,:])

tensor([[ 0.4691, -3.0467, -1.6177, -3.0145, -0.4717,  1.6622, -0.9318,  1.4723,
         -1.4515,  1.6655],
        [ 0.5101, -3.1806, -1.6883, -3.1227, -0.5376,  1.7101, -0.9358,  1.5433,
         -1.5169,  1.7194]], grad_fn=<AddmmBackward0>)

In [14]:
model.state_dict()

OrderedDict([('coeffs',
              tensor([[ 4.2605e+02,  3.5174e-02, -5.7327e+00,  5.2580e+00],
                      [ 6.1506e+01,  9.6526e-02,  5.8841e+02,  1.0870e-02],
                      [ 7.0700e+01,  1.1211e-01,  2.5055e+03,  1.9284e-03],
                      [ 5.2562e+02,  9.3226e-03,  8.1585e+01,  8.6148e-02],
                      [ 6.7444e+02,  1.0104e-02,  4.3830e+01,  1.5157e-01],
                      [ 7.3232e+02,  1.0176e-02,  6.8957e+01,  1.3964e-01],
                      [ 4.2756e+02,  1.1948e-02,  9.1883e+01,  7.2031e-02],
                      [ 2.4695e+02,  2.2003e-02,  1.8514e+02,  2.2003e-02],
                      [ 1.8211e+05,  6.9360e-06,  1.6148e+02,  3.2329e-02],
                      [ 5.5647e+02,  4.3578e-03,  9.8354e+01,  4.2465e-02],
                      [ 1.8594e+02,  3.3396e-02,  1.2071e+04,  1.0443e-04],
                      [ 1.4162e+04,  7.0703e-05,  4.4031e+02,  1.4798e-02],
                      [ 3.6910e+02,  1.7240e-02,  5.5869e+01,  9

# Define training and testing loops for each epoch

    Explanation on quantization-aware training algorithm used in train():
    1. The activations are calculated with a forward passing, involving only quantized weights and activations. Meanwhile, the non-quantized version of the weights is still kept in memory for later use..
    2. The gradients are calculated with backprop based on the quantized activations and weights calucated in (1). 
    3. The non-quantized version of parameters (weights + biases) are updated with gradients, and saved without quantization. Quantizating parameters immediately after updating them can erase small updates.
    4. Quantization is only later performed on these parameters during the evaluation of activations in forward passing or errors in backprop. During these evaluation steps, a quantized copy of the non-quantized parameters are used. Meanwhile, the original non-quantized version stays unchanged until updated with the next batch of calculated gradients.
    PS: the clipping of the parameters represents the straight-through esimator across hard tanh nonlinear layers.


In [19]:
def train(epoch, model, optimizer, criterion, train_loader0):
    model.train()
    # Loop around mini-batches in an epoch
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader0):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        
        optimizer.step()
        
        pred = outputs.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

        """
        if batch_idx % args.log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}"
            +f" ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")
        """
        if args.wandb:
            wandb.log({"train_loss": loss.item(), "batch": batch_idx}, step = epoch)
            
#     accuracy = 100.0 * correct/len(train_loader0.dataset)
#     print(f"Train Epoch: {epoch} \t\tLoss: {loss.item():.6f}\tAccuracy: {accuracy: .2f}%")

def test(epoch, model, criterion, test_loader0):
    model.eval()
    test_loss = 0
    correct = 0
    hks = []
    # Loop around mini-batches in an epoch
    with torch.no_grad():
        for data, target in test_loader0:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            outputs = model(data)
            test_loss += criterion(outputs, target).item() # sum up batch loss
            pred = outputs.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader0)
    accuracy = 100. * correct / len(test_loader0.dataset)
    val_stats = {"val_loss": test_loss, "accuracy": accuracy}
    for i, hk in enumerate(hks):
        val_stats[f"fc{i+1}"] = wandb.Histogram(hks[i].output.cpu())      
    
    if epoch%100 == 0:
        print(f"\nTest set: Epoch {epoch}, Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader0.dataset)}" 
              +f"({accuracy:.0f}%)\n")

    if args.wandb:
        wandb.log(val_stats, step=epoch)
    return test_loss, accuracy

In [20]:
""" The objective function runs a trial in a NAS study (a loop around epochs) """

def objective(trial, NAS_project_name, gpu_id, train_loader0, test_loader0):

    # Define the hyperparameter search space
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])
    lr_factor0 = 5E-3 * np.sqrt(128/100)
    lr_factor = trial.suggest_uniform("lrf", lr_factor0/2, lr_factor0*4)
    learning_rate = lr_factor * np.sqrt(batch_size/128)
    swa_lr = trial.suggest_uniform("swa_lr", 3E-5, 1E-3)
    beta1 = trial.suggest_uniform("beta1", 0.99, 0.9999)
    beta2 = trial.suggest_uniform("beta2", 0.99, 0.9999)
    swa_start = trial.suggest_categorical("swa_start", [3000, 4000])
    #warmup_epochs = trial.suggest_categorical("warmup_eps", [18, 24, 30])
    #model_description = f"Autoencoder_QAT_ar5wd5_lr_{learning_rate:.3f}_{lr_decay:.2f}" + f"_m_{momentum:.2f}" + f"_wp_{warmup_epochs}"+ f"_v_{trial.number}"
    model_description = f"S32_AdamW_lr_{learning_rate:.6f}_bs_{batch_size}_lrf_{lr_factor:.4f}_b1_{beta1:.4f}_b2_{beta2:.4f}_swalr_{swa_lr:.6f}_{swa_start}" + f"_v_{trial.number}"
    
    # Instantiate a BNN model
    model = OpticalClassifier3(36, 4)
#     model_ckpt = torch.load("./AdamW_lr_0.013669_bs_256_lrf_0.0097_b1_0.9977_b2_0.9255_swalr_0.003_1500_v_89/ep2781.pt", map_location=torch.device('cpu')) 
#     model_state_dict = model_ckpt["model_state_dict"]
#     model.load_state_dict(model_state_dict)
#     with torch.no_grad():
#         model.fc1.W_Opt.copy_(model_ckpt['model_state_dict']['fc1.W_Opt'])
#         model.fc2.W_Opt.copy_(model_ckpt['model_state_dict']['fc2.W_Opt'])
#         model.coeffs.copy_(model_ckpt['model_state_dict']['coeffs'])
#         model.nonlinear.params.copy_(model_ckpt['model_state_dict']['nonlinear.params'])

    if args.cuda:
        torch.cuda.set_device(gpu_id)
        model.cuda() # transfer the model from cpu to gpu
    
    # Set up logging if necessary
    if args.wandb:
        wandb.init(project=NAS_project_name, name=model_description, reinit=True)
        wandb.watch(model, log="all")

    # Configure loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    #optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, betas=(beta1, beta2), weight_decay=0e-4)
    
    swa_model = AveragedModel(model)
    scheduler = CosineAnnealingLR(optimizer, T_max=swa_start+600)
    swa_scheduler = SWALR(optimizer, swa_lr=swa_lr)
    
    train_loader0.batch_sampler.batch_size = batch_size

    # Loop around epoches
    tpk_mngr = top_k_manager(30)
    ckpt_save_path = "./" + model_description
    if not os.path.exists(ckpt_save_path):
        os.makedirs(ckpt_save_path)
    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, criterion, train_loader0)
        loss, accu = test(epoch, model, criterion, test_loader0)
        # schedule learning rate decay
        if epoch > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()
        # Save the best models aftering the training gets more stable
        if epoch > 30:
            if tpk_mngr.update_rank(accu, ckpt_save_path+f"/ep{epoch}.pt"):
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                }, ckpt_save_path+f"/ep{epoch}.pt")

    # Log the best models
    trial.set_user_attr('top 3 accuracy', torch.tensor(tpk_mngr.top_k_metric[:3]).mean().item()) # Save the best accuracy during the taining loop      
    if args.wandb:
        wandb.run.summary["top_k_accu"] = tpk_mngr.top_k_metric
        wandb.run.summary["top_k_paths"] = tpk_mngr.top_k_paths
    return torch.tensor(tpk_mngr.top_k_metric[:30]).mean().item() # return the average of top k accuracies to guide NAS

In [21]:
""" Remote projects for NAS by running an optuna study on a thread """

@ray.remote(num_cpus=0.2, num_gpus=0.2)
class Parallel_NAS_project(object):
    def __init__(self, NAS_project_name, rseed, gpu_id):
        os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
        torch.cuda.set_device(gpu_id)
        self.sampler = optuna.samplers.TPESampler(seed=rseed) 
        self.storage = f'sqlite:///'+NAS_project_name+'.db' # way to specify an SQL database
        self.study = optuna.create_study(study_name=NAS_project_name, storage=self.storage, 
                                sampler=self.sampler, direction="maximize", load_if_exists=True)
    
    def runStudy(self, gpu_id, train_loader, test_loader):
        self.study.optimize(lambda trial: objective(trial, NAS_project_name, gpu_id, train_loader, test_loader), n_trials=18)

In [23]:
""" Create remote projects for NAS """

ray.init(num_gpus=4, ignore_reinit_error=True)
gpu_list = [0,0,0,0]
rseeds = [1514,6,21,93258]

NAS_project_name = "Cell_July_7_linear_digifc_retrain"
workerList = []
for (rseed, gpu_id) in zip(rseeds, gpu_list): 
    worker = Parallel_NAS_project.remote(NAS_project_name, rseed, gpu_id)
    workerList.append(worker)
    print(rseed, gpu_id) 

#train_loader_id = ray.put(train_loader) # important for large data loaders, since they would surpass memory limit if passed as parameters to study functions.
#test_loader_id = ray.put(test_loader)
remaining_ids = []
for i, w in enumerate(workerList):
    test_id = w.runStudy.remote(gpu_list[i], train_loader_fc1, val_loader_fc1)
    remaining_ids.append(test_id)

while remaining_ids:
    done_ids, remaining_ids = ray.wait(remaining_ids)
    result_id = done_ids[0]
    print(done_ids, remaining_ids)
    ray.get(result_id)
    
ray.shutdown()

2022-07-06 20:03:43,309	INFO worker.py:879 -- Calling ray.init() again after it has already been called.


1514 0
6 0
21 0
93258 0


(Parallel_NAS_project pid=78170) [I 2022-07-06 20:03:45,184] Using an existing study with name 'Cell_July_7_linear_digifc_retrain' instead of creating a new one.
(Parallel_NAS_project pid=78196) [I 2022-07-06 20:03:45,203] Using an existing study with name 'Cell_July_7_linear_digifc_retrain' instead of creating a new one.
(Parallel_NAS_project pid=78194) [I 2022-07-06 20:03:45,284] Using an existing study with name 'Cell_July_7_linear_digifc_retrain' instead of creating a new one.
(Parallel_NAS_project pid=78186) [I 2022-07-06 20:03:45,347] Using an existing study with name 'Cell_July_7_linear_digifc_retrain' instead of creating a new one.
(Parallel_NAS_project pid=78194) wandb: Currently logged in as: gangsterkitty (use `wandb login --relogin` to force relogin)
(Parallel_NAS_project pid=78170) wandb: Currently logged in as: gangsterkitty (use `wandb login --relogin` to force relogin)
(Parallel_NAS_project pid=78196) wandb: Currently logged in as: gangsterkitty (use `wandb login --relo

(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78170) Test set: Epoch 100, Average loss: 1395.7052, Accuracy: 122/200(61%)
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78196) 
(Parallel_NAS_project pid=78196) Test set: Epoch 100, Average loss: 1509.5153, Accuracy: 130/200(65%)
(Parallel_NAS_project pid=78196) 
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_project pid=78194) Test set: Epoch 100, Average loss: 504.7755, Accuracy: 139/200(70%)
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_project pid=78186) 
(Parallel_NAS_project pid=78186) Test set: Epoch 100, Average loss: 776.3391, Accuracy: 139/200(70%)
(Parallel_NAS_project pid=78186) 
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78170) Test set: Epoch 200, Average loss: 454.2823, Accuracy: 135/200(68%)
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78196) 
(Parallel_NAS_project pid=78196) Test set: Epoch 200, Average loss: 638.6729, Accuracy: 144/200(72%)
(Parallel_NAS_proj

(Parallel_NAS_project pid=78170) Test set: Epoch 2600, Average loss: 19.0741, Accuracy: 158/200(79%)
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_project pid=78194) Test set: Epoch 2100, Average loss: 96.7722, Accuracy: 160/200(80%)
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_project pid=78186) 
(Parallel_NAS_project pid=78186) Test set: Epoch 2600, Average loss: 118.1649, Accuracy: 136/200(68%)
(Parallel_NAS_project pid=78186) 
(Parallel_NAS_project pid=78196) 
(Parallel_NAS_project pid=78196) Test set: Epoch 2700, Average loss: 17.5415, Accuracy: 142/200(71%)
(Parallel_NAS_project pid=78196) 
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78170) Test set: Epoch 2700, Average loss: 29.5167, Accuracy: 143/200(72%)
(Parallel_NAS_project pid=78170) 
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_project pid=78194) Test set: Epoch 2200, Average loss: 93.8906, Accuracy: 147/200(74%)
(Parallel_NAS_project pid=78194) 
(Parallel_NAS_proje

KeyboardInterrupt: 

In [24]:
ray.shutdown()

In [15]:
kill_gpu_processes()

NameError: name 'kill_gpu_processes' is not defined

# Check inputs

In [11]:
%matplotlib notebook

In [12]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1,2, figsize=(8,4))
sample = next(enumerate(train_loader_phys))[1]
i = 1
ax[0].imshow(sample[0][i,:].reshape(40,-1))
#ax[1].imshow(sample[2][i,:].reshape(100,-1))
print(sample[1][i])
print(f"sum of pixels {sample[0][i,:].sum()}")
print(sample)

<IPython.core.display.Javascript object>

tensor(0)
sum of pixels 232.93048095703125
[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [53]:
fig, ax = plt.subplots(1,1)
pxsum = next(enumerate(train_loader_phys))[1][0].sum(axis=1).detach().numpy()
plt.hist(pxsum, 50)
ax.set_title(f"mean pixel value = {pxsum.sum()/40**2}")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'mean pixel value = 20365.84')

# Check fc1

In [21]:
model0 = OpticalClassifier(36,4)
# model_ckpt = torch.load("./v120_ep626.pt", map_location=torch.device('cpu')) 
model_ckpt = torch.load("./descent89_AdamW_lr_0.000371_bs_128_lrf_0.0004_b1_0.9904_b2_0.9994_swalr_0.000689_1000_v_14/ep4390.pt", map_location=torch.device('cpu')) 
model_state_dict = model_ckpt["model_state_dict"]
model0.load_state_dict(model_state_dict)

hk_fc1 = Hook(model0.fc1)
hk_act1 = Hook(model0.nonlinear)
hk_fc2 = Hook(model0.fc2)
model0(next(enumerate(train_loader_phys))[1][0])
hk_fc1.output.cpu().shape

torch.Size([1000, 36])

In [22]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

fig, ax = plt.subplots(1, 1, figsize=(8, 5))

fc1_calc_batch = hk_fc1.output.cpu()[range(100),:].detach().numpy()
#fc1_meas_batch = train_fc1[:100,:].detach().numpy()
fc1_meas_batch = next(enumerate(train_loader_fc1))[1][0][:100,:].detach().numpy()
reg = LinearRegression().fit(fc1_calc_batch.reshape(-1,1), fc1_meas_batch.reshape(-1,1))
for idx in range(100):
    fc1_calc = fc1_calc_batch[idx,:]
    fc1_meas = fc1_meas_batch[idx,:]
    ax.scatter(fc1_calc, fc1_meas, alpha=0.5, linestyle="solid", s=12, label=f"sample {idx}")

x0 = np.arange(0,300,1).reshape(-1,1)
T2_inv = lambda x: x * reg.coef_ + reg.intercept_
ax.plot(x0, T2_inv(x0))
# axes labels and title
ax.set_xlabel("fc1 calculated from physical ground truth", fontname="Arial")
ax.set_ylabel("fc1 measured from camera", fontname="Arial")
ax.set_xlim(0,)
ax.set_ylim(0,)
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()
print(reg.coef_)
print(reg.intercept_)

<IPython.core.display.Javascript object>

findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.


[[3279.6426]]
[-10876.125]


# Check act1

In [38]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

fig, ax = plt.subplots(1, 1, figsize=(8, 5))

act1_calc_batch = hk_act1.output.cpu()[range(100),:].detach().numpy()
act1_meas_batch = next(enumerate(train_loader_act1))[1][0][:100,:].detach().numpy()
reg = LinearRegression().fit(act1_calc_batch.reshape(-1,1), act1_meas_batch.reshape(-1,1))
for idx in range(100):
    act1_calc = act1_calc_batch[idx,:]
    act1_meas = act1_meas_batch[idx,:]
    ax.scatter(act1_calc, act1_meas, alpha=0.5, linestyle="solid", s=12, label=f"sample {idx}")

x0 = np.arange(0,10,1).reshape(-1,1)
T2_inv = lambda x: x * reg.coef_ + reg.intercept_
ax.plot(x0, T2_inv(x0))
# axes labels and title
ax.set_xlabel("fc2 calculated from physical ground truth", fontname="Arial")
ax.set_ylabel("fc2 measured from camera", fontname="Arial")
ax.set_xlim(0,)
ax.set_ylim(0,)
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()
print(reg.coef_)
print(reg.intercept_)

<IPython.core.display.Javascript object>



[[0.85290444]]
[0.7097058]


# Check Nonlinearity

In [22]:
from scipy.optimize import curve_fit

def func2(x, b, c, d, e):
    return (-1.*b*np.exp(-c*x) - d*np.exp(-e*x) + b + d)

coeffs_refit = np.load('./Nonlinearity_Curves_April_8_data_refit.npz')['coeffs']

# model = OpticalClassifier(36,4)
# model_ckpt = torch.load("./v120_ep626.pt", map_location=torch.device('cpu')) 
# model_state_dict = model_ckpt["model_state_dict"]
# model.load_state_dict(model_state_dict)
# model.coeffs.copy_(torch.tensor(coeffs_refit, dtype=torch.float32))

fig, axes = plt.subplots(6, 6, figsize = (18, 18))
# x0 = torch.arange(0, 400, 0.1)
# y0s = model.nonlinear(x0.tile(36,1).T)
p0s = np.ones((36,4))
p0s[0,:] = [20, 4, 0, 10]
p0s[3,:] = [10, 1/3, 1, 1]
p0s[5,:] = [20, 1/3, 1, 1]
p0s[8,:] = [100, 1/100, 0, 1]
p0s[12,:] = [10, 1/2, 0, 1]
p0s[19,:] = [10, 1/2, 0, 1]
p0s[23,:] = [20, 1/2, 0, 1]
p0s[33,:] = [10, 1/2, 0, 1]
p0s[35,:] = [20, 1/2, 0, 1]
p0s[:,1] = p0s[:,1]/100
p0s[:,3] = p0s[:,3]/100
# T1_inv = lambda x: (x+7843.875)/2986.897
# T2_inv = lambda x: x/156575.98
coeffs_new = np.zeros(p0s.shape)
for k in range(36):
    i = k//6
    j = k%6
    ax = axes[i,j]
#     ax.plot(x0, y0s[:,k], 'r')
    ax.scatter(0, 0, s=10, c='k')
    x = train_fc1[:,k].detach().numpy()
    y = train_act1[:,k].detach().numpy()
    idx = np.argsort(x)
    x = x[idx]
    y = y[idx]
    ax.scatter(x, y, s=1, alpha=0.3)
    ax.set_xlabel('Input Intensity')
    ax.set_ylabel('Output Intensity')
#     popt, _ = curve_fit(func2, x, y, p0=p0s[k,:], maxfev=5000)
#     print(popt)
#     coeffs_new[k,:] = popt
#     coeffs_new[k,0] = coeffs_new[k,0]*50
#     coeffs_new[k,2] = coeffs_new[k,2]*50
#     ax.plot(x0, func2(x0, *coeffs_new[k,:])/50, 'r--')
#     ax.plot(x0, func2(x0, *coeffs_orig[k,:])/50, 'b')
    

        #ax[i, j].set_xticks([])
        #ax[i, j].set_yticks([])
#plt.savefig('Old_intensifier_nonlinearity', dpi = 1200)
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

# Check fc2

In [58]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

fig, ax = plt.subplots(1, 1, figsize=(8, 5))

fc2_calc_batch = hk_fc2.output.cpu()[range(100),:].detach().numpy()
fc2_meas_batch = next(enumerate(train_loader_fc2))[1][0][:100,:].detach().numpy()
reg = LinearRegression().fit(fc2_calc_batch.reshape(-1,1), fc2_meas_batch.reshape(-1,1))
for idx in range(100):
    fc2_calc = fc2_calc_batch[idx,:]
    fc2_meas = fc2_meas_batch[idx,:]
    ax.scatter(fc2_calc, fc2_meas, alpha=0.5, linestyle="solid", s=12, label=f"sample {idx}")

# x0 = np.arange(0,10,1).reshape(-1,1)
# T2_inv = lambda x: x * reg.coef_ + reg.intercept_
# ax.plot(x0, T2_inv(x0))
# axes labels and title
ax.set_xlabel("fc2 calculated from physical ground truth", fontname="Arial")
ax.set_ylabel("fc2 measured from camera", fontname="Arial")
ax.set_xlim(0,)
ax.set_ylim(0,)
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()
print(reg.coef_)
print(reg.intercept_)

<IPython.core.display.Javascript object>



[[0.17847957]]
[30.85873]


In [14]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.optimize import curve_fit

def T2(x, a, b, c):
    return a*np.exp(b*x) + c

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

fc2_calc_batch = hk_fc2.output.cpu()[range(3000),:].detach().numpy()
#fc2_meas_batch = train_fc2[:100,:].detach().numpy()
fc2_meas_batch = next(enumerate(train_loader_fc2))[1][0][:3000,:].detach().numpy()
x0 = np.arange(0,120,1).reshape(-1,1)
recali_coeffs = []
recali_bias = []

for d in range(4):
    fc2_calc = fc2_calc_batch[:,d]
    fc2_meas = fc2_meas_batch[:,d]
    ax.scatter(fc2_calc, fc2_meas, alpha=0.3, s=5, label=f"fc2_out ROI {d}")
#     popt, _ = curve_fit(T2, fc2_calc, fc2_meas, p0=p0s[d,:], maxfev=5000)
#     ax.plot(x0, T2(x0, *popt))
#     fc2_recali_coeffs[d,:] = popt
    reg = LinearRegression().fit(fc2_calc_batch[:,d].reshape(-1,1), fc2_meas_batch[:,d].reshape(-1,1))
    recali_coeffs.append(reg.coef_.item())
    recali_bias.append(reg.intercept_.item())
    T2_inv = lambda x: x * reg.coef_ + reg.intercept_
    ax.plot(x0, T2_inv(x0))
# for idx in range(100):
#     fc2_calc = fc2_calc_batch[idx,:]
#     fc2_meas = fc2_meas_batch[idx,:]
#     ax.scatter(fc2_calc, fc2_meas, alpha=0.5, linestyle="solid", s=12, label=f"sample {idx}")


#T2_inv = lambda x: x * reg.coef_ + reg.intercept_
#ax.plot(x0, T2_inv(x0))
# axes labels and title
ax.set_xlabel("fc2 output calculated from the digital NN", fontname="Arial")
ax.set_ylabel("fc2 output measured from camera (pixel value)", fontname="Arial")
ax.set_xlim(0,)
ax.set_ylim(0,)
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()
recali_coeffs = torch.tensor(recali_coeffs)
recali_bias = torch.tensor(recali_bias)

print(recali_coeffs)
print(recali_bias)
#plt.savefig("fc2_out_linearly_corrected.png", dpi=300, format="png")


<IPython.core.display.Javascript object>

tensor([1.0000, 1.0000, 1.0000, 1.0000])
tensor([7.6294e-06, 1.1444e-05, 0.0000e+00, 0.0000e+00])


# Test Runs

In [61]:
model = OpticalClassifier3(36, 4)

#digifc_init_params = model.digifc._parameters
# model_ckpt = torch.load("./Gen3_fc2_finetune_v12_ep1037.pt", map_location=torch.device('cpu')) 
# model_state_dict = model_ckpt["model_state_dict"]
# with torch.no_grad():
#     model.fc2.W_Opt.copy_(model_state_dict['fc2.W_Opt'])
#     model.digifc.weight.copy_(model_state_dict['digifc.weight'])
#     model.digifc.bias.copy_(model_state_dict['digifc.bias'])
#model.load_state_dict(model_state_dict)
#model.fc2.W_Opt.copy_(model_state_dict["fc2.W_Opt"])
#model.fc1.training = False
#model.digifc._parameters = digifc_init_params


In [72]:
""" perceptron baseline """

args.wandb = False

model = OpticalClassifier3(36, 4)

if args.cuda:
    torch.cuda.set_device(0)
    model.cuda() # transfer the model from cpu to gpu

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=4E-3, betas=(0.999, 0.999), weight_decay=0e-4)

# Loop around epoches
for epoch in range(1, args.epochs + 1):
    train(epoch, model, optimizer, criterion, train_loader_fc2)
    loss, accu = test(epoch, model, criterion, test_loader_fc2)


Test set: Epoch 100, Average loss: 2.7936, Accuracy: 79/200(40%)


Test set: Epoch 200, Average loss: 1.1618, Accuracy: 125/200(62%)


Test set: Epoch 300, Average loss: 0.6506, Accuracy: 143/200(72%)


Test set: Epoch 400, Average loss: 0.5189, Accuracy: 158/200(79%)


Test set: Epoch 500, Average loss: 0.4667, Accuracy: 164/200(82%)


Test set: Epoch 600, Average loss: 0.4136, Accuracy: 161/200(80%)


Test set: Epoch 700, Average loss: 0.3866, Accuracy: 167/200(84%)


Test set: Epoch 800, Average loss: 0.3652, Accuracy: 168/200(84%)


Test set: Epoch 900, Average loss: 0.3502, Accuracy: 171/200(86%)


Test set: Epoch 1000, Average loss: 0.3357, Accuracy: 172/200(86%)


Test set: Epoch 1100, Average loss: 0.3256, Accuracy: 174/200(87%)


Test set: Epoch 1200, Average loss: 0.3178, Accuracy: 174/200(87%)


Test set: Epoch 1300, Average loss: 0.3105, Accuracy: 175/200(88%)


Test set: Epoch 1400, Average loss: 0.3074, Accuracy: 173/200(86%)


Test set: Epoch 1500, Average loss: 0.3017,

In [16]:
model = OpticalClassifier3(36, 4)

#digifc_init_params = model.digifc._parameters
model_ckpt = torch.load("./S32_AdamW_lr_0.018653_bs_128_lrf_0.0187_b1_0.9904_b2_0.9994_swalr_0.000689_3000_v_2/ep5973.pt", map_location=torch.device('cpu')) 
model_state_dict = model_ckpt["model_state_dict"]
# with torch.no_grad():
#     model.fc2.W_Opt.copy_(model_state_dict['fc2.W_Opt'])
#     model.digifc.weight.copy_(model_state_dict['digifc.weight'])
#     model.digifc.bias.copy_(model_state_dict['digifc.bias'])
model.load_state_dict(model_state_dict)
#model.fc2.W_Opt.copy_(model_state_dict["fc2.W_Opt"])
#model.fc1.training = False
#model.digifc._parameters = digifc_init_params


<All keys matched successfully>

In [17]:
import matplotlib.pyplot as plt

model.cpu()
# hkfc3 = Hook(model.encoder_nn[4])
B = next(enumerate(test_loader_fc1))[1]
test_images = B[0].to(device="cpu")
test_labels = B[1]
pred = model(test_images).data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct = pred.eq(test_labels.data.view_as(pred)).cpu().sum()
accuracy = 100. * correct / len(test_labels)
print(correct, accuracy)

tensor(177) tensor(88.5000)


In [92]:
hk_fc1 = Hook(model.fc1)
model(next(enumerate(train_loader_phys))[1][0].cuda())
plt.figure()
x = hk_fc1.output
y = model.nonlinear(x)
x, y = x.cpu().detach().numpy(), y.cpu().detach().numpy()
for i in range(x.shape[0]):
    plt.scatter(x[i,:], y[i,:], s=6)

<IPython.core.display.Javascript object>

In [105]:
args.wandb = False

model = OpticalClassifier(36,4)

if args.cuda:
    torch.cuda.set_device(0)
    model.cuda() # transfer the model from cpu to gpu

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=5E-4, betas=(0.9, 0.999), weight_decay=0e-4)

# Loop around epoches
for epoch in range(1, args.epochs + 1):
    train(epoch, model, optimizer, criterion, train_loader_phys)
    loss, accu = test(epoch, model, criterion, test_loader_phys)

Train Epoch: 1 		Loss: 2.285256	Accuracy:  7.10%

Test set: Epoch 1, Average loss: 2.3046, Accuracy: 8/200(4%)

Train Epoch: 2 		Loss: 2.197292	Accuracy:  5.00%

Test set: Epoch 2, Average loss: 2.2419, Accuracy: 8/200(4%)

Train Epoch: 3 		Loss: 2.204755	Accuracy:  6.40%

Test set: Epoch 3, Average loss: 2.1860, Accuracy: 17/200(8%)

Train Epoch: 4 		Loss: 2.168140	Accuracy:  12.00%

Test set: Epoch 4, Average loss: 2.1346, Accuracy: 34/200(17%)

Train Epoch: 5 		Loss: 2.064961	Accuracy:  17.80%

Test set: Epoch 5, Average loss: 2.0861, Accuracy: 39/200(20%)

Train Epoch: 6 		Loss: 2.069715	Accuracy:  19.60%

Test set: Epoch 6, Average loss: 2.0395, Accuracy: 40/200(20%)

Train Epoch: 7 		Loss: 1.863595	Accuracy:  28.70%

Test set: Epoch 7, Average loss: 1.9937, Accuracy: 73/200(36%)

Train Epoch: 8 		Loss: 1.972235	Accuracy:  38.10%

Test set: Epoch 8, Average loss: 1.9472, Accuracy: 77/200(38%)

Train Epoch: 9 		Loss: 2.033425	Accuracy:  38.70%

Test set: Epoch 9, Average loss: 1.90

Train Epoch: 71 		Loss: 0.881938	Accuracy:  73.40%

Test set: Epoch 71, Average loss: 0.8046, Accuracy: 149/200(74%)

Train Epoch: 72 		Loss: 0.845748	Accuracy:  71.50%

Test set: Epoch 72, Average loss: 0.7952, Accuracy: 148/200(74%)

Train Epoch: 73 		Loss: 0.798952	Accuracy:  74.70%

Test set: Epoch 73, Average loss: 0.7878, Accuracy: 150/200(75%)

Train Epoch: 74 		Loss: 0.913201	Accuracy:  74.80%

Test set: Epoch 74, Average loss: 0.7785, Accuracy: 149/200(74%)

Train Epoch: 75 		Loss: 0.837537	Accuracy:  75.90%

Test set: Epoch 75, Average loss: 0.7734, Accuracy: 148/200(74%)

Train Epoch: 76 		Loss: 0.832203	Accuracy:  73.90%

Test set: Epoch 76, Average loss: 0.7606, Accuracy: 149/200(74%)

Train Epoch: 77 		Loss: 0.829254	Accuracy:  75.00%

Test set: Epoch 77, Average loss: 0.7537, Accuracy: 147/200(74%)

Train Epoch: 78 		Loss: 0.749142	Accuracy:  76.70%

Test set: Epoch 78, Average loss: 0.7464, Accuracy: 150/200(75%)

Train Epoch: 79 		Loss: 0.824553	Accuracy:  75.00%

Test


Train Epoch: 140 		Loss: 0.531157	Accuracy:  82.20%

Test set: Epoch 140, Average loss: 0.5049, Accuracy: 165/200(82%)

Train Epoch: 141 		Loss: 0.554344	Accuracy:  82.00%

Test set: Epoch 141, Average loss: 0.5023, Accuracy: 162/200(81%)

Train Epoch: 142 		Loss: 0.671980	Accuracy:  81.70%

Test set: Epoch 142, Average loss: 0.4993, Accuracy: 166/200(83%)

Train Epoch: 143 		Loss: 0.509145	Accuracy:  83.80%

Test set: Epoch 143, Average loss: 0.4952, Accuracy: 170/200(85%)

Train Epoch: 144 		Loss: 0.617845	Accuracy:  82.80%

Test set: Epoch 144, Average loss: 0.4978, Accuracy: 165/200(82%)

Train Epoch: 145 		Loss: 0.467394	Accuracy:  83.50%

Test set: Epoch 145, Average loss: 0.4943, Accuracy: 168/200(84%)

Train Epoch: 146 		Loss: 0.493056	Accuracy:  83.50%

Test set: Epoch 146, Average loss: 0.4921, Accuracy: 169/200(84%)

Train Epoch: 147 		Loss: 0.568003	Accuracy:  82.70%

Test set: Epoch 147, Average loss: 0.4898, Accuracy: 169/200(84%)

Train Epoch: 148 		Loss: 0.485628	Accur

Train Epoch: 276 		Loss: 0.451880	Accuracy:  84.70%

Test set: Epoch 276, Average loss: 0.3508, Accuracy: 181/200(90%)

Train Epoch: 277 		Loss: 0.370700	Accuracy:  86.60%

Test set: Epoch 277, Average loss: 0.3494, Accuracy: 181/200(90%)

Train Epoch: 278 		Loss: 0.324027	Accuracy:  85.00%

Test set: Epoch 278, Average loss: 0.3523, Accuracy: 175/200(88%)

Train Epoch: 279 		Loss: 0.334773	Accuracy:  85.70%

Test set: Epoch 279, Average loss: 0.3470, Accuracy: 180/200(90%)

Train Epoch: 280 		Loss: 0.359043	Accuracy:  85.50%

Test set: Epoch 280, Average loss: 0.3495, Accuracy: 181/200(90%)

Train Epoch: 281 		Loss: 0.324274	Accuracy:  85.60%

Test set: Epoch 281, Average loss: 0.3468, Accuracy: 181/200(90%)

Train Epoch: 282 		Loss: 0.406358	Accuracy:  85.10%

Test set: Epoch 282, Average loss: 0.3490, Accuracy: 177/200(88%)

Train Epoch: 283 		Loss: 0.361800	Accuracy:  84.90%

Test set: Epoch 283, Average loss: 0.3472, Accuracy: 180/200(90%)

Train Epoch: 284 		Loss: 0.363714	Accura

Train Epoch: 412 		Loss: 0.391282	Accuracy:  86.40%

Test set: Epoch 412, Average loss: 0.3014, Accuracy: 180/200(90%)

Train Epoch: 413 		Loss: 0.337651	Accuracy:  85.10%

Test set: Epoch 413, Average loss: 0.3057, Accuracy: 178/200(89%)

Train Epoch: 414 		Loss: 0.327940	Accuracy:  86.30%

Test set: Epoch 414, Average loss: 0.3018, Accuracy: 180/200(90%)

Train Epoch: 415 		Loss: 0.307455	Accuracy:  86.10%

Test set: Epoch 415, Average loss: 0.3043, Accuracy: 180/200(90%)

Train Epoch: 416 		Loss: 0.288037	Accuracy:  87.00%

Test set: Epoch 416, Average loss: 0.3001, Accuracy: 181/200(90%)

Train Epoch: 417 		Loss: 0.272555	Accuracy:  87.20%

Test set: Epoch 417, Average loss: 0.3096, Accuracy: 176/200(88%)

Train Epoch: 418 		Loss: 0.269760	Accuracy:  85.40%

Test set: Epoch 418, Average loss: 0.3038, Accuracy: 180/200(90%)

Train Epoch: 419 		Loss: 0.348869	Accuracy:  86.60%

Test set: Epoch 419, Average loss: 0.3003, Accuracy: 180/200(90%)

Train Epoch: 420 		Loss: 0.347946	Accura

KeyboardInterrupt: 

In [106]:
hk_fc1 = Hook(model.fc1)
model(next(enumerate(train_loader_phys))[1][0].cuda())
plt.figure()
x = hk_fc1.output
y = model.nonlinear(x)
x, y = x.cpu().detach().numpy(), y.cpu().detach().numpy()
for i in range(x.shape[0]):
    plt.scatter(x[i,:], y[i,:], s=6)

<IPython.core.display.Javascript object>

In [33]:
args.wandb = False

if args.cuda:
    torch.cuda.set_device(0)
    model.cuda() # transfer the model from cpu to gpu

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=5E-4, betas=(0.9, 0.999), weight_decay=0e-4)

# Loop around epoches
for epoch in range(1, args.epochs + 1):
    train(epoch, model, optimizer, criterion, train_loader_phys)
    loss, accu = test(epoch, model, criterion, test_loader_phys)


Test set: Epoch 100, Average loss: 1.0404, Accuracy: 113/200(56%)



KeyboardInterrupt: 

# Plots

In [20]:
%matplotlib notebook

In [25]:
model = OpticalClassifier2(36, 4)
# model_ckpt = torch.load("./Reload_AdamW_lr_0.005657_bs_64_lrf_0.0080_b1_0.9847_b2_0.9436_swalr_0.005_1000_v_59/ep290.pt", map_location=torch.device('cpu')) 
# model_ckpt = torch.load("./Reload_AdamW_lr_0.005736_bs_64_lrf_0.0081_b1_0.9837_b2_0.9385_swalr_0.005_1000_v_58/ep2159.pt", map_location=torch.device('cpu')) 
model_ckpt = torch.load("./AdamW_lr_0.005140_bs_64_lrf_0.0073_b1_0.9995_b2_0.9996_swalr_0.000422_1500_v_17/ep921.pt", map_location=torch.device('cpu')) 
# model_ckpt = torch.load("./Reload_AdamW_lr_0.004407_bs_64_lrf_0.0062_b1_0.9920_b2_0.9577_swalr_0.004_1000_v_33/ep2478.pt", map_location=torch.device('cpu')) 

model_state_dict = model_ckpt["model_state_dict"]
model.load_state_dict(model_state_dict)

<All keys matched successfully>

In [26]:
model.nonlinear.params

Parameter containing:
tensor([[ 4.2605e+02,  3.5174e-02, -5.7327e+00,  5.2580e+00],
        [ 6.1506e+01,  9.6526e-02,  5.8841e+02,  1.0870e-02],
        [ 7.0700e+01,  1.1211e-01,  2.5055e+03,  1.9284e-03],
        [ 5.2562e+02,  9.3226e-03,  8.1585e+01,  8.6148e-02],
        [ 6.7444e+02,  1.0104e-02,  4.3830e+01,  1.5157e-01],
        [ 7.3232e+02,  1.0176e-02,  6.8957e+01,  1.3964e-01],
        [ 4.2756e+02,  1.1948e-02,  9.1883e+01,  7.2031e-02],
        [ 2.4695e+02,  2.2003e-02,  1.8514e+02,  2.2003e-02],
        [ 1.8211e+05,  6.9360e-06,  1.6148e+02,  3.2329e-02],
        [ 5.5647e+02,  4.3578e-03,  9.8354e+01,  4.2465e-02],
        [ 1.8594e+02,  3.3396e-02,  1.2071e+04,  1.0443e-04],
        [ 1.4162e+04,  7.0703e-05,  4.4031e+02,  1.4798e-02],
        [ 3.6910e+02,  1.7240e-02,  5.5869e+01,  9.6804e-02],
        [ 2.4974e+02,  3.2915e-02,  7.8178e+03,  5.9642e-05],
        [ 3.2570e+02,  1.7526e-02,  5.8618e+00,  4.8532e-01],
        [ 1.5857e+02,  2.0577e-02,  1.4929e+02, 

In [27]:
model_ckpt['model_state_dict']

OrderedDict([('coeffs',
              tensor([[ 4.2605e+02,  3.5174e-02, -5.7327e+00,  5.2580e+00],
                      [ 6.1506e+01,  9.6526e-02,  5.8841e+02,  1.0870e-02],
                      [ 7.0700e+01,  1.1211e-01,  2.5055e+03,  1.9284e-03],
                      [ 5.2562e+02,  9.3226e-03,  8.1585e+01,  8.6148e-02],
                      [ 6.7444e+02,  1.0104e-02,  4.3830e+01,  1.5157e-01],
                      [ 7.3232e+02,  1.0176e-02,  6.8957e+01,  1.3964e-01],
                      [ 4.2756e+02,  1.1948e-02,  9.1883e+01,  7.2031e-02],
                      [ 2.4695e+02,  2.2003e-02,  1.8514e+02,  2.2003e-02],
                      [ 1.8211e+05,  6.9360e-06,  1.6148e+02,  3.2329e-02],
                      [ 5.5647e+02,  4.3578e-03,  9.8354e+01,  4.2465e-02],
                      [ 1.8594e+02,  3.3396e-02,  1.2071e+04,  1.0443e-04],
                      [ 1.4162e+04,  7.0703e-05,  4.4031e+02,  1.4798e-02],
                      [ 3.6910e+02,  1.7240e-02,  5.5869e+01,  9

In [28]:
import torchvision
import matplotlib.pyplot as plt

W = model.fc1.W_Opt.to(device='cpu')
W = W.view(-1,1,40,40)
W = W.repeat(1,3,1,1)
W_vis = torchvision.utils.make_grid(W, padding=4, pad_value=1, nrow=6)
plt.figure()
plt.imshow(W_vis[0,:,:].detach().numpy(), cmap="Reds")
plt.axis("off")
plt.colorbar()
plt.show()
#plt.savefig(f"./figures/publish/fcs_0_weights.svg", dpi=300, format="svg")

<IPython.core.display.Javascript object>

In [29]:
(model0.fc1.W_Opt - model.fc1.W_Opt.cpu()).max(axis=1)

torch.return_types.max(
values=tensor([1.0601, 1.0363, 1.0495, 1.0669, 1.0445, 1.0398, 1.0139, 1.0640, 0.9879,
        1.0448, 1.0443, 1.0455, 1.0696, 1.0284, 1.0683, 1.0279, 1.0697, 1.0802,
        1.0582, 1.0468, 1.0545, 1.0059, 1.0492, 1.0572, 1.0637, 1.0453, 1.0505,
        1.0421, 1.0508, 1.0212, 1.0607, 1.0198, 1.0426, 1.0524, 1.0596, 1.0429],
       grad_fn=<MaxBackward0>),
indices=tensor([1098, 1026,  974,  414,  382,  480, 1537,  497,   96,  657,   23, 1088,
         605, 1224,  713, 1224,  869,  999,  600,   14,  341,  501,  503,  894,
         308,  897, 1039,  524,  452,   97,   15, 1262,   16,   23,  267, 1016]))

In [30]:
import torchvision
import matplotlib.pyplot as plt

model.to("cpu")
W = model.fc2.W_Opt
W = W.view(-1,1,6,6)
W = W.repeat(1,3,1,1)
W_vis = torchvision.utils.make_grid(W, padding=4, pad_value=1, nrow=10)
plt.figure()
plt.imshow(W_vis[0,:,:].detach().numpy(), cmap="Reds")
plt.axis("off")
plt.colorbar(ticks=np.linspace(-1,1, 5))
plt.show()
#plt.savefig(f"./figures/publish/fcs_0_weights.svg", dpi=300, format="svg")

<IPython.core.display.Javascript object>

In [31]:
model.fc2.W_Opt

Parameter containing:
tensor([[8.9814e-01, 3.2989e-01, 3.1654e-01, 0.0000e+00, 5.9775e-01, 2.7443e-01,
         0.0000e+00, 5.1049e-01, 0.0000e+00, 1.0000e+00, 2.9444e-01, 5.5828e-03,
         1.0000e+00, 2.4475e-01, 0.0000e+00, 1.1375e-01, 0.0000e+00, 2.1852e-02,
         6.8169e-01, 0.0000e+00, 5.4452e-03, 1.3471e-01, 4.1068e-01, 3.1356e-02,
         0.0000e+00, 8.0786e-01, 1.1044e-04, 3.8939e-01, 8.3131e-01, 0.0000e+00,
         2.7715e-01, 0.0000e+00, 6.4145e-01, 5.5458e-01, 0.0000e+00, 2.4005e-02],
        [0.0000e+00, 5.3873e-01, 0.0000e+00, 4.0764e-01, 9.3904e-01, 3.9323e-01,
         1.5241e-02, 4.0868e-01, 0.0000e+00, 1.3408e-02, 0.0000e+00, 8.8680e-01,
         0.0000e+00, 2.5916e-01, 6.4483e-02, 7.4736e-01, 9.3495e-01, 1.4731e-01,
         0.0000e+00, 8.5929e-03, 1.3223e-01, 1.0000e+00, 0.0000e+00, 0.0000e+00,
         4.8253e-02, 1.4637e-01, 5.6388e-02, 6.4587e-01, 3.0934e-01, 0.0000e+00,
         0.0000e+00, 7.8499e-01, 6.4268e-01, 4.7351e-01, 8.7048e-01, 1.5005e-01],
    

In [32]:
model0.digifc.bias - model.digifc.bias.cpu()

tensor([-5.6378, -2.5185, -1.8313,  1.1078, -4.1777], grad_fn=<SubBackward0>)

In [33]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(9, 5.5))

line, = ax.plot(model.fc2.W_Opt.cpu().flatten().detach().numpy(), alpha=0.5, linestyle="solid", label="post-retrain", zorder=0)

# axes labels and title
ax.set_xlabel("x axis", fontname="Arial")
ax.set_ylabel("y axis", fontname="Arial")
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()

<IPython.core.display.Javascript object>

In [34]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(9, 5.5))

#line0, = ax.plot(model0.digifc.weight.flatten().detach().numpy(), alpha=0.5, linestyle="solid", label="pre-retrain", zorder=0)
line, = ax.plot(model.digifc.weight.cpu().flatten().detach().numpy(), alpha=0.5, linestyle="solid", label="post-retrain", zorder=0)

# axes labels and title
ax.set_xlabel("x axis", fontname="Arial")
ax.set_ylabel("y axis", fontname="Arial")
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()

<IPython.core.display.Javascript object>

In [35]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(9, 5.5))

#line0, = ax.plot(model0.digifc.bias.flatten().detach().numpy(), alpha=0.5, linestyle="solid", label="pre-retrain", zorder=0)
line, = ax.plot(model.digifc.bias.cpu().flatten().detach().numpy(), alpha=0.5, linestyle="solid", label="post-retrain", zorder=0)

# axes labels and title
ax.set_xlabel("x axis", fontname="Arial")
ax.set_ylabel("y axis", fontname="Arial")
#plt.title("title")

#auto legend setting and figure config
ax.legend()
plt.tight_layout()

plt.show()

<IPython.core.display.Javascript object>

In [36]:
def model_validation(model, device, data_loader):
    test_loss = 0
    correct = 0
    criterion = nn.CrossEntropyLoss()
    for data, target in data_loader:
        if args.cuda:
            data, target = data.to(device), target.to(device)
        output = model(data)
        test_loss += criterion(output, target).item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(data_loader.dataset)
    accuracy = 100. * correct / len(data_loader.dataset)
    return {"accuracy": accuracy, "test_loss": test_loss}

In [37]:
model_validation(model, 'cpu', test_loader_act1)

{'accuracy': tensor(93.), 'test_loss': 0.0022452685050666334}

In [21]:
np.savez("Cell_nonlinear_fc2digifc_retrain_Apr_18_v17_ep921.npz", fc2_W_Opt = model.fc2.W_Opt.detach().numpy())

In [33]:
print(torch.__version__, optuna.__version__, ray.__version__)

1.11.0+cu102 1.5.0 1.11.0
