In [2]:
import os
import sys
import math
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
torch.cuda.empty_cache()

from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd

In [3]:
current_dir = os.path.dirname(os.path.realpath('__file__'))


from utils import plot_3d_slices
from utils import set_seeds
from utils import set_device
from utils import get_optimizer_nn
from utils import init_weights_xavier
from utils import get_args
from utils import save_args
from utils import Log
data_dir = os.path.join(current_dir, 'data')

sys.path.append(data_dir)
from make_dataset import get_dataloaders

# Construct the path to the models directory
models_dir = os.path.join(current_dir, 'models')

# Add the models directory to sys.path
sys.path.append(models_dir)
from pipnet import get_network, PIPNet
from train_model import train_pipnet
#from test_model import eval_pipnet
from resnet_features import video_resnet18_features

import datetime
from importlib import reload

In [5]:
now2=datetime.datetime.now()

In [6]:
(now2-now).seconds

5

In [3]:
net = "3Dresnet18"
task_performed = "Train PIPNet"

#args = get_args(current_fold, net, task_performed)

#TODO update
args={
    'seed':42,
    'experiment_folder':'data/experiment_1',
    'lr':.05,
    'lr_net':.05,
    'lr_block':.05,
    'weight_decay':.05,
    'gamma':2,
    'step_size':1,
    'batch_size':1,
    'epochs':5,
    'num_classes':2,
    'channels':3,
    'net':"3Dresnet18",
    'num_features':0,
    'bias':False,
    'out_shape':1,
    'disable_pretrained':False,
    'optimizer':'Adam',
    'state_dict_dir_net':'',
    'epochs_pretrain':2,
    'log_dir':'logs/',
    "dic_classes":{False:0,True:1}
}

if not os.path.exists(args['experiment_folder']):
    os.mkdir(args['experiment_folder'])

torch.manual_seed(args["seed"])
torch.cuda.manual_seed_all(args["seed"])
random.seed(args["seed"])
np.random.seed(args["seed"])
 


training_curves_path = os.path.join(args["experiment_folder"], 'training.png')
best_weights_path = os.path.join(args["experiment_folder"], 'best_model.pth')
hyperparameters_file = os.path.join(args["experiment_folder"], 
                                    'hyperparameters.json')
report_file = os.path.join(args["experiment_folder"], 'classification_report.txt')

# Hyperparameters

hyperparameters = {"Learning Rate" : args["lr"],
                   "Weight Decay" : args["weight_decay"],
                   "Gamma" : args["gamma"],
                   "Step size" : args["step_size"],
                   "Batch Size" : args["batch_size"],
                   "Epochs" : args["epochs"],
                   "Training Time" : 0}
        
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [4]:
yflags=pd.read_csv("../duke/ClinicalFlags.csv",index_col=0)

downSample=2.0
lowerBound=.2
inputData=outputFile=f'data/firstpass_923_avgCropResize_DS{int(downSample*10)}_point{int(lowerBound*100)}Thresh.h5'


dataloaders=get_dataloaders(dataset_h5path=inputData,
                            yflag_df=yflags,
                            yLabelColumn='StagingNodes',
                            k_fold=5,
                            test_p=.2,
                            val_p=.05,
                            batchSize=args['batch_size'],)

trainloader = dataloaders[0]
trainloader_pretraining = dataloaders[1]
trainloader_normal = dataloaders[2] 
trainloader_normal_augment = dataloaders[3]
projectloader = dataloaders[4]
valloader = dataloaders[5]
testloader = dataloaders[6] 
test_projectloader = dataloaders[7]

In [5]:
useGPU=True
devID=0
if useGPU:
    device=torch.device(f'cuda:{devID}')
else:
    device=torch.device('cpu')

In [6]:
from resnet_features import video_resnet18_features

class NonNegLinear(nn.Module):
    
    """
    Applies a linear transformation to the incoming data with non-negative 
    weights` """
    
    def __init__(self, 
                 in_features: int, 
                 out_features: int, 
                 bias: bool = True,
                 device = None, 
                 dtype = None) -> None:
        
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(NonNegLinear, self).__init__()
        
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(
            torch.empty((out_features, in_features), **factory_kwargs))
        self.normalization_multiplier = nn.Parameter(
            torch.ones((1,), requires_grad = True))
        
        if bias:
            self.bias = nn.Parameter(
                torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)

    def forward(self, input: Tensor) -> Tensor:
        return F.linear(input, torch.relu(self.weight), self.bias)
    

def get_network(num_classes: int,
                args):
    features=video_resnet18_features(
        pretrained = not args['disable_pretrained'])
    first_add_on_layer_in_channels = \
            [i for i in features.modules() if isinstance(i, nn.Conv3d)][-1].out_channels
    
    if args['num_features'] == 0:
        num_prototypes = first_add_on_layer_in_channels
        print("Number of prototypes: ", num_prototypes, flush=True)
        add_on_layers = nn.Sequential(
            nn.Softmax(dim=1),  # softmax over every prototype for each patch,
                                # such that for every location in image, sum 
                                # over prototypes is 1                
            )
        
    else:
        num_prototypes = args['num_features']
        print("Number of prototypes set from", 
              first_add_on_layer_in_channels, 
              "to", 
              num_prototypes,
              ". Extra 1x1x1 conv layer added. Not recommended.", 
              flush=True)
        
        add_on_layers = nn.Sequential(
            nn.Conv3d(
                in_channels = first_add_on_layer_in_channels, 
                out_channels = num_prototypes, 
                kernel_size = 1, 
                stride = 1, 
                padding = 0, 
                bias = True), 
            nn.Softmax(dim=1),  # softmax over every prototype for each patch, 
                                # such that for every location in image, sum 
                                # over prototypes is 1
            )
        
    pool_layer = nn.Sequential(
        nn.AdaptiveMaxPool3d(output_size=(1,1,1)), # dim: (bs,ps,1,1,1) 
        nn.Flatten()                               # dim: (bs,ps)
        ) 
    
    if args['bias']:
        classification_layer = NonNegLinear(
            num_prototypes,
            num_classes,
            bias=True)
    else:
        classification_layer = NonNegLinear(
            num_prototypes,
            num_classes,
            bias=False)
        
    return features, add_on_layers, pool_layer, classification_layer, num_prototypes


In [8]:
features=video_resnet18_features(
        pretrained = not args['disable_pretrained'])
featureModules=[i for i in features.modules() ]
first_add_on_layer_in_channels = \
            [i for i in features.modules() if isinstance(i, nn.Conv3d)][0].in_channels
    

In [10]:
featureModules

[VideoResNet_features(
   (stem): BasicStem(
     (0): Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
     (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): ReLU(inplace=True)
   )
   (layer1): Sequential(
     (0): BasicBlock(
       (conv1): Sequential(
         (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
         (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         (2): ReLU(inplace=True)
       )
       (conv2): Sequential(
         (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
         (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       )
       (relu): ReLU(inplace=True)
     )
     (1): BasicBlock(
       (conv1): Sequential(
         (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 

In [24]:
first_add_on_layer_in_channels

3

In [8]:
network_layers = get_network(num_classes=args['num_classes'], args=args)

Number of prototypes:  512


In [9]:
feature_net = network_layers[0]
add_on_layers = network_layers[1]
pool_layer = network_layers[2]
classification_layer = network_layers[3]
num_prototypes = network_layers[4]
newFeatures=feature_net
"""
### let's try hacking in our layer here?
testLayer=[nn.Conv3d(in_channels = 1, 
                out_channels = 3, 
                kernel_size =1, 
                stride = 1, 
                padding = 1, 
                bias = True),]
newFeatures=nn.Sequential(testLayer[0],feature_net)
"""


net = PIPNet(
    num_classes = args['num_classes'],
    num_prototypes = num_prototypes,
    feature_net = newFeatures,
    args = args,
    add_on_layers = add_on_layers,
    pool_layer = pool_layer,
    classification_layer = classification_layer
    )

net = net.to(device=device)
net = nn.DataParallel(net, device_ids = [0])  

In [10]:
torch.cuda.memory_allocated()/1024**2 # around 130MB. okay

130.076171875

In [10]:
### from utils

def get_optimizer_nn(
        net, 
        args: dict
        ) -> torch.optim.Optimizer:
    
    torch.manual_seed(args["seed"])
    torch.cuda.manual_seed_all(args["seed"])
    random.seed(args["seed"])
    np.random.seed(args["seed"])

    # create parameter groups
    params_to_freeze = []
    params_to_train = []
    params_backbone = []
    
    # set up optimizer
    if '3Dresnet18' or '3Duxnet' in args["net"]:
        print("Network is ", args["net"], flush = True)
        # Train all the backbone
        for name, param in net.module._net.named_parameters():
            params_to_train.append(param)
    else:
        print("Network not implemented", flush = True)     
    
    classification_weight = []
    classification_bias = []
    for name, param in net.module._classification.named_parameters():
        if 'weight' in name:
            classification_weight.append(param)
        elif 'multiplier' in name:
            param.requires_grad = False
        else:
            if args["bias"]:
                classification_bias.append(param)
    
    paramlist_net = [
            {"params": params_backbone, 
             "lr": args["lr_net"], 
             "weight_decay_rate": args["weight_decay"]},
            {"params": params_to_freeze, 
             "lr": args["lr_block"], 
             "weight_decay_rate": args["weight_decay"]},
            {"params": params_to_train, 
             "lr": args["lr_block"], 
             "weight_decay_rate": args["weight_decay"]},
            {"params": net.module._add_on.parameters(), 
             "lr": args["lr_block"]*10., 
             "weight_decay_rate": args["weight_decay"]}]
            
    paramlist_classifier = [
            {"params": classification_weight, 
             "lr": args["lr"], 
             "weight_decay_rate": args["weight_decay"]},
            {"params": classification_bias, 
             "lr": args["lr"], 
             "weight_decay_rate": 0},]
          
    if args["optimizer"] == 'Adam':
        optimizer_net = torch.optim.AdamW(
            paramlist_net,
            lr = args["lr"],
            weight_decay = args["weight_decay"])
        optimizer_classifier = torch.optim.AdamW(
            paramlist_classifier,
            lr = args["lr"],
            weight_decay = args["weight_decay"])
        return optimizer_net, optimizer_classifier, params_to_freeze, params_to_train, params_backbone
    
    else:
        raise ValueError("this optimizer type is not implemented")

In [11]:
optimizer = get_optimizer_nn(net, args)
optimizer_net = optimizer[0]
optimizer_classifier = optimizer[1] 
params_to_freeze = optimizer[2] 
params_to_train = optimizer[3] 
params_backbone = optimizer[4]   

Network is  3Dresnet18


In [11]:
### big chonk on GPU alloc
# Initialize or load model
with torch.no_grad():
    
    if args['state_dict_dir_net'] != '':
        
        epoch = 0
        checkpoint = torch.load(
            args['state_dict_dir_net'], map_location = device)
        net.load_state_dict(checkpoint['model_state_dict'], strict = True) 
        print("Pretrained network loaded", flush = True)
        net.module._multiplier.requires_grad = False
        
        try:
            optimizer_net.load_state_dict(
                checkpoint['optimizer_net_state_dict']) 
        except:
            pass
        
        if torch.mean(net.module._classification.weight).item() > 1.0 and \
            torch.mean(net.module._classification.weight).item() < 3.0 \
            and torch.count_nonzero(torch.relu(
                net.module._classification.weight-1e-5)).float().item() > \
            0.8*(num_prototypes*args['num_classes']): 
                
            print("We assume that the classification layer is not yet \
                  trained. We re-initialize it...", 
                  flush = True) # e.g. loading a pretrained backbone only
            
            torch.nn.init.normal_(
                net.module._classification.weight, 
                mean = 1.0,
                std = 0.1) 
            
            torch.nn.init.constant_(net.module._multiplier, val = 2.)
            print("Classification layer initialized with mean", 
                  torch.mean(net.module._classification.weight).item(), 
                  flush = True)
            
            if args['bias']:
                torch.nn.init.constant_(
                    net.module._classification.bias, 
                    val = 0.)
        else:
            if 'optimizer_classifier_state_dict' in checkpoint.keys():
                optimizer_classifier.load_state_dict(
                    checkpoint['optimizer_classifier_state_dict'])
        
    else:
        net.module._add_on.apply(init_weights_xavier)
        torch.nn.init.normal_(
            net.module._classification.weight, 
            mean = 1.0,
            std = 0.1) 
        
        if args['bias']:
            torch.nn.init.constant_(
                net.module._classification.bias, 
                val = 0.)
            
        torch.nn.init.constant_(net.module._multiplier, val = 2.)
        net.module._multiplier.requires_grad = False

        print("Classification layer initialized with mean", 
              torch.mean(net.module._classification.weight).item(), 
              flush = True)

# Define classification loss function and scheduler
criterion = nn.NLLLoss(reduction='mean').to(device)

scheduler_net = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_net, 
    T_max = len(trainloader_pretraining)*args['epochs_pretrain'], 
    eta_min = args['lr_block']/100., 
    last_epoch=-1)

# Forward one batch through the backbone to get the latent output size
with torch.no_grad():
    xs1, _ ,_= next(iter(trainloader))
    xs1 = xs1.to(device)
    proto_features, _, _ = net(xs1)
    wshape = proto_features.shape[-1]
    hshape = proto_features.shape[-2]
    dshape = proto_features.shape[-3]
    args['wshape'] = wshape # needed for calculating image patch size
    args['hshape'] = hshape # needed for calculating image patch size
    args['dshape'] = dshape # needed for calculating image patch size
    print("Output shape: ", proto_features.shape, flush=True)
    del xs1, _

if net.module._num_classes == 2:
    
    # Create a csv log for storing the test accuracy, F1-score, mean train 
    # accuracy and mean loss for each epoch


    #TODO recreate some log but for now, we skip
    """
    log.create_log('log_epoch_overview',
                    'epoch',
                    'test_top1_acc',
                    'test_f1',
                    'almost_sim_nonzeros',
                    'local_size_all_classes',
                    'almost_nonzeros_pooled', 
                    'num_nonzero_prototypes', 
                    'mean_train_acc', 
                    'mean_train_loss_during_epoch')
    
    print("Your dataset only has two classes. Is the number of samples \
          per class similar? If the data is imbalanced, we recommend to \
          use the --weighted_loss flag to account for the imbalance.", 
          flush = True)
          """
        
else:
    
    # Create a csv log for storing the test accuracy (top 1 and top 5), 
    # mean train accuracy and mean loss for each epoch

    """
    print("Create LOG!!")
    log.create_log('log_epoch_overview', 
                    'epoch', 
                    'test_top1_acc', 
                    'test_top5_acc', 
                    'almost_sim_nonzeros', 
                    'local_size_all_classes',
                    'almost_nonzeros_pooled', 
                    'num_nonzero_prototypes', 
                    'mean_train_acc', 
                    'mean_train_loss_during_epoch')
    """

lrs_pretrain_net = []


# 3D-PIPNet Training

Classification layer initialized with mean 1.002476692199707


Output shape:  torch.Size([1, 512, 11, 11, 15])


In [12]:
criterion = nn.NLLLoss(reduction='mean').to(device)

scheduler_net = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_net, 
    T_max = len(trainloader_pretraining)*args['epochs_pretrain'], 
    eta_min = args['lr_block']/100., 
    last_epoch=-1)

In [13]:
for epoch in range(1, args['epochs_pretrain']+1):
    for param in params_to_train:
        param.requires_grad = True
    for param in net.module._add_on.parameters():
        param.requires_grad = True
    for param in net.module._classification.parameters():
        param.requires_grad = False
    for param in params_to_freeze:
        param.requires_grad = True  # can be set to False when you want to 
                                    # freeze more layers
    for param in params_backbone:
        param.requires_grad = False # can be set to True when you want to 
                                    # train whole backbone (e.g. if dataset 
                                    # is very different from ImageNet)
    
    print("\nPretrain Epoch", 
          epoch, 
          "with batch size", 
          trainloader_pretraining.batch_size, 
          flush = True)
    
    # Pretrain prototypes
    train_info = train_pipnet(
        net, 
        trainloader_pretraining, 
        optimizer_net, 
        optimizer_classifier, 
        scheduler_net, 
        None, 
        criterion, 
        epoch, 
        args['epochs_pretrain'], 
        device, 
        pretrain = True, 
        finetune = False)
    
    lrs_pretrain_net += train_info['lrs_net']
    plt.clf()
    plt.plot(lrs_pretrain_net)
    plt.savefig(os.path.join(args['log_dir'],'lr_pretrain_net.png'))
    """
    
    log.log_values('log_epoch_overview', 
                    epoch, 
                    "n.a.", 
                    "n.a.", 
                    "n.a.", 
                    "n.a.", 
                    "n.a.", 
                    "n.a.", 
                    "n.a.", 
                    train_info['loss'])
    """
if args['state_dict_dir_net'] == '':
    net.eval()
    torch.save(
        {'model_state_dict': net.state_dict(),
          'optimizer_net_state_dict': optimizer_net.state_dict()},
        os.path.join(os.path.join(args['log_dir'], 'checkpoints'), 
                      'net_pretrained'))
    net.train()
    



Pretrain Epoch 1 with batch size 1
Number of parameters that require gradient:  60
Align weight:  0.5 , U_tanh weight:  5.0 Class weight: 0.0
Pretrain? True Finetune? False


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\collate.py", line 265, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\collate.py", line 142, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\collate.py", line 142, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\collate.py", line 123, in collate
    return collate_fn_map[collate_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\utils\data\_utils\collate.py", line 160, in collate_tensor_fn
    storage = elem._typed_storage()._new_shared(numel, device=elem.device)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\storage.py", line 866, in _new_shared
    untyped_storage = torch.UntypedStorage._new_shared(size * self._element_size(), device=device)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\savio\anaconda3\Lib\site-packages\torch\storage.py", line 260, in _new_shared
    return cls._new_using_filename_cpu(size)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Couldn't open shared file mapping: <0000021B932BCA62>, error code: <1455>


In [None]:
with torch.no_grad():
    if args['epochs_pretrain'] > 0:
        print("Visualize top-k")
        topks, img_prototype, proto_coord = visualize_topk(
            net, 
            projectloader, 
            len(args['dic_classes']),
            device, 
            'visualised_pretrained_prototypes_topk', 
            args,
            save=False)
 

In [None]:
optimizer = get_optimizer_nn(net, args)
optimizer_net = optimizer[0]
optimizer_classifier = optimizer[1] 
params_to_freeze = optimizer[2] 
params_to_train = optimizer[3] 
params_backbone = optimizer[4] 
        
scheduler_net = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_net, 
    T_max = len(trainloader)*args[epochs, 
    eta_min = args[lr_net/100.)

# Scheduler for the classification layer is with restarts, such that the 
# model can re-active zeroed-out prototypes. Hence an intuitive choice. 
if args[epochs <= 30:
    scheduler_classifier = \
        torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer_classifier, 
            T_0 = 5, 
            eta_min = 0.001, 
            T_mult = 1, 
            verbose = False)
else:
    scheduler_classifier = \
        torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer_classifier, 
            T_0 = 10, 
            eta_min = 0.001, 
            T_mult = 1, 
            verbose = False)
        
for param in net.module.parameters():
    param.requires_grad = False
for param in net.module._classification.parameters():
    param.requires_grad = True

frozen = True
lrs_net = []
lrs_classifier = []
   
for epoch in range(1, args[epochs + 1): 
                 
    epochs_to_finetune = 3  # during finetuning, only train classification 
                            # layer and freeze rest. usually done for a few 
                            # epochs (at least 1, more depends on size of 
                            # dataset)
    if epoch <= epochs_to_finetune and (args[epochs_pretrain > 0 or \
                                        args[state_dict_dir_net != ''):
        for param in net.module._add_on.parameters():
            param.requires_grad = False
        for param in params_to_train:
            param.requires_grad = False
        for param in params_to_freeze:
            param.requires_grad = False
        for param in params_backbone:
            param.requires_grad = False
        finetune = True
    
    else: 
        finetune = False          
        if frozen:
            # unfreeze backbone
            if epoch > (args[freeze_epochs):
                for param in net.module._add_on.parameters():
                    param.requires_grad = True
                for param in params_to_freeze:
                    param.requires_grad = True
                for param in params_to_train:
                    param.requires_grad = True
                for param in params_backbone:
                    param.requires_grad = True   
                frozen = False
            # freeze first layers of backbone, train rest
            else:
                for param in params_to_freeze:
                    param.requires_grad = True # Can be set to False if you 
                                               # want to train fewer layers 
                                               # of backbone
                for param in net.module._add_on.parameters():
                    param.requires_grad = True
                for param in params_to_train:
                    param.requires_grad = True
                for param in params_backbone:
                    param.requires_grad = False
    
    print("\n Epoch", epoch, "frozen:", frozen, flush = True)  
      
    if (epoch == args[epochs or epoch%30 == 0) and args[epochs > 1:
        
        # Set small weights to zero
        with torch.no_grad():
            torch.set_printoptions(profile = "full")
            
            net.module._classification.weight.copy_(torch.clamp(
                net.module._classification.weight.data - 0.001, min=0.)) 
            
            print("Classifier weights: ", 
                  net.module._classification.weight[
                      net.module._classification.weight.nonzero(
                          as_tuple = True)], 
                  (net.module._classification.weight[
                      net.module._classification.weight.nonzero(
                          as_tuple = True)]).shape, 
                  flush = True)
            
            if args[bias:
                print("Classifier bias: ", 
                      net.module._classification.bias, 
                      flush = True)
                
            torch.set_printoptions(profile = "default")
    
    train_info = train_pipnet(
        net, 
        trainloader, 
        optimizer_net, 
        optimizer_classifier, 
        scheduler_net, 
        scheduler_classifier, 
        criterion, 
        epoch, 
        args[epochs, 
        device, 
        pretrain = False, 
        finetune = finetune)
    
    lrs_net += train_info['lrs_net']
    lrs_classifier += train_info['lrs_class']
    

In [None]:
  
for epoch in range(1, args[epochs + 1): 
                 
    epochs_to_finetune = 3  # during finetuning, only train classification 
                            # layer and freeze rest. usually done for a few 
                            # epochs (at least 1, more depends on size of 
                            # dataset)
    if epoch <= epochs_to_finetune and (args[epochs_pretrain > 0 or \
                                        args[state_dict_dir_net != ''):
        for param in net.module._add_on.parameters():
            param.requires_grad = False
        for param in params_to_train:
            param.requires_grad = False
        for param in params_to_freeze:
            param.requires_grad = False
        for param in params_backbone:
            param.requires_grad = False
        finetune = True
    
    else: 
        finetune = False          
        if frozen:
            # unfreeze backbone
            if epoch > (args[freeze_epochs):
                for param in net.module._add_on.parameters():
                    param.requires_grad = True
                for param in params_to_freeze:
                    param.requires_grad = True
                for param in params_to_train:
                    param.requires_grad = True
                for param in params_backbone:
                    param.requires_grad = True   
                frozen = False
            # freeze first layers of backbone, train rest
            else:
                for param in params_to_freeze:
                    param.requires_grad = True # Can be set to False if you 
                                               # want to train fewer layers 
                                               # of backbone
                for param in net.module._add_on.parameters():
                    param.requires_grad = True
                for param in params_to_train:
                    param.requires_grad = True
                for param in params_backbone:
                    param.requires_grad = False
    
    print("\n Epoch", epoch, "frozen:", frozen, flush = True)  
      
    if (epoch == args[epochs or epoch%30 == 0) and args[epochs > 1:
        
        # Set small weights to zero
        with torch.no_grad():
            torch.set_printoptions(profile = "full")
            
            net.module._classification.weight.copy_(torch.clamp(
                net.module._classification.weight.data - 0.001, min=0.)) 
            
            print("Classifier weights: ", 
                  net.module._classification.weight[
                      net.module._classification.weight.nonzero(
                          as_tuple = True)], 
                  (net.module._classification.weight[
                      net.module._classification.weight.nonzero(
                          as_tuple = True)]).shape, 
                  flush = True)
            
            if args[bias:
                print("Classifier bias: ", 
                      net.module._classification.bias, 
                      flush = True)
                
            torch.set_printoptions(profile = "default")
    
    train_info = train_pipnet(
        net, 
        trainloader, 
        optimizer_net, 
        optimizer_classifier, 
        scheduler_net, 
        scheduler_classifier, 
        criterion, 
        epoch, 
        args[epochs, 
        device, 
        pretrain = False, 
        finetune = finetune)
    
    lrs_net += train_info['lrs_net']
    lrs_classifier += train_info['lrs_class']
    
    
    # Evaluate model

    eval_info = eval_pipnet(net, testloader, epoch, device, log)
    log.log_values(
        'log_epoch_overview', 
        epoch, 
        eval_info['top1_accuracy'], 
        eval_info['top5_accuracy'], 
        eval_info['almost_sim_nonzeros'], 
        eval_info['local_size_all_classes'], 
        eval_info['almost_nonzeros'], 
        eval_info['num non-zero prototypes'], 
        train_info['train_accuracy'], 
        train_info['loss'])
    
    with torch.no_grad():
        net.eval()
        torch.save(
            {'model_state_dict': net.state_dict(),
             'optimizer_net_state_dict': optimizer_net.state_dict(),
             'optimizer_classifier_state_dict': optimizer_classifier.state_dict()}, 
            os.path.join(os.path.join(args[log_dir, 'checkpoints'),
                         'net_trained'))

        if epoch%30 == 0:
            net.eval()
            torch.save(
                {'model_state_dict': net.state_dict(), 
                  'optimizer_net_state_dict': optimizer_net.state_dict(), 
                  'optimizer_classifier_state_dict': optimizer_classifier.state_dict()}, 
                os.path.join(os.path.join(args[log_dir, 'checkpoints'),
                             'net_trained_%s'%str(epoch)))            
    
        # save learning rate in figure
        plt.clf()
        plt.plot(lrs_net)
        plt.savefig(os.path.join(args[log_dir,'lr_net.png'))
        plt.clf()
        plt.plot(lrs_classifier)
        plt.savefig(os.path.join(args[log_dir,'lr_class.png'))


In [None]:
net.eval()
torch.save(
    {'model_state_dict': net.state_dict(),
     'optimizer_net_state_dict': optimizer_net.state_dict(),
     'optimizer_classifier_state_dict': optimizer_classifier.state_dict()}, 
    os.path.join(os.path.join(args[log_dir, 'checkpoints'),
                 'net_trained_last'))

topks, img_prototype, proto_coord = visualize_topk(
    net, 
    projectloader, 
    args[num_classes, 
    device, 
    'visualised_prototypes_topk', 
    args,
    save=False)

# set weights of prototypes that are never really found in projection set to 0
set_to_zero = []

if topks:
    for prot in topks.keys():
        found = False
        for (i_id, score) in topks[prot]:
            if score > 0.1:
                found = True
        if not found:
            torch.nn.init.zeros_(net.module._classification.weight[:,prot])
            set_to_zero.append(prot)
    print(
        "Weights of prototypes",
        set_to_zero, 
        "are set to zero because it is never detected with similarity>0.1 \
            in the training set", 
        flush=True)
        
    eval_info = eval_pipnet(
        net, 
        testloader, 
        "notused" + str(args[epochs),
        device, log)
    
    log.log_values(
        'log_epoch_overview', 
        "notused"+str(args[epochs), 
        eval_info['top1_accuracy'], 
        eval_info['top5_accuracy'], 
        eval_info['almost_sim_nonzeros'], 
        eval_info['local_size_all_classes'], 
        eval_info['almost_nonzeros'], 
        eval_info['num non-zero prototypes'], 
        "n.a.", 
        "n.a.")

print("classifier weights: ", 
      net.module._classification.weight, 
      flush = True)

print("Classifier weights nonzero: ", 
      net.module._classification.weight[
          net.module._classification.weight.nonzero(as_tuple=True)], 
      (net.module._classification.weight[
          net.module._classification.weight.nonzero(as_tuple=True)]).shape, 
      flush=True)

print("Classifier bias: ", 
      net.module._classification.bias, 
      flush=True)

# Print weights and relevant prototypes per class
for c in range(net.module._classification.weight.shape[0]):
    relevant_ps = []
    proto_weights = net.module._classification.weight[c,:]
    
    for p in range(net.module._classification.weight.shape[1]):
        if proto_weights[p]> 1e-3:
            relevant_ps.append((p, proto_weights[p].item()))
    if args[val_split == 0.:
        print("Class", 
              c, 
              "(", 
              list(testloader.dataset.class_to_idx.keys())[
                  list(testloader.dataset.class_to_idx.values()).index(c)],
              "):",
              "has", 
              len(relevant_ps),
              "relevant prototypes: ", 
              relevant_ps, 
              flush=True)

In [12]:
testout=next(iter(trainloader)) ### 2min 6.1s for 25 batch using naive collate,

#TODO try optimizing load using collate fn

In [14]:
len(testout[0])

25

In [19]:
def get_network(num_classes: int, args):
    features = video_resnet18_features(pretrained=not args['disable_pretrained'])
    
    # Modify the first convolutional layer to accept 1 channel input
    features.conv1 = nn.Conv3d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    
    first_add_on_layer_in_channels = 64  # Output channels of the modified first layer
    
    # Define the rest of your network architecture
    # Adjusting the number of prototypes and the final classification layer for binary classification
    if args['num_features'] == 0:
        num_prototypes = first_add_on_layer_in_channels
        add_on_layers = nn.Sequential(nn.Softmax(dim=1))
    else:
        num_prototypes = args['num_features']
        add_on_layers = nn.Sequential(
            nn.Conv3d(first_add_on_layer_in_channels, num_prototypes, kernel_size=1, stride=1, padding=0, bias=True),
            nn.Softmax(dim=1)
        )
    
    pool_layer = nn.Sequential(
        nn.AdaptiveMaxPool3d(output_size=(1, 1, 1)),
        nn.Flatten()
    )
    
    # Adjust the final classification layer for binary classification
    classification_layer = NonNegLinear(num_prototypes, num_classes, bias=True)
    
    return features, add_on_layers, pool_layer, classification_layer, num_prototypes

In [12]:
features = video_resnet18_features(pretrained=not args['disable_pretrained'])
features3Dconv=[i for i in features.modules() if isinstance(i, nn.Conv3d)]

In [13]:
testLayer=[nn.Conv3d(in_channels = 1, 
                out_channels = 3, 
                kernel_size =(3, 3, 3), 
                stride = (1, 1, 1), 
                padding = (1, 1, 1), 
                bias = True),]

In [14]:
newFeatures=nn.Sequential(testLayer[0],features)

In [29]:
features.layer1

Sequential(
  (0): BasicBlock(
    (conv1): Sequential(
      (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (conv2): Sequential(
      (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (relu): ReLU(inplace=True)
  )
  (1): BasicBlock(
    (conv1): Sequential(
      (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (conv2): Sequential(
      (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0

In [33]:
testLayer[0](testout[0][0])

RuntimeError: [enforce fail at alloc_cpu.cpp:80] data. DefaultCPUAllocator: not enough memory: you tried to allocate 9958733568 bytes.

In [12]:
testout=next(iter(trainloader))

In [7]:
testout[1].element_size()

2

In [13]:
testout[0][0].shape #80,115,97

torch.Size([3, 82, 167, 232])

In [20]:
network_layers = get_network(num_classes=1, args=args)
feature_net = network_layers[0]
add_on_layers = network_layers[1]
pool_layer = network_layers[2]
classification_layer = network_layers[3]
num_prototypes = network_layers[4]

netCPU = PIPNet(
    num_classes = args['out_shape'],
    num_prototypes = num_prototypes,
    feature_net = feature_net,
    args = args,
    add_on_layers = add_on_layers,
    pool_layer = pool_layer,
    classification_layer = classification_layer
    )



#xs1=testout.to(device)
testProto=netCPU(testout[0])

RuntimeError: Given groups=1, weight of size [64, 3, 3, 7, 7], expected input[25, 1, 171, 471, 483] to have 3 channels, but got 1 channels instead

In [48]:
netTest = testLayer[0].to(device=device)

with torch.no_grad():
    xs1,_= next(iter(testloader))
    xs1 = xs1[0].to(device)
    proto_features, _, _ = netTest(xs1)
    

OutOfMemoryError: CUDA out of memory. Tried to allocate 4.64 GiB. GPU 0 has a total capacty of 24.00 GiB of which 22.54 GiB is free. Of the allocated memory 222.61 MiB is allocated by PyTorch, and 7.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [10]:
#xs1,_= next(iter(trainloader))
testOut=testLayer[0](xs1)


RuntimeError: [enforce fail at alloc_cpu.cpp:80] data. DefaultCPUAllocator: not enough memory: you tried to allocate 99587335680 bytes.

In [14]:
xs1, xs2, ys= next(iter(trainloader))

In [15]:
xs1, xs2, ys = xs1.to(device), xs2.to(device), ys.to(device)

In [16]:
optimizer_classifier.zero_grad(set_to_none=True)
optimizer_net.zero_grad(set_to_none=True)

criterion = nn.NLLLoss(reduction='mean').to(device)

scheduler_net = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_net, 
    T_max = len(trainloader_pretraining)*args['epochs_pretrain'], 
    eta_min = args['lr_block']/100., 
    last_epoch=-1)


In [14]:
proto_features, pooled, out = net(xs1) #we seem to be unable to pass a tensor through this NN with gradient for train?

OutOfMemoryError: CUDA out of memory. Tried to allocate 600.00 MiB. GPU 0 has a total capacty of 24.00 GiB of which 0 bytes is free. Of the allocated memory 26.48 GiB is allocated by PyTorch, and 1.04 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [17]:
proto_features, pooled, out = net(torch.cat([xs1, xs2]))