In [1]:
from PIL import Image
import requests
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

import torch
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
torch.set_grad_enabled(False);

from torchsummary import summary

In [2]:
from torchvision import datasets, transforms, models
import torch.optim as optim
import time
import os
import copy
\
import numpy as np
import torch.nn.functional as F
from torch.optim import lr_scheduler

import skimage
import skimage.transform

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.empty_cache()
print("Allocated Memory:",torch.cuda.memory_allocated())

cuda:0
Allocated Memory: 0


In [4]:
# standard PyTorch mean-std input image normalization
transform = T.Compose([
    T.Resize((180,180)),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [5]:
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
val_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
tr_split_len = 5000
vl_split_len = 1000
training_dataset = torch.utils.data.random_split(train_dataset, [tr_split_len, len(train_dataset)-tr_split_len])[0]
validation_dataset = torch.utils.data.random_split(val_dataset, [vl_split_len, len(val_dataset)-vl_split_len])[0]

In [7]:
training_loader = torch.utils.data.DataLoader(training_dataset, batch_size=10, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size = 10, shuffle=False)

In [8]:
class DETRdemo(nn.Module):
    """
    Demo DETR implementation.

    Demo implementation of DETR in minimal number of lines, with the
    following differences wrt DETR in the paper:
    * learned positional encoding (instead of sine)
    * positional encoding is passed at input (instead of attention)
    * fc bbox predictor (instead of MLP)
    The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100.
    Only batch size 1 supported.
    """
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()

        # create ResNet-50 backbone
        self.backbone = resnet50()
        del self.backbone.fc

        # create conversion layer
        self.conv = nn.Conv2d(2048, hidden_dim, 1)

        # create a default PyTorch transformer
        self.encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=8), num_layers=6)

        # prediction heads, one extra class for predicting non-empty slots
        # note that in baseline DETR linear_bbox layer is 3-layer MLP
        self.linear_class = nn.Linear(hidden_dim*6*6, num_classes)
        #self.linear_bbox = nn.Linear(hidden_dim, 4)

        # output positional encodings (object queries)
        #self.query_pos = nn.Parameter(torch.rand(1, hidden_dim))

        # spatial positional encodings
        # note that in baseline DETR we use sine positional encodings
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))

    def forward(self, inputs):
        # propagate inputs through ResNet-50 up to avg-pool layer
        x = self.backbone.conv1(inputs)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # convert from 2048 to 256 feature planes for the transformer
        h = self.conv(x)

        # construct positional encodings
        H, W = h.shape[-2:]
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)
        
        #print(self.query_pos.unsqueeze(1).shape)
        
        # propagate through the transformer
        h = self.encoder(pos + 0.1 * h.flatten(2).permute(2, 0, 1))#,
        #                     self.query_pos.unsqueeze(1)).transpose(0, 1)
        #print(h.shape)
        h = h.transpose(0, 1).flatten(1)
        #print(h.shape)
        # finally project transformer outputs to class labels and bounding boxes
        return self.linear_class(h)

In [9]:
detr = DETRdemo(num_classes=10)

In [10]:
model = detr.to(device)
model = model.train()

In [11]:
def add_noise(inputs):
    noise = torch.randn_like(inputs)*0.2
    return inputs + noise

In [12]:
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
img= Image.open(requests.get(url, stream=True).raw)
im = transform(img).unsqueeze(0).to(device)

scores = model(im)

In [13]:
scores.shape

torch.Size([1, 10])

In [14]:
#for param in model.encoder.parameters():
#    print(param.grad)

In [15]:
model_criterion = nn.CrossEntropyLoss()
model_optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)
model_lr_scheduler = lr_scheduler.StepLR(model_optimizer, step_size=5, gamma=0.1)

In [19]:
def train_model_rn(model, inputs, labels, opt_d, model_criterion):
    
    
    # Clear discriminator gradients
    opt_d.zero_grad()
    #opt_back.zero_grad()
    
    real_inputs = inputs.to(device) # allow gpu use
    labels = labels.to(device) # allow gpu use
    # Pass real images through model
    real_preds = model(real_inputs)
    real_loss = model_criterion(real_preds, labels)
    real_score = torch.mean(real_preds).item()
    
    #_, preds = torch.max(real_preds, 1) # return the index of the maximum value predicted for that image (used to generate the accuracy)
     # the sum of the loss of all itens
    #running_corrects = torch.sum(preds == labels.data) # the sum of correct prediction on an epochs
    #print("TRAIN MODEL FUNCTION - RUNNING CORRECTS",running_corrects)
    
    # Generate fake images
    noised_inputs = add_noise(real_inputs)

    # Pass fake images through discriminator
    noised_preds = model(noised_inputs)
    noised_loss = model_criterion(noised_preds, labels)
    noised_score = torch.mean(noised_preds).item()
    
    #_, preds_noised = torch.max(noised_preds, 1)
    #running_noised_corrects = torch.sum(preds_noised == labels.data)
    #print("TRAIN MODEL FUNCTION - RUNNING NOISED CORRECTS",running_noised_corrects)
    
    Typenone = 0
    zeros = 0
    normal = 0
    for name, param in model.encoder.named_parameters():
        if param.grad == None:
            Typenone +=1
        elif torch.sum(param.grad) == 0:
            zeros += 1
        else:
            normal += 1

    print("None parameters:",Typenone)
    print("Zero Grad Parameters:", zeros)
    print("Normally computed Parameters:",normal)

    # Update discriminator weights
    loss = real_loss + noised_loss
    #print(loss.requires_grad)
    loss.requires_grad = True
    
    loss.backward()
    opt_d.step()
    #opt_trans.step()
    #opt_back.step()
    return loss, real_score, noised_score#, running_corrects, running_noised_corrects

In [20]:
def fit_random_noise(training_loader, validation_loader, model, model_criterion, model_optimizer, model_lr_scheduler, len_train, len_val, 
    path='test/TOP_PERFORMANCE_epoch_', epochs=25, start_idx=1):
    
    torch.cuda.empty_cache()
    
    # Losses & scores
    losses_d = []
    real_scores = []
    noise_scores = []
    real_corrects = []
    noised_corrects = []
    val_real_corrects = []
    val_noised_corrects = []
    
    best_model_wts = copy.deepcopy(model.state_dict())
    
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(epochs):
        start_time = time.time()
        print('Epoch {}/{}'.format(epoch+1, epochs))
        print('-' * 10)
        
        running_loss = 0.0
        running_real_corrects = 0.0
        running_noised_corrects = 0.0
        val_running_loss = 0.0
        val_running_real_corrects = 0.0
        val_running_noised_corrects = 0.0

        
        for inputs, labels in training_loader:
            # Train discriminator
            loss_d, real_score, noise_score= train_model_rn(model, inputs, labels, model_optimizer, model_criterion)
            #running_real_corrects +=real_pred
            #running_noised_corrects +=noised_pred
            #print("FIT FUNCTION - RUNNING REAL CORRECTS",running_real_corrects.item())
            #print("FIT FUNCTION - RUNNING NOISED CORRECTS",running_noised_corrects.item())
            

        else:
        #VALIDATION
            with torch.no_grad(): # to save memory (temporalely set all the requires grad to be false)
                for val_inputs, val_labels in validation_loader:
                    val_inputs = val_inputs.to(device) # allow gpu use
                    val_labels = val_labels.to(device) # allow gpu use
                    val_noised_input = add_noise(val_inputs) #passes the image through the network and get the output
                    val_preds_real = model(val_inputs)
                    val_preds_noised = model(val_noised_input)
                    val_real_loss = model_criterion(val_preds_real, val_labels) #compare output and labels to get the loss 
                    val_noised_loss = model_criterion(val_preds_noised, val_labels)
                    
                    _, val_real_preds = torch.max(val_preds_real, 1) #same as for training
                    _, val_noised_preds = torch.max(val_preds_noised, 1) #same as for training
                    
                    val_loss = val_real_loss + val_noised_loss
                    val_running_real_corrects += torch.sum(val_real_preds == val_labels.data) #same as for training
                    val_running_noised_corrects += torch.sum(val_noised_preds == val_labels.data) #same as for training
        
        model_lr_scheduler.step()
        #transformer_lr_scheduler.step()
        #backbone_lr_scheduler.step()

        # Record losses & scores
        losses_d.append(loss_d)
        real_scores.append(real_score)
        noise_scores.append(noise_score)
        real_corrects.append(running_real_corrects)
        noised_corrects.append(running_noised_corrects)
        val_real_corrects.append(val_running_real_corrects)
        val_noised_corrects.append(val_running_noised_corrects)
        
        # Log losses & scores (last batch)
        print("Epoch [{}/{}], loss_d: {:.4f}, real_score: {:.4f}, noised_score: {:.4f}".format(
            epoch+1, epochs, loss_d, real_score, noise_score))
    
    
        # Model accuracy       
        print("EPOCH RUNNING REAL CORRECT PREDICTIONS",running_real_corrects.item())
        print("EPOCH RUNNING NOISED CORRECTS PREDICTIONS",running_noised_corrects.item())
        epoch_real_acc = running_real_corrects.item()/ len_train
        epoch_noised_acc = running_noised_corrects.item()/ len_train
        print("Epoch [{}/{}], Training - real acc: {:.4f}, noised acc: {:.4f}".format(
            epoch+1, epochs, epoch_real_acc, epoch_noised_acc))
        
        val_epoch_real_acc = val_running_real_corrects.float()/ len_val
        val_epoch_noised_acc = val_running_noised_corrects.float()/ len_val
        print("Epoch [{}/{}], Validation - real acc: {:.4f}, noised acc: {:.4f}".format(
            epoch+1, epochs, val_epoch_real_acc, val_epoch_noised_acc))
              
        # Save generated images
        #save_samples(epoch+start_idx, fixed_latent, show=False)
        
        
        epoch_time_elapsed = time.time() - start_time
        print('Epoch training complete in {:.0f}m {:.0f}s'.format(
            epoch_time_elapsed // 60, epoch_time_elapsed % 60))

        if val_epoch_real_acc > best_acc:
                best_acc = val_epoch_real_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                print("TOP PERFORMANCE UPDATED")
        
        #out, satt, conv_features = model(img.to(device))
        #f_map = conv_features
        #shape = f_map.shape[-2:]
        #sattn = satt.reshape(shape + shape).cpu().detach().numpy()
        #plot_MHAM(sattn)
                            
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    #torch.save(model.state_dict(),path + str(epoch) + '.pth')
    
    return losses_d, real_corrects, noised_corrects, val_real_corrects, val_noised_corrects

In [21]:
random_noise_training = fit_random_noise(training_loader, validation_loader, model, model_criterion, model_optimizer,
    model_lr_scheduler, len_train=len(training_dataset), len_val=len(validation_dataset), 
    path='RAMDOM_NOISE_NO_BYPASS_TOP_PERFORMANCE_epoch_', epochs=4, start_idx=1)

Epoch 1/4
----------
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Param

None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0
None parameters: 72
Zero Grad Parameters: 0
Normally computed Parameters: 0


KeyboardInterrupt: 