In [1]:
import torch
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from torchvision import datasets, transforms
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb

In [3]:
# CIFAR-10 training split, stored under /scratch/cifar10 (downloaded on demand);
# every sample is passed through ToTensor().
trainset = datasets.CIFAR10(
    root='/scratch/cifar10',
    train=True,
    download=True,
    transform=ToTensor(),
)

Files already downloaded and verified


In [4]:
# CIFAR-10 test split read from the same root directory; no download is attempted here.
testset = datasets.CIFAR10(
    root='/scratch/cifar10',
    train=False,
    download=False,
    transform=ToTensor(),
)

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [6]:
# Sanity check: print the number of samples in the training split.
print(len(trainset))

50000


In [6]:
# Batch iterators keyed by split name; only the training loader reshuffles its data.
loaders = dict(
    train=DataLoader(trainset, batch_size=256, shuffle=True, num_workers=1),
    test=DataLoader(testset, batch_size=512, shuffle=False, num_workers=1),
)

In [7]:
class AddNoiseLayer(torch.nn.Module):
    """Adds freshly sampled, per-channel Gaussian noise to its input.

    On every forward pass a new mean and standard deviation are drawn for
    each channel (dimension 1 of the input), noise is sampled element-wise
    from those distributions, and the result is added to the input scaled
    by ``r_sigma``.

    Args:
        n_mu: half-width of the uniform range ``[-n_mu, n_mu)`` from which
            the per-channel noise mean is drawn.
        n_sigma: width of the uniform range ``[1, 1 + n_sigma)`` from which
            the per-channel noise standard deviation is drawn.
        r_mu: stored on the module but not read by ``forward``.
        r_sigma: multiplier applied to the sampled noise before it is added.

    Note:
        ``forward`` does not check ``self.training``; the noise is added in
        both training and evaluation mode.
    """

    def __init__(self, n_mu, n_sigma, r_mu, r_sigma):
        super().__init__()
        self.n_mu, self.n_sigma = n_mu, n_sigma
        self.r_mu, self.r_sigma = r_mu, r_sigma

    def forward(self, x):
        channels = x.size(1)
        # Shape used to broadcast one value per channel over the whole input.
        per_channel = (1, -1, 1, 1)

        # Sampling order (mean, std, element-wise noise) is kept fixed so a
        # given RNG seed yields the same perturbation.
        shift = torch.rand(channels, device=x.device) * 2 * self.n_mu - self.n_mu
        scale = torch.rand(channels, device=x.device) * self.n_sigma + 1
        perturbation = torch.randn_like(x) * scale.view(per_channel) + shift.view(per_channel)

        return x + self.r_sigma * perturbation

class VGG16(nn.Module):
    """VGG-16 style image classifier with optional batch norm and noise.

    The network is thirteen 3x3 convolution blocks (``layer1`` .. ``layer13``,
    five of them followed by 2x2 max pooling) and three fully connected
    blocks (``layer14`` .. ``layer16``). The first linear layer expects 512
    flattened features, i.e. a 512-channel 1x1 map after the five poolings,
    which corresponds to 32x32 inputs.

    Args:
        num_classes: number of output logits.
        batchnorm: if true, every convolution is followed by ``BatchNorm2d``.
        noise: if true, an ``AddNoiseLayer`` is inserted after each batch-norm
            layer. It has no effect when ``batchnorm`` is false, because the
            noise layer is only added inside the batch-norm branch.
    """

    def __init__(self, num_classes, batchnorm, noise):
        # Initialise nn.Module before setting any attribute so attribute
        # registration never depends on Module internals being absent.
        super().__init__()
        self.noise = noise
        self.layer1 = self._make_layer(3, 64, batchnorm)
        self.layer2 = self._make_layer(64, 64, batchnorm, maxpool=True)
        self.layer3 = self._make_layer(64, 128, batchnorm)
        self.layer4 = self._make_layer(128, 128, batchnorm, maxpool=True)
        self.layer5 = self._make_layer(128, 256, batchnorm)
        self.layer6 = self._make_layer(256, 256, batchnorm)
        self.layer7 = self._make_layer(256, 256, batchnorm, maxpool=True)
        self.layer8 = self._make_layer(256, 512, batchnorm)
        self.layer9 = self._make_layer(512, 512, batchnorm)
        self.layer10 = self._make_layer(512, 512, batchnorm, maxpool=True)
        self.layer11 = self._make_layer(512, 512, batchnorm)
        self.layer12 = self._make_layer(512, 512, batchnorm)
        self.layer13 = self._make_layer(512, 512, batchnorm, maxpool=True)
        self.layer14 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, 4096),
            nn.ReLU(),
        )
        self.layer15 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
        )
        self.layer16 = nn.Sequential(
            nn.Linear(4096, num_classes),
        )

    def _make_layer(self, in_channels, out_channels, batchnorm, maxpool=False):
        """Build one convolution block.

        The block is Conv2d(3x3, padding 1) -> [BatchNorm2d -> [AddNoiseLayer]]
        -> ReLU -> [MaxPool2d(2)]. The convolution is always element 0 of the
        returned ``nn.Sequential`` (``fit`` relies on this to read weights).
        """
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=1),
        ]

        if batchnorm:
            layers.append(nn.BatchNorm2d(out_channels))

            # Noise is only ever injected right after a batch-norm layer.
            if self.noise:
                # used the same noise parameters as in the paper
                layers.append(AddNoiseLayer(0.5, 1.25, 0.1, 0.1))

        layers.append(nn.ReLU())

        if maxpool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        return nn.Sequential(*layers)

    def cosine_similarity(self, x, y):
        """Return the cosine similarity of ``x`` and ``y`` flattened to 1-D.

        ``reshape`` is used instead of ``view`` so non-contiguous tensors
        (e.g. transposed weights) are accepted as well.
        """
        return nn.functional.cosine_similarity(
            x.reshape(-1), y.reshape(-1), dim=0, eps=1e-6
        )

    @staticmethod
    def compute_gradient_difference(initial_grad, final_grad):
        """Return the L2 norm of ``final_grad - initial_grad``.

        Declared as a static method: the original definition took no ``self``
        and therefore failed when called on an instance. Calling it through
        the class keeps working exactly as before.
        """
        return torch.norm(final_grad - initial_grad, p=2)

    def forward(self, x):
        """Run the 13 convolution blocks, flatten, then the 3 dense blocks."""
        out = x
        for index in range(1, 14):
            out = getattr(self, f"layer{index}")(out)
        # Keep the batch dimension and flatten everything else.
        out = torch.flatten(out, 1)
        out = self.layer14(out)
        out = self.layer15(out)
        out = self.layer16(out)
        return out

    def fit(self, loaders, epochs, device, learning_rate):
        """Train with Adam and cross-entropy, logging progress to W&B.

        Args:
            loaders: mapping with a ``'train'`` entry that yields
                ``(images, labels)`` batches.
            epochs: number of passes over the training loader.
            device: device the model and every batch are moved to.
            learning_rate: Adam learning rate.

        Logging (requires an active ``wandb`` run):
            * every 30th batch: ``Epoch``, running mean ``Loss`` and running
              ``Train_Accuracy`` (percent) for the current epoch;
            * once per epoch, in a single ``wandb.log`` call so all values
              share one step: cosine similarity and L2 distance between the
              first-convolution weights of ``layer5`` / ``layer10`` at the
              start and at the end of that epoch.
        """
        self.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)

        # Convolutions whose per-epoch weight movement is tracked, keyed by
        # the layer number used in the logged metric names.
        tracked_convs = {"5": self.layer5[0], "10": self.layer10[0]}

        for epoch in tqdm(range(epochs)):
            # Nothing in the loop leaves training mode, so set it once per epoch.
            self.train()

            total_loss = 0.0
            correct = 0
            total = 0
            # Snapshot taken at the start of each epoch: the metrics below
            # measure change within one epoch, not since initialisation.
            start_weights = {
                tag: conv.weight.detach().clone()
                for tag, conv in tracked_convs.items()
            }

            for i, (images, labels) in enumerate(loaders['train']):
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = self(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                if i % 30 == 0:
                    wandb.log({
                        "Epoch": epoch + 1,
                        "Loss": total_loss / (i + 1),
                        "Train_Accuracy": 100 * correct / total,
                    })

            epoch_metrics = {"Epoch": epoch + 1}
            for tag, conv in tracked_convs.items():
                before = start_weights[tag].reshape(-1)
                after = conv.weight.detach().reshape(-1)
                # .item() logs plain Python floats rather than device tensors.
                epoch_metrics[f"Cosine Similarity Layer {tag}"] = (
                    nn.functional.cosine_similarity(before, after, dim=0).item()
                )
                epoch_metrics[f"L2 Distance Layer {tag}"] = (
                    nn.functional.pairwise_distance(before, after, p=2).item()
                )
            wandb.log(epoch_metrics)

In [48]:
# Grid sweep over the four (noise, batchnorm) combinations.
sweep_config = {
    'method': 'grid',
    'metric': {
        # Must match a key passed to wandb.log; the training loop logs
        # 'Train_Accuracy' (there is no 'accuracy' key).
        'name': 'Train_Accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'noise': {
            'values': [True, False]
        },
        'batchnorm': {
            'values': [True, False]
        },
    }
}

In [49]:
# Register the sweep in the 'adding_noise' project; the returned ID is handed to the agent.
sweep_id = wandb.sweep(
    sweep_config,
    project='adding_noise',
)

Create sweep with ID: huszyrii
Sweep URL: https://wandb.ai/project-ai-scream/adding_noise/sweeps/huszyrii


In [50]:
def wandb_logging(epochs=50, learning_rate=0.0005):
    """Train one VGG16 configuration selected by the W&B sweep agent.

    Reads ``noise`` and ``batchnorm`` from ``wandb.config``, names the run
    after that combination, and trains on the notebook-level ``loaders`` and
    ``device``.

    Args:
        epochs: number of training epochs (default keeps the previous
            hard-coded value).
        learning_rate: Adam learning rate (default keeps the previous
            hard-coded value).

    NOTE(review): VGG16._make_layer only adds AddNoiseLayer inside its
    batch-norm branch, so the 'no batchnorm, noise' configuration builds the
    same network as 'no batchnorm, no noise'.
    """
    # The sweep definition is not passed as run config: doing so stored its
    # 'method'/'metric'/'parameters' keys in every run's config. The values
    # read below are expected to be supplied by the sweep agent.
    with wandb.init():
        config = wandb.config
        batchnorm_part = 'batchnorm' if config.batchnorm else 'no batchnorm'
        noise_part = 'noise' if config.noise else 'no noise'
        wandb.run.name = f'{batchnorm_part}, {noise_part}'
        model = VGG16(10, config.batchnorm, config.noise)
        model.fit(loaders, epochs, device, learning_rate)

In [51]:
# Start a sweep agent that calls wandb_logging for the runs of this sweep.
wandb.agent(sweep_id, function=wandb_logging)

[34m[1mwandb[0m: Agent Starting Run: hkzf58e3 with config:
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	noise: True


100%|██████████| 50/50 [10:39<00:00, 12.78s/it]


0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▆▅▄▃▃▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train_Accuracy,▁▃▄▅▆▇▇▇▇▇██████████████████████████████

0,1
Epoch,50.0
Loss,0.03767
Train_Accuracy,99.01804


[34m[1mwandb[0m: Agent Starting Run: jctekcfo with config:
[34m[1mwandb[0m: 	batchnorm: True
[34m[1mwandb[0m: 	noise: False


100%|██████████| 50/50 [09:45<00:00, 11.70s/it]


0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train_Accuracy,▁▃▄▅▆▇▇▇▇▇██████████████████████████████

0,1
Epoch,50.0
Loss,0.02352
Train_Accuracy,99.30508


[34m[1mwandb[0m: Agent Starting Run: 733mg74h with config:
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	noise: True


100%|██████████| 50/50 [08:39<00:00, 10.39s/it]


0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▄▄▃▅▃▃▄▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▃▃▃▃▃▃▃▃▃▃▃▃▃
Train_Accuracy,▄▄▄▄▁▄▄▃▄▄█▄▄▄▄▃▃▃▄▄▄▄▄▅▃▄█▄▄▄▄▃▄▄▄▄▄▄▃▄

0,1
Epoch,50.0
Loss,2.30262
Train_Accuracy,9.75052


[34m[1mwandb[0m: Agent Starting Run: ne19zf4b with config:
[34m[1mwandb[0m: 	batchnorm: False
[34m[1mwandb[0m: 	noise: False


100%|██████████| 50/50 [08:49<00:00, 10.58s/it]


0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▇▆▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train_Accuracy,▁▂▃▄▄▅▆▆▆▇▇▇▇▇▇█████████████████████████

0,1
Epoch,50.0
Loss,0.06392
Train_Accuracy,98.15694


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [8]:
# Grid sweep comparing the network without and with batch normalisation.
sweep_config = {
    'method': 'grid',
    'metric': {
        # Must match a key passed to wandb.log; the training loop logs
        # 'Train_Accuracy' (there is no 'accuracy' key).
        'name': 'Train_Accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'batchnorm': {
            'values': [False, True]
        },
    }
}

In [12]:
# Register the sweep in the 'cos-angle' project; the returned ID is handed to the agent.
sweep_id = wandb.sweep(
    sweep_config,
    project='cos-angle',
)

Create sweep with ID: tgwfx72h
Sweep URL: https://wandb.ai/project-ai-scream/cos-angle/sweeps/tgwfx72h


In [13]:
def wandb_logging(epochs=100, learning_rate=0.0001):
    """Train a noise-free VGG16, with or without batch norm, for one sweep run.

    Reads ``batchnorm`` from ``wandb.config``, names the run accordingly, and
    trains on the notebook-level ``loaders`` and ``device``.

    Args:
        epochs: number of training epochs (default keeps the previous
            hard-coded value).
        learning_rate: Adam learning rate (default keeps the previous
            hard-coded value).
    """
    # The sweep definition is not passed as run config: doing so stored its
    # 'method'/'metric'/'parameters' keys in every run's config. The value
    # read below is expected to be supplied by the sweep agent.
    with wandb.init():
        config = wandb.config
        wandb.run.name = 'standard+batchnorm' if config.batchnorm else 'standard'
        model = VGG16(10, config.batchnorm, False)
        model.fit(loaders, epochs, device, learning_rate)

In [14]:
# Start a sweep agent that calls wandb_logging for the runs of this sweep.
wandb.agent(sweep_id, function=wandb_logging)

[34m[1mwandb[0m: Agent Starting Run: 81hn7bee with config:
[34m[1mwandb[0m: 	batchnorm: False
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 100/100 [17:45<00:00, 10.66s/it]


0,1
Cosine Similarity Layer 10,█▆▁▅▆▇▇▇▇███████████████████████████████
Cosine Similarity Layer 5,█▁▂▄▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▆▇▇
Epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
L2 Distance Layer 10,▁▃█▅▄▃▃▃▃▃▃▂▃▃▂▂▃▂▂▂▂▃▃▂▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃
L2 Distance Layer 5,▁██▆▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▄▄▅▄▅▅▄▅▄▄▄▅▅▅▅
Loss,█▆▅▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train_Accuracy,▁▃▄▅▅▆▆▆▇▇▇▇▇███████████████████████████

0,1
Cosine Similarity Layer 10,0.99868
Cosine Similarity Layer 5,0.99911
Epoch,100.0
L2 Distance Layer 10,1.16744
L2 Distance Layer 5,0.53084
Loss,0.02201
Train_Accuracy,99.32018


[34m[1mwandb[0m: Agent Starting Run: g26ev25b with config:
[34m[1mwandb[0m: 	batchnorm: True
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 100/100 [19:37<00:00, 11.77s/it]


0,1
Cosine Similarity Layer 10,▁▄▄▅▆▆▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇▇▇▇
Cosine Similarity Layer 5,▁▅▅▅▆▆▇▆▇▇▆▇▆▇▇▇▆▇▆▇▇▇▆▇▇▇▇▆▆▇▇▇▆▇▇█▇▇▇▆
Epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
L2 Distance Layer 10,█▄▄▃▂▂▁▁▂▂▂▂▃▃▃▃▅▃▄▄▄▃▅▅▄▅▅▆▆▄▅▅▆▄▆▂▆▆▇▇
L2 Distance Layer 5,█▄▄▄▃▂▂▂▂▂▂▂▃▂▂▂▄▂▃▃▂▃▄▃▃▃▃▄▄▃▃▃▄▂▄▁▄▃▄▅
Loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
Train_Accuracy,▁▅▆▇▇███████████████████████████████████

0,1
Cosine Similarity Layer 10,0.9969
Cosine Similarity Layer 5,0.99824
Epoch,100.0
L2 Distance Layer 10,1.8377
L2 Distance Layer 5,0.69702
Loss,0.01816
Train_Accuracy,99.46262


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
