In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

import torch
device = torch.device("cuda")
import torch.nn.functional as F
import wandb
from torchvision.models import resnet34, resnet50
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

# Setup Weights and Biases and specify hyperparameters
wandb.init(project="Thomas-Masters-Project")

learning_rate = 0.001
epochs = 5
batch_size = 256
net_type = "pretrained_resnet50"

# Update the run's config object in place. Assigning a dict to `wandb.config`
# would only rebind the module attribute to a plain dict instead of storing
# the hyperparameters on the run created by `wandb.init` above.
wandb.config.update({
    "learning_rate": learning_rate,
    "epochs": epochs,
    "batch_size": batch_size,
    "network": net_type
})

def test(model, test_loader):
    """Evaluate `model` on `test_loader`, print and log the top-1 accuracy.

    Args:
        model: module whose call returns a tuple with the logits as its
            first element (any further elements are ignored here).
        test_loader: iterable of `(data, target)` batches that also exposes
            a sized `dataset` attribute, used as the accuracy denominator.

    Side effects:
        Puts the model in eval mode, prints a summary line and logs the
        accuracy (in percent) to W&B under the key "accuracy".
    """
    model.eval()
    correct = 0
    # Models that record intermediate activations through forward hooks keep
    # appending to their buffer on every call. Evaluation never uses those
    # activations, so drop them after each batch; otherwise they stay alive
    # and are handed to the loss on the next training forward pass.
    clear_features = getattr(model, "clear_features", None)
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output, *_ = model(data)
            if clear_features is not None:
                clear_features()
            # Index of the highest logit is the predicted class.
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

    accuracy = 100. * correct / len(test_loader.dataset)
    print('Test set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset), accuracy))

    wandb.log({"accuracy": accuracy})

[34m[1mwandb[0m: Currently logged in as: [33mtnoel20[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
def _init_resnet_50(output_size, pretrained = False, features_hook = None):
    """Build a ResNet-50 whose final `fc` layer maps 2048 features to `output_size`.

    Args:
        output_size: number of outputs of the replacement `fc` layer.
        pretrained: forwarded to `resnet50(pretrained=...)`.
        features_hook: optional forward hook; when given it is registered on
            the submodules named `layer1`, `layer2`, `layer3` and `layer4`.

    Returns:
        The configured torchvision ResNet-50 module.
    """
    backbone = resnet50(pretrained=pretrained)
    backbone.fc = torch.nn.Linear(2048, output_size)
    if features_hook is None:
        return backbone

    hooked_names = ('layer1', 'layer2', 'layer3', 'layer4')
    for module_name, submodule in backbone.named_modules():
        if module_name in hooked_names:
            submodule.register_forward_hook(features_hook)

    return backbone

def create_pretrained_model(architecture, n_classes, features_hook = None):
    """Create a network for `architecture` with `pretrained=True`.

    Only architecture names containing 'resnet50' are supported; anything
    else raises NotImplementedError. `n_classes` and `features_hook` are
    passed straight through to `_init_resnet_50`.
    """
    if 'resnet50' not in architecture:
        raise NotImplementedError()

    return _init_resnet_50(n_classes, True, features_hook)
    
def create_model(architecture, n_classes, features_hook = None):
    """Create a network for `architecture` with `pretrained=False`.

    Only architecture names containing 'resnet50' are supported; anything
    else raises NotImplementedError. `n_classes` and `features_hook` are
    passed straight through to `_init_resnet_50`.
    """
    if 'resnet50' not in architecture:
        raise NotImplementedError()

    return _init_resnet_50(n_classes, False, features_hook)

class FeatureExtractor(torch.nn.Module):
    """Wrap a backbone so each forward pass also returns hooked activations.

    `feature_hook` is handed to the model factory as a forward hook, and
    every hooked module appends its output to an internal list while the
    wrapped model runs.

    Args:
        architecture: architecture name. If it contains 'pretrained' the
            model is built with `create_pretrained_model`, otherwise with
            `create_model`.
        n_classes: forwarded to the factory as the number of classes.
            NOTE(review): the default of None is passed through unchanged;
            callers are expected to supply a real value.
    """

    def __init__(self, architecture, n_classes = None):
        super().__init__()
        self._features = []
        if 'pretrained' in architecture:
            build = create_pretrained_model
        else:
            build = create_model
        self.model = build(
            architecture,
            n_classes,
            features_hook=self.feature_hook)

    def feature_hook(self, module, input, output):
        """Forward hook: record `output` of the hooked module."""
        self._features.append(output)

    def forward(self, x):
        """Run the wrapped model and return `(logits, features)`.

        `features` holds only the activations recorded during this call.
        """
        # Start from a fresh list so activations left over from an earlier
        # forward pass (e.g. an evaluation loop that never cleared them) are
        # not mixed into this pass's result. Rebinding, rather than clearing
        # in place, keeps lists returned by earlier calls intact.
        self._features = []
        logits = self.model(x)
        return logits, self._features

    def clear_features(self):
        """Drop the module's reference to the recorded activations."""
        self._features = []

In [4]:
from torch.utils import data
from torchvision import datasets
from torchvision import transforms
import torch.nn as nn
from torch.optim import Adam, SGD, RMSprop

# Both splits get the same preprocessing: tensor conversion followed by
# per-channel normalisation with the mean/std values below.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229, 0.224, 0.225])
])

# Only the training split requests a download; the test split reads from the
# same './data' root.
train_set = datasets.CIFAR100('./data', train=True, download=True,
                              transform=cifar_transform)
test_set = datasets.CIFAR100('./data', train=False,
                             transform=cifar_transform)

# Training batches are shuffled and the incomplete final batch is dropped.
train_loader = data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, drop_last=True)

# Evaluation keeps every sample, in dataset order.
test_loader = data.DataLoader(
    test_set, batch_size=2048, shuffle=False, drop_last=False)

num_training_classes = 100

Files already downloaded and verified


In [5]:
import numpy as np
from large_margin import LargeMarginLoss


# Settings for the large-margin loss.
# NOTE(review): parameter meanings are defined by the external `large_margin`
# module and cannot be confirmed from this notebook.
lm_settings = dict(
    gamma=10000,
    alpha_factor=4,
    top_k=num_training_classes,
    dist_norm=np.inf,
)
lm = LargeMarginLoss(**lm_settings)

# Build the feature-returning network and move it to the training device.
net = FeatureExtractor(net_type, num_training_classes).to(device)

def train_lm(model, train_loader, optimizer, epoch, lm):
    """Train `model` for one epoch with the large-margin loss `lm`.

    Args:
        model: module returning `(logits, features)` and providing
            `clear_features()`.
        train_loader: iterable of `(data, target)` batches exposing a sized
            `dataset` attribute (used for progress output only).
        optimizer: optimizer over the model's parameters.
        epoch: epoch index, used only in the progress message.
        lm: loss callable invoked as `lm(logits, one_hot_targets, features)`.

    Side effects:
        Logs the per-batch loss to W&B under "loss" and prints progress
        every 100 batches.
    """
    model.train()

    # Attach W&B tracking once per model instead of once per batch.
    if not getattr(model, "_wandb_watched", False):
        wandb.watch(model)
        model._wandb_watched = True

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # Drop activations recorded by earlier forward passes (e.g. an
        # evaluation run) so the loss only sees this batch's features.
        model.clear_features()
        output, features = model(data)
        # `features` keeps the list alive; release the model's own reference.
        model.clear_features()

        # Size the one-hot targets from the logits rather than assuming
        # 100 classes.
        one_hot = F.one_hot(target, num_classes=output.size(1)).float()

        loss = lm(output, one_hot, features)

        # Log a plain float instead of a tensor that is still attached to
        # the autograd graph.
        wandb.log({"loss": loss.item()})

        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def convert_feature_map_to_list(feature_map):
    """Return the values of `feature_map` ordered as 'layer1', 'layer2', ...

    The mapping must contain one key 'layer<i>' for every i from 1 to
    `len(feature_map)`; a missing key raises KeyError.
    """
    layer_keys = ('layer{}'.format(position)
                  for position in range(1, len(feature_map) + 1))
    return [feature_map[key] for key in layer_keys]


import time

# Use the configured learning rate so the optimizer matches the hyperparameter
# recorded for the run, instead of silently relying on Adam's default.
optim = Adam(net.parameters(), lr=learning_rate) #SGD(net.parameters(), lr=learning_rate, momentum=0)
for i in range(0, epochs):
    # Time the training part of the epoch only; evaluation runs afterwards.
    start_time = time.time()
    train_lm(net, train_loader, optim, i, lm)
    end_time = time.time()

    print('Epoch {} took {} seconds to complete'.format(i+1, end_time-start_time))

    test(net, test_loader)



In [None]:
def train_ce(model, train_loader, optimizer, epoch):
    """Train `model` for one epoch with the cross-entropy loss.

    Args:
        model: module returning `(logits, features)` and providing
            `clear_features()`; the features are not used here.
        train_loader: iterable of `(data, target)` batches exposing a sized
            `dataset` attribute (used for progress output only).
        optimizer: optimizer over the model's parameters.
        epoch: epoch index, used only in the progress message.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits, _ = model(inputs)
        # The recorded activations are not needed for this loss.
        model.clear_features()

        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        # Progress line every 100 batches.
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))

# `net_dict` is not defined in the visible cells. `train_ce` unpacks
# `(logits, features)` and calls `clear_features()`, so build the same
# FeatureExtractor wrapper that the large-margin run uses.
net = FeatureExtractor(net_type, num_training_classes).to(device)
# net = nn.DataParallel(net).to(device)
# Pass the configured learning rate explicitly so it matches the logged value.
optim = Adam(net.parameters(), lr=learning_rate)
for i in range(0, epochs):
    train_ce(net, train_loader, optim, i)
    test(net, test_loader)

In [2]:
from torchvision.models import resnet18, resnet34, resnet50, efficientnet_b1

# Instantiate EfficientNet-B1 with default arguments and print the qualified
# name of every submodule, one per line (the root module's name is '').
model = efficientnet_b1()
module_names = [qualified_name for qualified_name, _ in model.named_modules()]
for module_name in module_names:
    print(module_name)


features
features.0
features.0.0
features.0.1
features.0.2
features.1
features.1.0
features.1.0.block
features.1.0.block.0
features.1.0.block.0.0
features.1.0.block.0.1
features.1.0.block.0.2
features.1.0.block.1
features.1.0.block.1.avgpool
features.1.0.block.1.fc1
features.1.0.block.1.fc2
features.1.0.block.1.activation
features.1.0.block.1.scale_activation
features.1.0.block.2
features.1.0.block.2.0
features.1.0.block.2.1
features.1.0.stochastic_depth
features.1.1
features.1.1.block
features.1.1.block.0
features.1.1.block.0.0
features.1.1.block.0.1
features.1.1.block.0.2
features.1.1.block.1
features.1.1.block.1.avgpool
features.1.1.block.1.fc1
features.1.1.block.1.fc2
features.1.1.block.1.activation
features.1.1.block.1.scale_activation
features.1.1.block.2
features.1.1.block.2.0
features.1.1.block.2.1
features.1.1.stochastic_depth
features.2
features.2.0
features.2.0.block
features.2.0.block.0
features.2.0.block.0.0
features.2.0.block.0.1
features.2.0.block.0.2
features.2.0.block