In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from time import time
import wandb

In [2]:
class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, stride=1, kernel_size=3, dropout=0.2):
        super(BasicBlock, self).__init__()
        self.dropout = dropout
        
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=kernel_size, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.dropout(out, self.dropout)
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [3]:
class ResNet3(nn.Module):
    def __init__(self, block, num_blocks, in_planes, k=2, num_classes=10, kernel_size=3, dropout=0.2):
        super(ResNet3, self).__init__()
        self.in_planes = in_planes
        self.avg_pool_kernal_size = 4
        self.kernel_size = kernel_size
        self.dropout = dropout
        
        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=kernel_size, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_planes)
        self.layer1 = self._make_layer(block, k*self.in_planes, num_blocks[0], stride=1) # 32
        self.layer2 = self._make_layer(block, k*self.in_planes, num_blocks[1], stride=2) # 64
        self.layer3 = self._make_layer(block, k*self.in_planes, num_blocks[2], stride=2) #128
        #self.layer4 = self._make_layer(block, k*self.in_planes, num_blocks[3], stride=2) #256
        self.linear = nn.Linear(4*self.in_planes, num_classes)  #512 dense layers

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, self.kernel_size, self.dropout))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        #out = self.layer4(out)
        out = F.avg_pool2d(out, self.avg_pool_kernal_size)
        out = out.view(out.size(0), -1)
        out = F.dropout(out,self.dropout)
        out = self.linear(out)
        return out

In [4]:
batch_size = 128
n_epochs = 300
num_workers = 16

In [5]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdeeplearning_project[0m (use `wandb login --relogin` to force relogin)


True

In [6]:
wandb.init(
        project="teammatecode Resnet3 inplane22 numblock(7,7,7)",
        config={
            "epochs": 300,           # Trainig epochs
            "optimizer":"sgd",
            "scheduler":'CosineAnnealingLR',
            "t_max":100,
            "batch_size": 128,       # batch size
            "lr": 1e-2,              # Learning rate
            "in_planes":22,          # no of channels in first conv layer
            "num_blocks":(7,7,7), # num of ResNet block in each Residual layer
            "k":2,                   # widening factor
            "classes":10,
            "dataset":"CIFAR10",
            "architecture":"Deep ResNet3"
            })

# in_planes, num_blocks, k=2,

config = wandb.config

In [7]:
##### define transform
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(5),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [8]:

# get training and test sets
train_data = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)

test_data = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)


Files already downloaded and verified
Files already downloaded and verified


In [9]:
# define data loaders
def loader(batch_size):
    trainloader = torch.utils.data.DataLoader(
        train_data,
        batch_size = batch_size,
        num_workers = num_workers
    )
    testloader = torch.utils.data.DataLoader(
        test_data,
        num_workers = num_workers
    )
    return trainloader, testloader

In [10]:
def train_model(train_loader, epoch, loss_fn, optimizer, model):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('\nEpoch: %d' % epoch)
    
    model.train()
    model.to(device)
    train_loss_current = 0
    train_current_corrects = 0
    train_current_total = 0


    for batch, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()

        train_loss_current += loss.item()
        _, predicted_class = y_pred.max(1)
        train_current_total += y.size(0)
        train_current_corrects += (predicted_class == y).sum().item()
    
    # Save Checkpoint
    train_loss = train_loss_current/len(train_loader)
    train_accuracy = 100*float(train_current_corrects) / train_current_total
    
    return train_loss, train_accuracy 

def test_model(test_loader, epoch, loss_fn, model):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    global best_acc

    model.eval()
    model.to(device)

    test_loss_current = 0
    test_current_corrects = 0
    test_current_total = 0

    with torch.no_grad():
        for batch, (X, y) in enumerate(test_loader):
            X = X.to(device)
            y = y.to(device)
    
            y_pred = model(X)
            loss = loss_fn(y_pred, y)

            test_loss_current += loss.item()

            _, predicted_class = y_pred.max(1)
            test_current_total += y.size(0)
            test_current_corrects += (predicted_class == y).sum().item()
    
    # Save Checkpoint
    test_loss = test_loss_current/len(test_loader)
    test_accuracy = 100*float(test_current_corrects) / test_current_total
    
    if test_accuracy >= best_acc:
            print("Accuracy increased {} --> {}. Saving model...".format(best_acc, test_accuracy))
            torch.save(model.state_dict(),'model.pt')
            best_acc = test_accuracy
            torch.onnx.export(model, X, "model.onnx")
            wandb.save("model.onnx")
            
    return test_loss, test_accuracy

In [11]:
def train(model, epochs, optimizer, scheduler, loss_fn, train_loader, test_loader):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    
    wandb.watch(model, loss_fn, log='all', log_freq=5000, log_graph=True)
    print('train() called: model=%s, opt=%s(lr=%f), epochs=%d, device=%s\n' % \
              (type(model).__name__, type(optimizer).__name__,
               optimizer.param_groups[0]['lr'], epochs, device))
    
    history             = {}
    history['loss']     = []
    history['val_loss'] = []
    history['acc']      = []
    history['val_acc']  = []


    for epoch in range(epochs):
        train_loss, train_accuracy = train_model(train_loader, epoch, criterion, optimizer, model)
        test_loss, test_accuracy = test_model(test_loader, epoch, criterion, model)
        scheduler.step()
        
        history['loss'].append(train_loss)
        history['val_loss'].append(test_loss)
        history['acc'].append(train_accuracy)
        history['val_acc'].append(test_accuracy)
        
        wandb.log({'epoch':epoch, 'train_loss':train_loss, 'test_loss':test_loss, 'train_acc':train_accuracy,'test_accuracy':test_accuracy})  
    
    wandb.finish()
    return history

In [12]:
# instantiate model
model = ResNet3(BasicBlock, (7,7,7),22, k = 2)
total_params = sum(p.numel() for p in model.parameters())
print(total_params)

4976960


In [13]:
train_dl, test_dl = loader(128)
# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters())
optimizer = optim.SGD(model.parameters(), lr = 0.008, momentum = 0.9, weight_decay = 5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 100)



In [None]:
best_acc = 0
train(model, config.epochs,optimizer,scheduler,criterion,train_dl, test_dl)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


train() called: model=ResNet3, opt=SGD(lr=0.008000), epochs=300, device=cuda


Epoch: 0
Accuracy increased 0 --> 42.28. Saving model...





Epoch: 1
Accuracy increased 42.28 --> 62.7. Saving model...

Epoch: 2
Accuracy increased 62.7 --> 66.44. Saving model...

Epoch: 3
Accuracy increased 66.44 --> 70.14. Saving model...

Epoch: 4
Accuracy increased 70.14 --> 73.5. Saving model...

Epoch: 5
Accuracy increased 73.5 --> 77.07. Saving model...

Epoch: 6
Accuracy increased 77.07 --> 79.05. Saving model...

Epoch: 7
Accuracy increased 79.05 --> 79.25. Saving model...

Epoch: 8
Accuracy increased 79.25 --> 80.57. Saving model...

Epoch: 9
Accuracy increased 80.57 --> 82.02. Saving model...

Epoch: 10

Epoch: 11
Accuracy increased 82.02 --> 82.52. Saving model...

Epoch: 12
Accuracy increased 82.52 --> 83.81. Saving model...

Epoch: 13
Accuracy increased 83.81 --> 85.5. Saving model...

Epoch: 14

Epoch: 15
Accuracy increased 85.5 --> 86.13. Saving model...

Epoch: 16

Epoch: 17

Epoch: 18

Epoch: 19
Accuracy increased 86.13 --> 86.94. Saving model...

Epoch: 20
Accuracy increased 86.94 --> 87.35. Saving model...

Epoch: 21
Accu