In [1]:
# Basic imports
import torch
import torchvision
import torchvision.transforms as transforms

In [2]:
import wandb

In [3]:
# Download data and create dataloaders
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# set up class labels
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [15]:
import torch.nn as nn
import torch.nn.functional as F

# Define Model
class Net(nn.Module):
    def __init__(self, channels_1, channels_2, hidden_1, hidden_2):
        super().__init__()
        self.conv1 = nn.Conv2d(3, channels_1, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(channels_1, channels_2, 5)
        self.fc1 = nn.Linear(channels_2 * 5 * 5, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Define train config
config = {
    "model_init_kwargs": {
        "channels_1": 6,
        "channels_2": 16,
        "hidden_1": 128,
        "hidden_2": 64
    },
    "optimizer_init_kwargs": {
        "lr": 1e-3,
        "momentum": 0.9
    },
    "criterion": nn.CrossEntropyLoss
}

wandb.init(project='sdsc-wandb-demo', config=config)
net = Net(**config["model_init_kwargs"])

VBox(children=(Label(value='0.084 MB of 0.084 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test/accuracy,▁█
test/bird_accuracy,█▁
test/car_accuracy,█▁
test/cat_accuracy,▁█
test/deer_accuracy,▁█
test/dog_accuracy,█▁
test/frog_accuracy,▁█
test/horse_accuracy,█▁
test/plane_accuracy,▁█
test/ship_accuracy,█▁

0,1
test/accuracy,46.0
test/bird_accuracy,19.4
test/car_accuracy,70.9
test/cat_accuracy,38.9
test/deer_accuracy,37.5
test/dog_accuracy,26.6
test/frog_accuracy,73.8
test/horse_accuracy,66.1
test/plane_accuracy,45.8
test/ship_accuracy,53.8


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016725346533348784, max=1.0…

In [16]:
import torch.optim as optim

# Define loss criterion
criterion = config["criterion"]()
# Define & configure optimizer
optimizer = optim.SGD(net.parameters(), **config["optimizer_init_kwargs"])

In [17]:
# Plot a few images from the dataset
import matplotlib.pyplot as plt
import numpy as np

# function to show an image
def imshow(img, labels, category):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    wimg = wandb.Image(np.transpose(npimg, (1, 2, 0)), caption=' '.join(f'{classes[lbl]:5s}' for lbl in labels))
    wandb.log({f"{category}/images": wimg})

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [18]:
# Main training loop

for epoch in range(5):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        wandb.log({'train/batch_loss': loss.item()})
        if i % 2000 == 1999:    # print every 2000 mini-batches
            wandb.log({'train/running_loss': running_loss / 2000})
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
    
    # Calculate accuracy of model on test set

    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # again no gradients needed
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1


    correct = 0
    total = 0
    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        correct += correct_count
        total += total_pred[classname]
        accuracy = 100 * float(correct_count) / total_pred[classname]
        wandb.log({f'test/{classname}_accuracy': accuracy})
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
    
    wandb.log({'test/accuracy': 100 * correct // total})
    print(f'Total accuracy of the network on the 10000 test images: {100 * correct // total} %')

    # Predictions on text set
    dataiter = iter(testloader)
    images, labels = next(dataiter)

    outputs = net(images)
    _, predicted = torch.max(outputs, 1)

    # print images
    imshow(torchvision.utils.make_grid(images), predicted, 'test')


print('Finished Training')

[1,  2000] loss: 2.212
[1,  4000] loss: 1.887
[1,  6000] loss: 1.677
[1,  8000] loss: 1.565
[1, 10000] loss: 1.518
[1, 12000] loss: 1.462
Accuracy for class: plane is 59.2 %
Accuracy for class: car   is 69.8 %
Accuracy for class: bird  is 18.1 %
Accuracy for class: cat   is 31.5 %
Accuracy for class: deer  is 27.7 %
Accuracy for class: dog   is 46.5 %
Accuracy for class: frog  is 63.6 %
Accuracy for class: horse is 67.3 %
Accuracy for class: ship  is 53.0 %
Accuracy for class: truck is 51.7 %
Total accuracy of the network on the 10000 test images: 48 %
[2,  2000] loss: 1.404
[2,  4000] loss: 1.347
[2,  6000] loss: 1.333
[2,  8000] loss: 1.315
[2, 10000] loss: 1.284
[2, 12000] loss: 1.287
Accuracy for class: plane is 58.2 %
Accuracy for class: car   is 75.4 %
Accuracy for class: bird  is 44.1 %
Accuracy for class: cat   is 47.3 %
Accuracy for class: deer  is 47.7 %
Accuracy for class: dog   is 25.9 %
Accuracy for class: frog  is 68.1 %
Accuracy for class: horse is 65.4 %
Accuracy for cl

In [19]:
# Save the trained model
path_to_model = 'models/cifar10_model.pt'
torch.save(net, path_to_model)

In [20]:
artifact = wandb.Artifact('net', type='model')
artifact.add_file(path_to_model)
wandb.log_artifact(artifact)

<wandb.sdk.wandb_artifacts.Artifact at 0x13f74ff70>

In [21]:
wandb.finish()

0,1
test/accuracy,▁▅▅█▇
test/bird_accuracy,▁▆█▆▅
test/car_accuracy,▁▄▄▆█
test/cat_accuracy,▂▇▁▂█
test/deer_accuracy,▂▆▁▆█
test/dog_accuracy,▅▁▆█▁
test/frog_accuracy,▁▃▇▅█
test/horse_accuracy,█▆▁▄▄
test/plane_accuracy,▃▂▂█▁
test/ship_accuracy,▁▇█▅█

0,1
test/accuracy,60.0
test/bird_accuracy,40.8
test/car_accuracy,82.2
test/cat_accuracy,52.3
test/deer_accuracy,59.3
test/dog_accuracy,24.0
test/frog_accuracy,78.5
test/horse_accuracy,61.9
test/plane_accuracy,55.9
test/ship_accuracy,84.0
