In [1]:
%%capture
!pip install -q clearml

In [2]:
# Log in to your clearml account
import clearml
clearml.browser_login()

<IPython.core.display.Javascript object>


🤖 ClearML connected successfully - let's build something! 🚀


In [3]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [4]:
def make_loader(batch_size, train=True):
    full_dataset = datasets.MNIST(root='./data/MNIST', train=train, 
                                    download=True,  transform=transforms.ToTensor())
    
    loader = DataLoader(dataset=full_dataset,
                        batch_size=batch_size, 
                        shuffle=True,
                        pin_memory=True, num_workers=2)

    return loader

In [5]:
class ConvNet(nn.Module):
    def __init__(self, kernels, classes=10):
        super(ConvNet, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, kernels[0], kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, kernels[1], kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.fc = nn.Linear(7 * 7 * kernels[-1], classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        
        return out

In [6]:
def train(model, loader, criterion, optimizer, device, config):
    cumu_loss = 0
    tqdm_loader = tqdm(loader)

    for images, labels in tqdm_loader:
        images, labels = images.to(device), labels.to(device)
        logit = model(images)
        loss = criterion(logit, labels)
        cumu_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    return loss

In [7]:
def valid(model,test_loader):
    model.eval()
    acc_list = []
    
    with torch.no_grad():
        tqdm_loader = tqdm(test_loader)
        for images, labels in tqdm_loader:
            logit = model(images)
            pred = torch.argmax(logit, dim=1)
            batch_acc = accuracy_score(labels, pred)
            acc_list.append(batch_acc)

    return sum(acc_list) / len(acc_list) 

In [8]:
config = {
    'epochs': 5,
    'classes': 10,
    'batch_size': 128,
    'kernels': [16, 32],
    'weight_decay': 0.0005,
    'learning_rate': 1e-3,
    'dataset': 'MNIST',
    'architecture': 'CNN',
    'val_evrey' : 5,
    'seed': 42
    }

from clearml import Task
import torch.optim as optim

def run(config=None):
    task = Task.init(project_name='MNIST-project', task_name='experiment')
    task.connect(config, 'config')

    device = "cuda" if torch.cuda.is_available() else "cpu"

    train_loader = make_loader(batch_size=config['batch_size'], train=True)
    test_loader = make_loader(batch_size=config['batch_size'], train=False)

    model = ConvNet(config['kernels'], config['classes']).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

    for epoch in range(1, config['epochs']):
        loss = train(model, train_loader, criterion, optimizer, device, config)
        acc = valid(model, test_loader)

        print(f"[Train] Epoch {epoch:02} | Loss: {loss:.2f}")
        print(f"[Vaild] Epoch {epoch:02} | Acc: {acc:.3f}")

        task.get_logger().report_scalar(title="Loss", series="train", value=loss, iteration=epoch)
        task.get_logger().report_scalar(title="Accuracy", series="val", value=acc, iteration=epoch)        
    
        # if epoch % 5 == 0:
        #     acc = valid(model, test_loader)
        #     print(f"[Valid] Epoch {epoch:02} | Acc: {acc:.3f}")
        #     task.get_logger().report_scalar(title="Accuracy", series="val", value=acc, iteration=epoch)
            
    return model

In [9]:
model = run(config)

ClearML Task: created new task id=6d81ec6b380345c6b2a8188de84b1046
ClearML results page: https://app.clear.ml/projects/9c24191d9cbc4f0aa9c6fbede2cbd0e2/experiments/6d81ec6b380345c6b2a8188de84b1046/output/log
2023-03-21 14:56:18,791 - clearml.Task - INFO - Storing jupyter notebook directly as code
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw
ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw



100%|██████████| 469/469 [00:55<00:00,  8.46it/s]
100%|██████████| 79/79 [00:06<00:00, 12.37it/s]


[Train] Epoch 01 | Loss: 0.20
[Vaild] Epoch 01 | Acc: 0.971


100%|██████████| 469/469 [00:46<00:00, 10.05it/s]
100%|██████████| 79/79 [00:05<00:00, 14.08it/s]


[Train] Epoch 02 | Loss: 0.02
[Vaild] Epoch 02 | Acc: 0.982


100%|██████████| 469/469 [00:47<00:00,  9.94it/s]
100%|██████████| 79/79 [00:05<00:00, 14.83it/s]


[Train] Epoch 03 | Loss: 0.01
[Vaild] Epoch 03 | Acc: 0.986


100%|██████████| 469/469 [00:46<00:00,  9.98it/s]
100%|██████████| 79/79 [00:04<00:00, 16.57it/s]

[Train] Epoch 04 | Loss: 0.11
[Vaild] Epoch 04 | Acc: 0.987



