In [1]:
import torch
import os
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch 

In [3]:
class Config:
    """Central place for the notebook's tunable settings (read as class attributes)."""

    # --- reproducibility / hardware ---
    SEED = 42
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    # --- optimisation ---
    EPOCHS = 10
    LR = 0.01
    GAMMA = 0.7  # passed to StepLR as its multiplicative decay factor

    # --- data loading ---
    BATCH_SIZE = 32
    TEST_BATCH_SIZE = 1000

    # --- logging / debugging ---
    LOG_INTERVAL = 10  # print the training loss every LOG_INTERVAL batches
    DRY_RUN = True  # when True, train() stops after its first logged batch

In [4]:
# Shared settings object; the cells below read hyperparameters and flags from it.
config = Config()

In [5]:
class ConvNet(nn.Module):
    """Two-conv-layer CNN that maps 1-channel images to 10 class log-probabilities.

    ``fc1`` expects 9216 flattened features, which is what two unpadded 3x3
    convolutions followed by a 2x2 max-pool produce for 28x28 inputs
    (64 channels * 12 * 12). Inputs must be batched: ``(N, 1, 28, 28)``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # parameter initialisation and state_dict keys stay unchanged.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)`` for batch ``x``."""
        # Feature extractor: conv -> relu -> conv -> relu -> pool -> dropout
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))

        # Collapse everything except the batch dimension exactly once; the
        # tensor stays 2-D from here on, so no further flatten is needed.
        x = torch.flatten(x, 1)

        # Classifier head
        x = self.dropout2(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)
        

In [16]:
def train(config, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        config: object exposing ``LOG_INTERVAL`` (int) and ``DRY_RUN`` (bool).
        model: network to optimise; switched to training mode here.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` (used only for the log line).
        optimizer: optimizer already bound to ``model``'s parameters.
        epoch: epoch number, used only in the log line.
    """
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()

        # Only batches on the logging interval report progress.
        if batch_idx % config.LOG_INTERVAL != 0:
            continue

        print(f"train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx / len(train_loader) :.0f})]\t Loss: {loss.item():.6f}")

        # A dry run stops right after the first logged batch.
        if config.DRY_RUN:
            break

In [7]:
def test(model, device, test_loader):
    pass

In [8]:
# Seed PyTorch's random number generator with the configured value.
torch.manual_seed(config.SEED)

<torch._C.Generator at 0x7fd8fc0b4410>

In [9]:
# Base DataLoader keyword arguments, one dict per split.
train_kwargs = dict(batch_size=config.BATCH_SIZE)
test_kwargs = dict(batch_size=config.TEST_BATCH_SIZE)

In [10]:
# Shuffle training batches on every device. Previously shuffling was only
# switched on together with the CUDA options, so CPU runs trained on the data
# in fixed order, and the test loader was shuffled for no reason on GPU.
train_kwargs["shuffle"] = True

if config.DEVICE == "cuda":
    # Loader options that only make sense when feeding a GPU.
    cuda_kwargs = {"num_workers": 1, "pin_memory": True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

In [11]:
transforms = transforms.Compose(
            [transforms.ToTensor()]
)

In [12]:
train = datasets.MNIST("../data", train = True, download = True, transform = transforms)
test = datasets.MNIST("../data", train = False, transform = transforms)

train_loader = torch.utils.data.DataLoader(train, **train_kwargs)
test_loader = torch.utils.data.DataLoader(test, **test_kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [14]:
# Instantiate the network and place it on the configured device.
model = ConvNet()
model = model.to(config.DEVICE)

optimizer = torch.optim.Adam(params = model.parameters(), lr = config.LR)

# The training loop steps the scheduler once per epoch, so with step_size=1
# the learning rate is multiplied by GAMMA after every epoch.
scheduler = StepLR(optimizer = optimizer, step_size = 1, gamma = config.GAMMA)

In [17]:
# Training loop: one pass of train() per epoch (numbered from 1),
# followed by a learning-rate scheduler step.

for epoch in range(1, config.EPOCHS + 1):
    train(
        config = config,
        model = model,
        device = config.DEVICE,
        train_loader = train_loader,
        optimizer = optimizer,
        epoch = epoch,
    )
    scheduler.step()
    

train epoch: 1 [0/60000 (0)]	 Loss: 2.315926
train epoch: 2 [0/60000 (0)]	 Loss: 5.148420
train epoch: 3 [0/60000 (0)]	 Loss: 2.891138
train epoch: 4 [0/60000 (0)]	 Loss: 2.280523
train epoch: 5 [0/60000 (0)]	 Loss: 2.241330
train epoch: 6 [0/60000 (0)]	 Loss: 2.274588
train epoch: 7 [0/60000 (0)]	 Loss: 2.297429
train epoch: 8 [0/60000 (0)]	 Loss: 2.316872
train epoch: 9 [0/60000 (0)]	 Loss: 2.283734
train epoch: 10 [0/60000 (0)]	 Loss: 2.281778


In [None]:
# Log the trained model to MLflow, load it back from the run's artifact
# location, and check the reloaded copy on a single test image.
with mlflow.start_run() as run:
    mlflow.pytorch.log_model(model, "model")
    model_path = mlflow.get_artifact_uri("model")
    loaded_torch_model = mlflow.pytorch.load_model(model_path)

    # Run inference with the reloaded model (not the in-memory one) so the
    # log/load round-trip is what actually gets verified.
    loaded_torch_model = loaded_torch_model.to(config.DEVICE)
    loaded_torch_model.eval()
    with torch.no_grad():
        test_datapoints, test_target = next(iter(test_loader))
        # Add the batch dimension and move the *input* to the model's device;
        # moving only the output would fail with a device mismatch on CUDA.
        sample = test_datapoints[0].unsqueeze(0).to(config.DEVICE)
        pred = loaded_torch_model(sample)
        actual = test_target[0].item()
        predicted = torch.argmax(pred, dim = 1).item()
        print(f"actual: {actual}, predicted: {predicted}")