<a href="https://colab.research.google.com/github/mspatke/MLflow-Deep-Learning-UseCase/blob/main/MLFlow_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-1.24.0-py3-none-any.whl (16.5 MB)
[K     |████████████████████████████████| 16.5 MB 160 kB/s 
Collecting querystring-parser
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting alembic
  Downloading alembic-1.7.6-py3-none-any.whl (210 kB)
[K     |████████████████████████████████| 210 kB 38.7 MB/s 
Collecting gunicorn
  Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 6.8 MB/s 
[?25hCollecting docker>=4.0.0
  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)
[K     |████████████████████████████████| 146 kB 45.3 MB/s 
Collecting gitpython>=2.1.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 40.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 

In [3]:
import pandas as pd
import torch
import os
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR #step learning rate
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch

In [4]:
class Config:
    EPOCHS = 10
    BATCH_SIZE =32
    LR = 0.01
    DEVICE= 'cuda' if torch.cuda.is_available() else 'cpu'
    GAMMA = 0.7
    SEED = 42
    LOG_INTERVAL = 10
    TEST_BATCH_SIZE = 1000
    DRY_RUN = True

In [5]:
config = Config()

In [16]:
torch.manual_seed(config.SEED)

class ConvNet(nn.Module):

  def __init__(self):

    super(ConvNet, self).__init__()
    self.conv1 = nn.Conv2d(1,32,3,1)
    self.conv2 = nn.Conv2d(32, 64, 3,1)
    self.dropout1 = nn.Dropout(0.25)
    self.dropout2 = nn.Dropout(0.5)
    self.fc1=nn.Linear(9216,128)
    self.fc2 = nn.Linear(128, 10)



  def forward(self , x):

    x = self.conv1(x)
    x = F.relu(x)

    x = self.conv2(x)
    x = F.relu(x)
   
    x = F.max_pool2d(x,2)
    x = self.dropout1(x)

    x = torch.flatten(x,1)
    x = self.fc1(x)

    x=F.relu(x)
    x=self.dropout2(x)

    x=self.fc2(x)

    #output = F.log_softmax(x, dim=1)

    return x      

In [17]:
def train_(config, model, device, train_loader, optimizer, epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    pred = model.forward(data)
    loss = F.cross_entropy(pred, target)
    loss.backward()
    optimizer.step()
    if batch_idx % config.LOG_INTERVAL == 0:
      print(f"train epoch: {epoch}[{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx/len(train_loader):.0f})]\t Loss: {loss.item():.6f}")

      if config.DRY_RUN:
        break

In [18]:
def test(model, device, test_loader):
  pass

In [19]:
torch.manual_seed(config.SEED)

<torch._C.Generator at 0x7f23aa3fe530>

In [20]:
train_kwargs = {"batch_size": config.BATCH_SIZE}
test_kwargs = {"batch_size": config.TEST_BATCH_SIZE}

In [21]:
if config.DEVICE =="cuda":
  cuda_kwargs = {"num_workers":1, "pin_memory":True, "shuffle":True}
  train_kwargs.update(cuda_kwargs)
  test_kwargs.update(cuda_kwargs)

In [22]:
trans = transforms.Compose(
    [transforms.ToTensor()]
)

In [23]:
train = datasets.MNIST("../data", train = True , download= True, transform=trans)
test  = datasets.MNIST("../data", train=False, download=True, transform= trans)

train_loader = torch.utils.data.DataLoader(train, **train_kwargs)
test_loader = torch.utils.data.DataLoader(test, **test_kwargs)

In [24]:
from torch.utils.data import DataLoader, Dataset

In [25]:
model = ConvNet().to(config.DEVICE)
scripted_model = torch.jit.script(model)
print(scripted_model)
optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)

scheduler = StepLR(optimizer, step_size=1, gamma=config.GAMMA)

RecursiveScriptModule(
  original_name=ConvNet
  (conv1): RecursiveScriptModule(original_name=Conv2d)
  (conv2): RecursiveScriptModule(original_name=Conv2d)
  (dropout1): RecursiveScriptModule(original_name=Dropout)
  (dropout2): RecursiveScriptModule(original_name=Dropout)
  (fc1): RecursiveScriptModule(original_name=Linear)
  (fc2): RecursiveScriptModule(original_name=Linear)
)


In [26]:
for i in train_loader:
  print(i[0].shape, i[1].shape)
  break

torch.Size([32, 1, 28, 28]) torch.Size([32])


In [27]:
from torch.utils.data import DataLoader, Dataset

In [28]:
#training

for epoch in range(1,config.EPOCHS + 1):
  train_(config, scripted_model, config.DEVICE, train_loader, optimizer, epoch)
  scheduler.step()

train epoch: 1[0/60000 (0)]	 Loss: 2.305307
train epoch: 2[0/60000 (0)]	 Loss: 6.217451
train epoch: 3[0/60000 (0)]	 Loss: 3.385877
train epoch: 4[0/60000 (0)]	 Loss: 2.220319
train epoch: 5[0/60000 (0)]	 Loss: 2.266742
train epoch: 6[0/60000 (0)]	 Loss: 2.323485
train epoch: 7[0/60000 (0)]	 Loss: 2.276761
train epoch: 8[0/60000 (0)]	 Loss: 2.296962
train epoch: 9[0/60000 (0)]	 Loss: 2.296360
train epoch: 10[0/60000 (0)]	 Loss: 2.264648


In [30]:
with mlflow.start_run() as run:
  mlflow.pytorch.log_model(model, "model")
  model_path = mlflow.get_artifact_uri("model")
  loaded_torch_model = mlflow.pytorch.load_model(model_path)
  model.eval()
  with torch.no_grad():
    test_datapoints, test_target = next(iter(test_loader))
    pred = model(test_datapoints[0].reshape((1,1,28,28)).to(config.DEVICE))
    actual = test_target[0].item()
    predicted = torch.argmax(pred).item()
    print(f"actual:{actual}, prdicted :{predicted}")



actual:2, prdicted :6
