In [1]:
!pip install wandb --upgrade

Requirement already up-to-date: wandb in /usr/local/lib/python3.7/dist-packages (0.10.33)


In [2]:
import numpy as np
import time
import matplotlib.pyplot as plt
import torch
from tqdm.notebook import tqdm_notebook
import torch.nn.functional as F
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import DataLoader


if torch.cuda.is_available():
  device = torch.device("cuda:0")
  torch.backends.cudnn.deterministic = True
else:
  device = torch.device("cpu")

In [3]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msaiamrit[0m (use `wandb login --relogin` to force relogin)


True

In [64]:
config = dict(
    epochs= 30,
    input = 784,
    hidden1 = 512,
    hidden2 = 512,
    output = 10,
    batch_size = 128,
    learning_rate = 0.9,
    dataset="MNIST",
    architecture="Logistic Regression"
)

In [65]:
def model_pipeline(hyperparameters):
  # start wandb experiment tracking
  with wandb.init(project = 'logistic-regression on mnist', config = hyperparameters):
    # to ensure logging Hyper parameters match execution
    config = wandb.config

    # initialise dataloaders, model, loss function and optimizer
    train_dataloader, test_dataloader, model, criteria, optimizer = create(config)
    print(model)

    # train and test the model
    train(train_dataloader, test_dataloader, model, criteria, optimizer, config)

    # save the model
    # torch.onnx.export(model, images, "model.onnx")
    wandb.save("model.onnx")

In [66]:
def create(config):
  train = datasets.MNIST(root = '/data', download = True, transform = transforms.ToTensor(), train = True)
  test = datasets.MNIST(root = '/data', transform = transforms.ToTensor(), train = False)

  train_dataloader = DataLoader(dataset = train, batch_size = config.batch_size, shuffle = True)
  test_dataloader = DataLoader(dataset = test, batch_size = config.batch_size, shuffle = False)
  
  torch.manual_seed(42)
  model = LogisticRegressionModel(config.input, config.hidden1, config.hidden2, config.output).to(device)

  criteria = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr = config.learning_rate)

  return train_dataloader, test_dataloader, model, criteria, optimizer

In [67]:
# # Hyperparameters
# random_seed = 1
# batch_size = 64
# learning_rate = 0.01
# epochs = 20

#Architecture
# input = 784
# hidden1 = 512
# hidden2 = 256
# output = 10

In [68]:
# train = datasets.MNIST(root = '/data', download = True, transform = transforms.ToTensor(), train = True)
# test = datasets.MNIST(root = '/data', transform = transforms.ToTensor(), train = False)

# train_dataloader = DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
# test_dataloader = DataLoader(dataset = test, batch_size = batch_size, shuffle = False)

In [69]:
class LogisticRegressionModel(nn.Module):
  def __init__(self, input, hidden1, hidden2, output):
    super(LogisticRegressionModel, self).__init__()

    self.lin1 = nn.Linear(input, hidden1)
    self.lin2 = nn.Linear(hidden1, hidden2)
    self.lin3 = nn.Linear(hidden2, output)

  def forward(self, x):
    x = F.relu(self.lin1(x))
    x = F.relu(self.lin2(x))
    out = self.lin3(x)

    return out

# torch.manual_seed(random_seed)
# model = LogisticRegressionModel(input, output).to(device)

In [70]:
# criteria = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [71]:
def compute_accuracy(model, dataloader):
  correct = 0
  total = 0

  model.eval()
  with torch.no_grad():
    for images, labels in dataloader:
      images = images.view(-1, 28*28).to(device)
      labels = labels.to(device)
      
      outputs = model(images)

      preds = F.softmax(outputs)
      _, predictions = torch.max(preds,1)

      total += len(labels)
      correct += (predictions == labels).sum()

      return (correct/total)*100

In [75]:
def train(train_dataloader, test_dataloader, model, criterion, optimizer, config):
  model.train()
  wandb.watch(model, criterion, log = "all", log_freq = 10)
  start = time.time()
  # ep = []
  # train_loss = []
  # train_acc = []
  for epoch in tqdm_notebook(range(config.epochs), desc = 'Epoch progress'):
    for batch_idx, (images, labels) in tqdm_notebook(enumerate(train_dataloader), desc = 'Batch Training'):
      images = images.view(-1, 28*28).to(device)
      labels = labels.to(device)

      outputs = model(images)

      loss = criterion(outputs, labels)
      # ep.append(epoch)
      
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if batch_idx % 300 == 0:
        print('Epoch: {}/{} | Batch: {}/{} | Train Loss = {:.4f}'.format(epoch, config.epochs, batch_idx, len(train_dataloader), loss))
    acc = compute_accuracy(model, train_dataloader)  
    print('For Epoch: {}/{}, Training Accuracy is: {}'.format(epoch, config.epochs, acc))
    wandb.log({"training accuracy": acc, "training loss": loss}, step = epoch)
    test(model, test_dataloader)
    # ep.append(epoch); train_acc.append(acc); train_loss.append(loss)
    print('Time taken for epoch {}/{} : {}'.format(epoch, config.epochs, (time.time()-start)/60))
  print('Total Training Time taken : {}'.format((time.time()-start)/60))
  # wandb.save("model.pt")

In [76]:
def test(model, test_dataloader):
  model.eval()

  with torch.no_grad():
    test_acc = compute_accuracy(model, test_dataloader)

    print("Test Accuracy is: ", test_acc)
    wandb.log({"test_accuracy": test_acc})

In [77]:
# plt.plot(ep,train_loss)
# plt.plot(ep,train_acc)
# plt.plot()

In [78]:
model = model_pipeline(config)

LogisticRegressionModel(
  (lin1): Linear(in_features=784, out_features=512, bias=True)
  (lin2): Linear(in_features=512, out_features=512, bias=True)
  (lin3): Linear(in_features=512, out_features=10, bias=True)
)


HBox(children=(FloatProgress(value=0.0, description='Epoch progress', max=30.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 0/30 | Batch: 0/469 | Train Loss = 2.3007
Epoch: 0/30 | Batch: 300/469 | Train Loss = 0.1158

For Epoch: 0/30, Training Accuracy is: 96.875
Test Accuracy is:  tensor(96.0938, device='cuda:0')
Time taken for epoch 0/30 : 0.09850426912307739


  del sys.path[0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 1/30 | Batch: 0/469 | Train Loss = 0.1529
Epoch: 1/30 | Batch: 300/469 | Train Loss = 0.1097

For Epoch: 1/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 1/30 : 0.19310096899668375


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 2/30 | Batch: 0/469 | Train Loss = 0.0310
Epoch: 2/30 | Batch: 300/469 | Train Loss = 0.0500

For Epoch: 2/30, Training Accuracy is: 96.875
Test Accuracy is:  tensor(96.8750, device='cuda:0')
Time taken for epoch 2/30 : 0.28942071199417113


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 3/30 | Batch: 0/469 | Train Loss = 0.0326
Epoch: 3/30 | Batch: 300/469 | Train Loss = 0.0064

For Epoch: 3/30, Training Accuracy is: 98.4375
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 3/30 : 0.3853510141372681


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 4/30 | Batch: 0/469 | Train Loss = 0.0241
Epoch: 4/30 | Batch: 300/469 | Train Loss = 0.0696

For Epoch: 4/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(99.2188, device='cuda:0')
Time taken for epoch 4/30 : 0.48115397691726686


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 5/30 | Batch: 0/469 | Train Loss = 0.0133
Epoch: 5/30 | Batch: 300/469 | Train Loss = 0.0462

For Epoch: 5/30, Training Accuracy is: 98.4375
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 5/30 : 0.5781287749608358


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 6/30 | Batch: 0/469 | Train Loss = 0.0132
Epoch: 6/30 | Batch: 300/469 | Train Loss = 0.0096

For Epoch: 6/30, Training Accuracy is: 98.4375
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 6/30 : 0.6740636587142944


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 7/30 | Batch: 0/469 | Train Loss = 0.0104
Epoch: 7/30 | Batch: 300/469 | Train Loss = 0.0008

For Epoch: 7/30, Training Accuracy is: 99.21875
Test Accuracy is:  tensor(99.2188, device='cuda:0')
Time taken for epoch 7/30 : 0.7698908885320027


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 8/30 | Batch: 0/469 | Train Loss = 0.0385
Epoch: 8/30 | Batch: 300/469 | Train Loss = 0.0050

For Epoch: 8/30, Training Accuracy is: 99.21875
Test Accuracy is:  tensor(99.2188, device='cuda:0')
Time taken for epoch 8/30 : 0.8658678809801738


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 9/30 | Batch: 0/469 | Train Loss = 0.0035
Epoch: 9/30 | Batch: 300/469 | Train Loss = 0.0037

For Epoch: 9/30, Training Accuracy is: 99.21875
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 9/30 : 0.9616718133290608


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 10/30 | Batch: 0/469 | Train Loss = 0.0231
Epoch: 10/30 | Batch: 300/469 | Train Loss = 0.0070

For Epoch: 10/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(99.2188, device='cuda:0')
Time taken for epoch 10/30 : 1.0572243094444276


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 11/30 | Batch: 0/469 | Train Loss = 0.0014
Epoch: 11/30 | Batch: 300/469 | Train Loss = 0.0085

For Epoch: 11/30, Training Accuracy is: 99.21875
Test Accuracy is:  tensor(99.2188, device='cuda:0')
Time taken for epoch 11/30 : 1.1560786684354147


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 12/30 | Batch: 0/469 | Train Loss = 0.0003
Epoch: 12/30 | Batch: 300/469 | Train Loss = 0.0012

For Epoch: 12/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 12/30 : 1.2529293219248454


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 13/30 | Batch: 0/469 | Train Loss = 0.0000
Epoch: 13/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 13/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 13/30 : 1.3497721672058105


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 14/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 14/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 14/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 14/30 : 1.445858120918274


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 15/30 | Batch: 0/469 | Train Loss = 0.0010
Epoch: 15/30 | Batch: 300/469 | Train Loss = 0.0002

For Epoch: 15/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 15/30 : 1.5411941846211752


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 16/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 16/30 | Batch: 300/469 | Train Loss = 0.0003

For Epoch: 16/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 16/30 : 1.638793949286143


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 17/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 17/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 17/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 17/30 : 1.7371155261993407


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 18/30 | Batch: 0/469 | Train Loss = 0.0002
Epoch: 18/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 18/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 18/30 : 1.8337245186169941


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 19/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 19/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 19/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 19/30 : 1.930072057247162


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 20/30 | Batch: 0/469 | Train Loss = 0.0002
Epoch: 20/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 20/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 20/30 : 2.0255961696306866


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 21/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 21/30 | Batch: 300/469 | Train Loss = 0.0002

For Epoch: 21/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 21/30 : 2.121223553021749


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 22/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 22/30 | Batch: 300/469 | Train Loss = 0.0004

For Epoch: 22/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 22/30 : 2.2203947424888613


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 23/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 23/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 23/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 23/30 : 2.3173837820688883


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 24/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 24/30 | Batch: 300/469 | Train Loss = 0.0002

For Epoch: 24/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 24/30 : 2.41356768210729


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 25/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 25/30 | Batch: 300/469 | Train Loss = 0.0000

For Epoch: 25/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 25/30 : 2.510150917371114


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 26/30 | Batch: 0/469 | Train Loss = 0.0002
Epoch: 26/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 26/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 26/30 : 2.6074670275052387


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 27/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 27/30 | Batch: 300/469 | Train Loss = 0.0004

For Epoch: 27/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 27/30 : 2.7041593551635743


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 28/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 28/30 | Batch: 300/469 | Train Loss = 0.0001

For Epoch: 28/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 28/30 : 2.8019798080126446


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Batch Training', max=1.0, style=Progres…

Epoch: 29/30 | Batch: 0/469 | Train Loss = 0.0001
Epoch: 29/30 | Batch: 300/469 | Train Loss = 0.0000

For Epoch: 29/30, Training Accuracy is: 100.0
Test Accuracy is:  tensor(100., device='cuda:0')
Time taken for epoch 29/30 : 2.898721480369568

Total Training Time taken : 2.898796832561493


VBox(children=(Label(value=' 0.28MB of 0.28MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
training accuracy,100.0
training loss,9e-05
test_accuracy,100.0
_runtime,177.0
_timestamp,1625336398.0
_step,29.0


0,1
training accuracy,▁█▁▅█▅▅▆▆▆█▆██████████████████
training loss,█▆▅▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁█▂█▇██▇▇█▇▇██████████████████
_runtime,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇██
_timestamp,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇██
_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
