In [None]:
#Using weights and Biases for model tracking.
!pip install wandb
import wandb

!wandb login


Aborted!
^C


In [None]:
import torch
from torchvision.datasets import ImageFolder
from torch import nn
from torchvision import transforms, datasets

from PIL import Image
from torch.utils.data import DataLoader

import copy
import os
from tqdm.notebook import tqdm

import cv2
import matplotlib.pyplot as plt
import numpy as np

from src.model import Model

In [None]:
# Hyperparameters and run metadata; passed to wandb.init() by model_pipeline().
_device_name = "cuda" if torch.cuda.is_available() else "cpu"

config = {
    # optimisation settings read by make() / train()
    "epochs": 10,
    "batch_size": 32,
    "learning_rate": 0.001,
    # descriptive labels (not read by the code in this notebook)
    "dataset": "Digital digits",
    "architecture": "CNN",
    "optimizer": "Adam",
    # where the model and batches are placed
    "device": torch.device(_device_name),
}

In [None]:
def model_pipeline(hyperparameters):
    """Run one tracked experiment end to end: build, train, save.

    A wandb run is opened in the "Sudoku-solver" project with
    `hyperparameters` as its config; the run is closed when the `with`
    block exits.

    Args:
        hyperparameters: mapping handed to wandb.init(config=...). The
            helpers called here read batch_size, learning_rate, device and
            epochs from it.

    Returns:
        (model, dataloader): the model returned by train() and the
        dataloader object returned by make().
    """
    run = wandb.init(project="Sudoku-solver", config=hyperparameters)
    with run:
        # Read every hyperparameter back from wandb.config so that what is
        # logged is exactly what is executed.
        run_cfg = wandb.config

        # Data, model, loss and optimizer.
        model, dataloader, criterion, optimizer = make(run_cfg)

        # Optional (disabled): start from saved weights
        #   model.load_state_dict(torch.load("model.pt"))

        # Fit the model.
        model = train(model, dataloader, criterion, optimizer, run_cfg)

        # Optional (disabled): inspect predictions / evaluate on a test set
        #   visualize(model, dataloader)
        #   test_model(model, test_dataloader)

        # Persist the trained weights.
        save_model(model, run_cfg)

    return model, dataloader

def save_model(model, config):
    """Write the model's state_dict to "model.pt" and pass that file to wandb.save().

    `config` is accepted but not used by this function.
    """
    checkpoint_file = "model.pt"
    weights = model.state_dict()
    torch.save(weights, checkpoint_file)
    wandb.save(checkpoint_file)

In [None]:
def make(config):
    """Assemble the objects a training run needs.

    Reads `config.batch_size`, `config.device` and `config.learning_rate`.

    Returns:
        (model, dataloaders, criterion, optimizer), where `dataloaders` is
        whatever preprocess_data() returns, `criterion` is a
        CrossEntropyLoss and `optimizer` is Adam over the model parameters.
    """
    # Data first, so the split is drawn before the model is initialised.
    loaders = preprocess_data(config.batch_size)

    # Model(1, 10): presumably 1 input channel and 10 output classes --
    # TODO confirm against src.model.
    net = Model(1, 10).to(config.device)

    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=config.learning_rate)

    return net, loaders, loss_fn, adam

def preprocess_data(BATCH_SIZE, data_dir='./dataset', split=0.2):
    """Load the image folder, split it into train/val and wrap both in DataLoaders.

    Every image is converted to single-channel grayscale, resized to 32x32
    and turned into a tensor.

    Args:
        BATCH_SIZE: batch size used for both loaders.
        data_dir: root directory read by torchvision's ImageFolder
            (one sub-folder per class). Defaults to './dataset'.
        split: fraction of the dataset held out for validation. Defaults to 0.2.

    Returns:
        dict with keys "train" and "val", each mapping to a DataLoader.
        The train loader shuffles and drops the last partial batch; the
        val loader does neither.
    """
    transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                    transforms.Resize((32, 32)),
                                    transforms.ToTensor(),
                                    ])

    dataset = datasets.ImageFolder(data_dir, transform=transform)
    val_len = int(split * len(dataset))
    train_len = len(dataset) - val_len
    train_data, val_data = torch.utils.data.random_split(dataset, [train_len, val_len])

    subsets = {"train": train_data, "val": val_data}

    # Shuffling and drop_last are training-only conveniences. Applying them to
    # validation would discard a random partial batch on every pass, so the
    # validation metrics would never cover the whole held-out set.
    dataloader = {
        name: DataLoader(subset,
                         batch_size=BATCH_SIZE,
                         shuffle=(name == "train"),
                         drop_last=(name == "train"))
        for name, subset in subsets.items()
    }

    return dataloader

In [None]:
def train(model, 
          dataloaders,
          criterion,
          optimizer,
          config,
          ):
    """Train for `config.epochs` epochs and return the model with the best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. Per-phase
    loss and accuracy are averaged over the samples actually processed and
    logged to wandb with the epoch index as the step.

    Args:
        model: the network to optimise; also registered with wandb.watch().
        dataloaders: mapping with 'train' and 'val' keys, each an iterable
            yielding (inputs, labels) batches.
        criterion: loss callable, forwarded to train_batch().
        optimizer: optimizer, forwarded to train_batch().
        config: object exposing `epochs` and `device`.

    Returns:
        `model`, with the state_dict from the epoch that had the lowest
        validation loss loaded back in.
    """
    wandb.watch(model, criterion, log="all", log_freq=10)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e+7

    for epoch in range(config.epochs):
        print('Epoch {}/{}'.format(epoch+1, config.epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(config.device)
                labels = labels.to(config.device)

                # calculate batch loss, accuracy
                loss, corrects = train_batch(inputs, labels, model, optimizer, criterion, phase)

                # statistics (loss is weighted by batch size so the epoch
                # figure is a per-sample mean)
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += corrects.item()
                samples_seen += batch_size

            # Normalise by the samples that were really processed: a loader
            # built with drop_last=True yields fewer samples than
            # len(loader.dataset), which would bias both metrics low.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            # Log the epoch averages (not the last batch's loss) in one call.
            wandb.log({f"{phase}_loss": epoch_loss,
                       f"{phase}_accuracy": epoch_acc},
                      step=epoch)

            print('{} Loss: {:.4f} | Accuracy: {:.2f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation loss improves
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    print('Best val loss: {:.4f}'.format(best_loss))
    wandb.run.summary["val_loss"] = best_loss

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

def train_batch(inputs, labels, model, optimizer, criterion, phase):
    """Process one batch; in the 'train' phase also back-propagate and step the optimizer.

    Args:
        inputs: batch passed to `model`.
        labels: targets passed to `criterion` and compared with the predictions.
        model: callable producing per-class scores along dimension 1.
        optimizer: its gradients are zeroed on every call; `step()` is only
            invoked when `phase == 'train'`.
        criterion: loss callable invoked as criterion(outputs, labels).
        phase: 'train' enables gradients and the update; any other value
            runs a forward pass only.

    Returns:
        (loss, corrects): the criterion output and the number of
        predictions equal to their label, both as tensors.
    """
    is_training = (phase == 'train')

    # Start from clean gradients regardless of phase.
    optimizer.zero_grad()

    # Gradient tracking is switched on only while training.
    with torch.set_grad_enabled(is_training):
        scores = model(inputs)
        loss = criterion(scores, labels)

        # Index of the highest score along dim 1 is the predicted class.
        predicted = torch.max(scores, 1)[1]
        corrects = (predicted == labels).sum()

        if is_training:
            loss.backward()
            optimizer.step()

    return loss, corrects

In [None]:
# Launch the full run: builds the data/model/optimizer, trains, saves "model.pt",
# and returns the trained model together with the train/val dataloaders.
model, dataloader = model_pipeline(config)

Epoch 1/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))




train Loss: 1.5203


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4701

Epoch 2/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4731


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4633

Epoch 3/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4683


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4582

Epoch 4/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4653


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4571

Epoch 5/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4662


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4600

Epoch 6/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4643


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4574

Epoch 7/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4640


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4561

Epoch 8/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4638


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4546

Epoch 9/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4635


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4555

Epoch 10/10
----------


HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


train Loss: 1.4622


HBox(children=(FloatProgress(value=0.0, max=63.0), HTML(value='')))


val Loss: 1.4545

Best val loss: 1.454454


VBox(children=(Label(value=' 0.37MB of 0.37MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_loss,1.46115
train_accuracy,0.99914
val_loss,1.4619
val_accuracy,0.9877
_runtime,58.0
_timestamp,1620604699.0
_step,9.0


0,1
train_loss,▁█▆▁▁▁▁▁▁▁
train_accuracy,▁▇▇███████
val_loss,█▄▁▁▁▁▁▁▁▁
val_accuracy,▁▅▇▇▅▆▇█▇█
_runtime,▁▂▃▃▄▅▆▇██
_timestamp,▁▂▃▃▄▅▆▇██
_step,▁▂▃▃▄▅▆▆▇█
