# Setup

In [36]:
import os
import io
from typing import Tuple, List, Dict

import numpy
import sklearn as sklearn
import torch
import tqdm as tqdm
from torch.nn import CrossEntropyLoss, BCELoss
from torch.optim import SGD
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torchvision import transforms
import torchvision.transforms.functional as F
from torchvision.datasets import ImageFolder
from IPython.display import display
import torchvision

from tqdm.notebook import tqdm
from torchvision.models import densenet161

from torch import nn

In [2]:
# use cuda if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [None]:
# !gdown https://drive.google.com/uc?id=<formsA-D.tgz>
!gdown https://drive.google.com/uc?id=<sentences.tgz>

In [None]:
!mkdir -p ../iamdataset/sentences-sample
!tar xzvf sentences.tgz -C ../iamdataset/sentences-sample

In [50]:
import importlib
from orientation_nn import OrientationImageFolder
import orientation_nn
importlib.reload(orientation_nn)
import orientation_nn

from orientation_nn import OrientationCNN

%load_ext autoreload
%autoreload 0
# %load_ext autoreload # %autoreload 1 # %aimport orientation_nn

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# model = None
# load model from .pt
# model = OrientationCNN()
model = densenet161(weights=None) # 'DEFAULT')
model = model.to(device)

model.load_state_dict(torch.load("model-Densenet.1.pt", map_location=device))


# for param in model.parameters():
    # param.requires_grad = False # freeze the pretrained part
# model.classifier = nn.Linear(2208,2)


<All keys matched successfully>

In [64]:
dataset = orientation_nn.OrientationImageFolder('../iamdataset/sentences-sample')
# SOMEHOW dataset = OrientationImageFolder() doesn't work (but *only sometimes* -- other times it works -- wtf!?)
# yet writing dataset = orientation_nn.OrientationImageFolder() fixes it. God damn witchcraft

In [60]:
train_transform = transforms.Compose([
    # transforms.RandomCrop(224, padding=50),
    # transforms.RandomResizedCrop(112, scale=(0.08, 0.25)),
    # transforms.Grayscale(num_output_channels=1),
    transforms.Resize(32), # (32, 256)),
    # orientation_nn.PadToSize((256,32), pad_with=999999), #LOL if you put "1" it will pad with 0.00392157 but if you put a really big number then it will work. Real bruh moment

    # transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation((180, 180)),
    # transforms.RandomInvert(p=1),
    # transforms.RandomInvert(p=1),
    orientation_nn.PadToSize((224,224), pad_with=255, center=True),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # get transforms from densenet161
    # torchvision.models.DenseNet161_Weights.DEFAULT.transforms()
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),

])
# test_transform = transforms.Compose([
#     transforms.Grayscale(num_output_channels=1),
#     transforms.Resize(32), # (32, 256)),
#     orientation_nn.PadToSize((256,32), pad_with=999999), #LOL if you put "1" it will pad with 0.00392157 but if you put a really big number then it will work. Real bruh moment
#     transforms.RandomCrop((32, 256)),
#     # transforms.RandomInvert(p=0),
#     transforms.ToTensor()
# ])
dataset.transform = train_transform

# loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)


# test/train split
# prepare the dataset
def prepare_data(dataset):
    # define standardization
    # trans = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
    # load dataset
    _datalen = len(dataset)
    _trainlen = int(_datalen * .8)
    train_set, test_set = torch.utils.data.random_split(dataset, [_trainlen, _datalen - _trainlen])

    # prepare data loaders
    train_dl = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=2)
    test_dl = DataLoader(test_set, batch_size=16, shuffle=False)
    return train_dl, test_dl
train_dl, test_dl = prepare_data(dataset)

In [68]:
print(isinstance(dataset, type(dataset)))
# h = orientation_nn.OrientationImageFolder('../iamdataset/sentences-sample')

# this is True:
print(isinstance(orientation_nn.OrientationImageFolder('../foo'), orientation_nn.OrientationImageFolder))

# but this is False. ;-;
print(isinstance(OrientationImageFolder('../foo'), orientation_nn.OrientationImageFolder))


True
True
False


In [None]:
last = None
def peek(train_dl):
    # test it out by taking a peek
    for _ in range(4):
        img1b, lbl = next(iter(train_dl))
            # img1 = F.to_pil_image(train_set.__getitem__(5)[0])
            # print(img1, lbl)
        print(img1b.shape, lbl.shape)
        for i, img1 in enumerate(img1b):
            img1 = F.to_pil_image(img1b[i], mode='RGB') # , mode='L') # grayscale
            # img1.show()
            print(f"label: {'flipped' if lbl[i] else 'normal'}")
            # print(img1b[i])
            display(img1)
            global last
            last = img1b[i]

peek(train_dl)
print(last)
# print(last.numpy())

In [None]:
def validation(model, test_dl, criterion):
    batch_loss = 0
    total_t = 0
    correct_t = 0
    with torch.no_grad():

        model.eval()
        for inputs,targets in test_dl:
            inputs,targets = inputs.to(device),targets.to(device)
            yhat = model(inputs)
            loss_t = criterion(yhat,targets)
            batch_loss+=loss_t.item()
            correct_t += (yhat.argmax(1) == targets).sum().item()
            total_t += targets.size(0)
        # val_acc.append(100 * correct_t/total_t)
        # val_loss.append(batch_loss/len(test_dataloader))
    # network_learned = batch_loss < valid_loss_min
    print(f'validation loss: {batch_loss/len(test_dl)}, validation acc: {(100 * correct_t/total_t):.4f}\n')
    model.train()
# train the model
def train_model(train_dl, model):
    # define the optimization
    criterion = CrossEntropyLoss()
    # criterion = BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) # , lr=0.01, momentum=0.9)
    # enumerate epochs
    # add progress bar

    NUM_EPOCHS = 10
    for epoch in range(NUM_EPOCHS):
        # enumerate mini batches
        loop = tqdm(train_dl)
        loop.set_description(f"Epoch [{epoch}/{NUM_EPOCHS}]")

        train_loss = 0
        train_acc = 0
        atts = 0
        for i, (inputs, targets) in enumerate(loop):
            # https://aladdinpersson.medium.com/how-to-get-a-progress-bar-in-pytorch-72bdbf19b35c

            optimizer.zero_grad() # clear the gradients

            inputs = inputs.to(device)
            targets = targets.to(device)

            yhat = model(inputs) # compute the model output
            loss = criterion(yhat, targets) # , targets.reshape(-1, 1)) # calculate loss

            # predicted = model(torch.tensor(inputs,dtype=torch.float32))
            # acc = (predicted.reshape(-1).detach().numpy().round() == targets).mean()

            loss.backward() # credit assignment
            optimizer.step()  # update model weights

            train_loss += loss.item()*inputs.size(0)
            train_acc += (yhat.argmax(1) == targets).sum().item()
            atts += inputs.size(0)
            if atts % 1000 == 0:
                print(f"train_loss: {train_loss/atts}, train_acc: {train_acc/atts}")
            
        # loop.set_postfix(loss=train_loss/len(train_dl.sampler), acc=train_acc/len(train_dl.sampler))
        print(f"train_loss: {train_loss/len(train_dl.sampler)}, train_acc: {train_acc/len(train_dl.sampler)}")
        validation(model, test_dl, criterion)

        # save the model
        torch.save(model.state_dict(), f'model-D4.{epoch}.pt')


In [None]:

# model.dropout = nn.Dropout(0.2)

In [None]:
# prepare the data
# path = '~/.torch/datasets/mnist'
train_dl, test_dl = prepare_data('')
print(len(train_dl.dataset), len(test_dl.dataset))
# define the network
if model is None:
    print('reset!')
    model = OrientationCNN()
model = model.to(device)
# train_dl.to(device)
# # train the model
train_model(train_dl, model)
# evaluate the model
# acc = evaluate_model(test_dl, model)
# print('Accuracy: %.3f' % acc)

In [None]:
# model.load_state_dict(torch.load("model-D4.1.pt"))

In [None]:

last = None
def peek_trained(dl, model):
    model.eval()
    with torch.no_grad():
        # test it out by taking a peek
        for _ in range(1):
            img1b, lbl = next(iter(dl))
                # img1 = F.to_pil_image(train_set.__getitem__(5)[0])
                # print(img1, lbl)
            print(img1b.shape, lbl.shape)
            for i, _ in enumerate(img1b):
                ele = img1b.to(device)
                result = model(ele).detach().cpu().numpy()
                result = result.argmax(1)

                img1 = F.to_pil_image(img1b[i], mode='RGB') # , mode='L') # grayscale
                # img1.show()
                print(f"label: {'flipped' if lbl[i] else 'normal'}")
                # print(result)
                print(f"predicted: {'flipped' if result[i] else 'normal'}")
                # print(img1b[i])
                display(img1)
                global last
                last = img1b[i]


    model.train()

peek_trained(test_dl, model)
"""
print(last)
# print(last.numpy())
# get element 5618 of train_dl and run it through the model
model.eval() # model does a lot better in eval because of dropout

for i in range(600, 630):
  ele, target = train_dl.dataset.__getitem__(i)
  # run it through model
  ele, target = ele.to(device), target
  result = model(ele.unsqueeze(0)).detach().cpu().numpy() # unsqueeze converts it to a 1-batch
  result = result.argmax(1)
  print(f"{i}: {result}, {target}")
  # ele
model.train()
"""

In [None]:
print(model)
# get shape of tensor of dataset
print(dataset[0][0].shape)

from torchsummary import summary

# train_dl has torch.Size([4, 3, 32, 256]) with batch_size = 4
summary(model, (1, 32, 128))

In [None]:
# load model from .pt file
# model = OrientationCNN()
# model.load_state_dict(torch.load('model_scripted.pt'))
# model.eval()
model = torch.jit.load('model_scripted.pt')

In [69]:
from sklearn.metrics import accuracy_score
from numpy import vstack


# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    loop = tqdm(test_dl)
    loop.set_description(f"Evaluating model")
    for i, (inputs, targets) in enumerate(loop):
        # evaluate the model on the test set
        yhat = model(inputs)
        # retrieve numpy array
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        # round to class values
        yhat = yhat.round()
        # store
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate accuracy
    acc = accuracy_score(actuals, predictions)
    return acc

In [70]:
evaluate_model(test_dl, model)

  0%|          | 0/419 [00:00<?, ?it/s]

ValueError: Classification metrics can't handle a mix of binary and multiclass-multioutput targets