In [1]:
!which python3

/opt/anaconda3/envs/document_scanner/bin/python3


In [2]:
import torch
import torchvision

print(torch.__version__)
print(torchvision.__version__)

2.7.1
0.22.1


In [3]:
# device agnostic code
device = torch.device(
    "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
)
device

device(type='mps')

In [4]:
# imports
import os
cwd = os.getcwd()
os.chdir('..')

from src.data_setup import create_dataloaders
from src.model_builder import DocDetector
from src.engine import train

os.chdir(cwd)

In [5]:
os.getcwd()

'/Users/manish/Documents/GitHub/document-scanner-pytorch/notebooks'

In [6]:
# data setup
DATA_DIR = '../data'
BATCH_SIZE = 32
NUM_WORKERS = 0
TRAIN_SPLIT = 0.8
TARGET_SIZE = (512, 512)

train_dataloader, val_dataloader = create_dataloaders(
    data_dir=DATA_DIR,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    train_split=TRAIN_SPLIT,
    target_size=TARGET_SIZE
)

train_dataloader, val_dataloader

Train dataset size: 160
Val dataset size: 40
Batch size: 32
Num workers: 0
Train split: 0.8
Target size: (512, 512)


(<torch.utils.data.dataloader.DataLoader at 0x11dec0830>,
 <torch.utils.data.dataloader.DataLoader at 0x11defa490>)

---

In [7]:
# model code
model = DocDetector().to(device)
model

DocDetector(
  (backbone): MobileNetV3(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
            (activation): ReLU()
            (scale_activation): Hardsigmoid()
          )
          (2): Con

In [8]:
# training

loss_fn = torch.nn.SmoothL1Loss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=val_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=5,
    device=device
)

Pred range: [-0.311, 0.224]
True range: [-0.125, 1.099]
Sample pred corner: [-0.11513646 -0.06559688]
Sample true corner: [-0.04440559  0.06724491]
Distance range: [0.041, 1.464]
Points within 0.05 pixels: 1/128
Pred range: [-0.060, 0.656]
True range: [-0.051, 1.113]
Sample pred corner: [0.15888089 0.10417342]
Sample true corner: [0.61593276 0.01420634]
Distance range: [0.010, 1.014]
Points within 0.05 pixels: 4/128
Epochs: 0 | Train Loss: 0.0760 |Train Acc: 1.56% |Test Loss: 0.0678 |Test Acc: 6.25%
Pred range: [-0.121, 1.178]
True range: [-0.125, 1.040]
Sample pred corner: [0.5432048  0.22035581]
Sample true corner: [0.39088708 0.39123735]
Distance range: [0.005, 0.600]
Points within 0.05 pixels: 1/128
Pred range: [-0.033, 0.907]
True range: [-0.051, 1.113]
Sample pred corner: [0.26877415 0.1006555 ]
Sample true corner: [0.61593276 0.01420634]
Distance range: [0.001, 0.568]
Points within 0.05 pixels: 4/128
Epochs: 1 | Train Loss: 0.0216 |Train Acc: 2.97% |Test Loss: 0.0241 |Test Acc: 

In [9]:
# save the model
os.chdir('..')

from src.utils import save_model

os.chdir(cwd)

In [10]:
from pathlib import Path

MODEL_NAME = 'DocDetector.pth'
SAVE_MODEL_DIR = Path('../model') / MODEL_NAME

save_model(
    model=model,
    target_dir=SAVE_MODEL_DIR,
    model_name=MODEL_NAME
)

[INFO] Saving model to: ../model/DocDetector.pth/DocDetector.pth
