In [1]:
# Show which interpreter this kernel is actually running on.
# sys.executable is the path of the running Python binary, so it stays
# correct even when the shell's PATH points at a different python3, and it
# does not depend on a `which` command being available.
import sys

print(sys.executable)

/opt/anaconda3/envs/document_scanner/bin/python3


In [2]:
import torch
import torchvision

# Record the installed library versions, one per line (torch first, then
# torchvision), so the run can be matched to an environment later.
print(torch.__version__, torchvision.__version__, sep="\n")

2.7.1
0.22.1


In [3]:
# device agnostic code
# Pick the compute backend in priority order: Apple MPS first, then CUDA,
# and fall back to the CPU when neither accelerator is available.
if torch.backends.mps.is_available():
    backend_name = "mps"
elif torch.cuda.is_available():
    backend_name = "cuda"
else:
    backend_name = "cpu"

device = torch.device(backend_name)
device

device(type='mps')

In [4]:
# imports
import os
import sys

# The local `src` package lives in the parent of the current working
# directory. Put that parent on sys.path instead of temporarily chdir-ing
# into it: a failing import can then never leave the kernel stranded in the
# wrong working directory, and re-running this cell is harmless.
cwd = os.getcwd()  # still defined, in case other cells read it (not visible here)
project_root = os.path.abspath(os.path.join(cwd, os.pardir))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.data_setup import create_dataloaders
from src.model_builder import DocDetector
from src.engine import train

In [5]:
# Display the kernel's current working directory. Relative paths used in this
# notebook (e.g. '../data') are presumably resolved against it -- confirm in
# the data-loading code.
os.getcwd()

'/Users/manish/Documents/GitHub/document-scanner-pytorch/notebooks'

In [6]:
# data setup
# Tunable inputs for the dataloaders, grouped here so they are easy to adjust.
DATA_DIR = '../data'       # dataset location, relative to the working directory
BATCH_SIZE = 32
NUM_WORKERS = 0
TRAIN_SPLIT = 0.8          # presumably the fraction of samples used for training
TARGET_SIZE = (512, 512)

# Collect the keyword arguments once, then build both loaders in one call.
dataloader_kwargs = {
    "data_dir": DATA_DIR,
    "batch_size": BATCH_SIZE,
    "num_workers": NUM_WORKERS,
    "train_split": TRAIN_SPLIT,
    "target_size": TARGET_SIZE,
}
train_dataloader, val_dataloader = create_dataloaders(**dataloader_kwargs)

# Echo both loaders as the cell output.
(train_dataloader, val_dataloader)

Train dataset size: 160
Val dataset size: 40
Batch size: 32
Num workers: 0
Train split: 0.8
Target size: (512, 512)


(<torch.utils.data.dataloader.DataLoader at 0x12ddc0830>,
 <torch.utils.data.dataloader.DataLoader at 0x12ddfa490>)

---

In [7]:
# model code
# Build the detector with its default constructor arguments, then move it to
# the selected device.
model = DocDetector()
model = model.to(device)

# The bare expression renders the module's layer-by-layer repr as cell output.
model

DocDetector(
  (backbone): MobileNetV3(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
            (activation): ReLU()
            (scale_activation): Hardsigmoid()
          )
          (2): Con

In [8]:
# training
# Hyperparameters for this run.
LEARNING_RATE = 0.001
NUM_EPOCHS = 5

# Mean-squared-error loss, optimised with Adam over all model parameters.
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# The validation loader is passed through the `test_dataloader` argument.
# NOTE(review): the structure of `results` is defined in src.engine -- confirm there.
results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=val_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=NUM_EPOCHS,
    device=device,
)

Epochs: 0 | Train Loss: 0.1495 |Train Acc: 0.47% |Test Loss: 0.0684 |Test Acc: 0.33%
Epochs: 1 | Train Loss: 0.0622 |Train Acc: 0.31% |Test Loss: 0.1007 |Test Acc: 0.41%
Epochs: 2 | Train Loss: 0.0447 |Train Acc: 0.26% |Test Loss: 0.0559 |Test Acc: 0.30%
Epochs: 3 | Train Loss: 0.0346 |Train Acc: 0.24% |Test Loss: 0.0483 |Test Acc: 0.28%
Epochs: 4 | Train Loss: 0.0314 |Train Acc: 0.22% |Test Loss: 0.0458 |Test Acc: 0.27%
