In [1]:
import os
import cv2
import toml
import random
import wandb
from pycocotools.coco import COCO
import numpy as np
import torch
import torch.nn as nn
import skimage.io as io

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torchvision.transforms.functional as tf

from src.utils import set_seed
from src.dataloader import COCODataset, Transformations
from src.model import DeepLabV3
from src.train_utils import train_model

In [2]:
# Root directory of the OCR dataset (annotation JSON files are joined onto it
# further down). The location is machine-specific, so it can be overridden with
# the OCR_DATA_PATH environment variable; the original path is kept as the
# default so existing setups keep working unchanged.
DATA_PATH = os.environ.get("OCR_DATA_PATH", "/home/glebk/Datasets/dataset_ocr")


In [3]:
# Load the experiment configuration (image/mask sizes, augmentation
# probabilities, optimiser and model hyper-parameters) from a TOML file.
# The file location is machine-specific, so it can be overridden with the
# OCR_CFG_PATH environment variable; the original path remains the default.
cfg = toml.load(
    os.environ.get("OCR_CFG_PATH", "/home/glebk/VSProjects/projects/tmp/src/cfg.toml")
)
# Bare expression: display the parsed configuration as the cell output.
cfg

{'img_size': [200, 100],
 'mask_size': [26, 13],
 'p_hflip': 0.5,
 'p_vflip': 0.5,
 'wandb_logging': False,
 'n_iter': 1000,
 'n_iter_val': 100,
 'lr': 0.001,
 'bs': 2,
 'save_best_val': True,
 'clip_grad': False,
 'checkpoint_path': './weights',
 'n_classes': 3,
 'n_blocks': [3, 4, 23, 3],
 'atrous_rates': [6, 12, 18],
 'multi_grids': [1, 2, 4],
 'output_stride': 8}

In [4]:
# Train and test pipelines share the image and mask sizes from the config.
img_size, mask_size = cfg["img_size"], cfg["mask_size"]

# Training uses the configured horizontal/vertical flip probabilities.
t_train = Transformations(img_size, cfg["p_hflip"], cfg["p_vflip"], mask_size)
# Evaluation passes zero for both flip probabilities.
t_test = Transformations(img_size, 0, 0, mask_size)


In [5]:
def _load_split(split_name, transforms):
    """Create the annotation path, COCO index and dataset for one split.

    `split_name` is the annotation file name without its ".json" extension,
    resolved relative to DATA_PATH. Returns `(annot_file, coco, dataset)`.
    """
    annot_file = os.path.join(DATA_PATH, f"{split_name}.json")
    coco = COCO(annot_file)
    return annot_file, coco, COCODataset(coco, DATA_PATH, transforms)


# Train split first, then test split (same order as the annotation-loading logs).
annot_file_train, coco_train, dataset_train = _load_split("train_tf", t_train)
annot_file_test, coco_test, dataset_test = _load_split("test_tf", t_test)


loading annotations into memory...
Done (t=0.11s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [6]:
# idx = np.random.randint(len(dataset_train))
# img, mask, resized_mask = dataset_train[idx]

# fig, ax = plt.subplots(1, 3, figsize=(12, 12), sharey=False)
# ax[0].imshow(tf.to_pil_image(img))
# ax[1].imshow(mask[0])
# ax[2].imshow(resized_mask[0])

In [7]:
# Start a Weights & Biases run only when logging is enabled in the config.
if cfg["wandb_logging"]:
    # Hand the hyper-parameters to the run via `config=` so they are stored
    # with it. The previous `wandb.config = cfg` assignment only rebound the
    # module attribute to a plain dict and did not attach the values to the run.
    wandb.init(project="OCR", entity="glebk", config=cfg)


In [8]:
# Both loaders use the configured batch size; only the training split is shuffled.
loader_kwargs = {"batch_size": cfg["bs"]}
train_loader = DataLoader(dataset_train, shuffle=True, **loader_kwargs)
val_loader = DataLoader(dataset_test, shuffle=False, **loader_kwargs)

# Report the number of batches per loader (train first, then validation).
print(len(train_loader), len(val_loader), sep="\n")


429
85


In [9]:
# Use the first CUDA device when one is available; otherwise fall back to CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"

# The architecture hyper-parameters are forwarded from cfg under the same names.
model = DeepLabV3(
    **{
        key: cfg[key]
        for key in (
            "n_classes",
            "n_blocks",
            "atrous_rates",
            "multi_grids",
            "output_stride",
        )
    }
)
model = model.to(device)

loss = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=cfg["lr"])

# No learning-rate schedule is used; the StepLR variant below is kept disabled.
scheduler = None
# scheduler = torch.optim.lr_scheduler.StepLR(
#     optimizer, step_size=2500, gamma=0.95, verbose=True
# )

In [10]:
# Run training with the objects built in the cells above; `scheduler` is None here.
# NOTE(review): `set_seed` is imported but never called in the visible cells, so
# runs are not reproducible unless seeding happens inside train_model - confirm.
train_model(
    model,
    train_loader,
    val_loader,
    device,
    optimizer,
    scheduler,
    cfg,
    loss,
)


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
loss: 0.31801, avg loss: 0.52713:  30%|██▉       | 299/1000 [01:11<01:06, 10.53it/s]