In [1390]:
# add more fc layers for 5 digit output
# train.py
# inference.py

In [1391]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import os
from pathlib import Path

import torchvision
from torchvision import datasets

# import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import time
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import ttach as tta

import albumentations as A
import albumentations.pytorch

import models
import config

In [1392]:
start = time.time()

In [1393]:
# setting seed
torch.manual_seed(0)
np.random.seed(0)

In [1394]:
# setting device to cuda if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [1395]:
# HYPERPARAMS HERE

# params
lr = 3e-3
momentum = 0.9
weight_decay = 3e-3

# lr scheduler
step_size = 4 # after this many epochs we will mult our lr by gamma
gamma = 0.1 # lr multiplier every step_size epochs

# transforms
presize = 256
crop = 256

# batch size
batch_size = 32

# n_epochs
frozen = 2
unfrozen = 2

# tta
tta_crop = int(presize*0.9)

In [1396]:
# transforms
train_transform = A.Compose([
        # A.SmallestMaxSize(presize),
        A.RandomSizedBBoxSafeCrop (crop, crop),
        A.RandomCrop(crop, crop),
        A.Normalize(),
        A.Rotate(limit=30),
        A.HorizontalFlip(),
        A.Cutout(),
        A.Resize(crop, crop),
        albumentations.pytorch.ToTensorV2()], 
        bbox_params=A.BboxParams(format='albumentations', min_area=256, min_visibility=0.1))

valid_transform = A.Compose([
        # A.SmallestMaxSize(presize),
        A.RandomSizedBBoxSafeCrop (crop, crop),
        A.CenterCrop(crop, crop),
        A.Normalize(),
        A.Resize(crop, crop),
        albumentations.pytorch.ToTensorV2()],
        bbox_params=A.BboxParams(format='albumentations', min_area=256, min_visibility=0.1))

test_transform = A.Compose([
        # A.SmallestMaxSize(presize),
        A.RandomSizedBBoxSafeCrop (crop, crop),
        A.CenterCrop(crop, crop),
        A.Normalize(),
        A.Resize(crop, crop),
        albumentations.pytorch.ToTensorV2()],
        bbox_params=A.BboxParams(format='albumentations', min_area=256, min_visibility=0.1))

In [1397]:
# dataset class
class dataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return (self.df.shape[0])

    def __getitem__(self, index):
        # image = Image.open(self.df.fname[index]).convert('RGB')
        # image = np.array(image)
        image = cv2.imread(self.df.fname[index])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # label = torch.tensor(self.df.label[index]).long()
        class_id = self.df.label[index]
        # assert class_id is not None, 'class id is None'

        xmin = self.df.xmin_alb[index]
        ymin = self.df.ymin_alb[index]
        xmax = self.df.xmax_alb[index]
        ymax = self.df.ymax_alb[index]

        # assert xmin is not None, 'xmin is None'
        # assert ymin is not None, 'ymin is None'
        # assert xmax is not None, 'xmax is None'
        # assert ymax is not None, 'ymax is None'

        # it will need lol to itterate through
        # bboxes = [xmin, ymin, xmax, ymax, class_id]
        bboxes = [[xmin, ymin, xmax, ymax, class_id]]
        # bbox.append(class_id)
        # print(f'bbox: {bboxes}')
        
        # assert len([item for sublist in bboxes for item in sublist])==5, 'check error here'

        if self.transform:
            transformed = self.transform(image=image, bboxes=bboxes)
            
            image = transformed['image']
            bboxes = transformed['bboxes']

        # print(f'transformed bbox: {bboxes}')
        # xmin, ymin, xmax, ymax, class_id = [3,4]
        # xmin, ymin, xmax, ymax, class_id = [item for sublist in bboxes for item in sublist]
        # label = torch.tensor((xmin, ymin, xmax, ymax, class_id))
        # label = torch.tensor(bboxes)
        label = torch.tensor(bboxes).flatten()

        # print(f'label shape: {label.shape}')

        # if not bboxes:
        #     label = torch.tensor([0,0,1,1,class_id])
        # else:
        #     label = torch.tensor(bboxes).flatten()

        # print('hi')
        # print(bbox)
        # print(f'label len: {len(label)}')
        # print(f'image size: {image.shape}, label shape: {label.shape}')
        return image, label

In [1398]:
# just to check if everything works we only use the sample of all data
frac = 0.3
# df = pd.read_csv(config.DF_PATH, usecols=['fname', 'height', 'width',
#                                           'xmin_alb', 'ymin_alb', 'xmax_alb', 'ymax_alb',
#                                           'label', 'kfold'])

df = pd.read_csv(config.DF_PATH, usecols=['fname',
                                          'xmin_alb', 'ymin_alb', 'xmax_alb', 'ymax_alb',
                                          'label', 'kfold']).sample(frac=frac).reset_index(drop=True)
df.shape

(1016, 7)

In [1399]:
# # checking for out of proportion pictures that are hard to transform and train
# df['ratio'] = np.maximum(df.height, df.width) / np.minimum(df.height, df.width)
# df.ratio.sort_values(ascending=False)

In [1400]:
# create dfs
train_df = df[df.kfold.isin([0,1,2])].reset_index(drop=True)
valid_df = df[df.kfold==3].reset_index(drop=True)
test_df = df[df.kfold==4].reset_index(drop=True)

# create dataset
train_dataset = dataset(train_df, train_transform)
valid_dataset = dataset(valid_df, valid_transform)
test_dataset = dataset(test_df, test_transform)

# create loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size)

In [1401]:
# just a check if all is good with the shapes of the loaders
# print(f'dataloader test: {next(iter(train_loader))[0].shape}')
print(f'dataloader test: {next(iter(train_loader))[0].shape}, {next(iter(valid_loader))[0].shape}')

dataloader test: torch.Size([32, 3, 256, 256]), torch.Size([32, 3, 256, 256])


In [1402]:
# for i in train_loader:
#     # print(len(i))
#     print(i[-1].shape)

In [1403]:
# train_dataset[0][0].shape

In [1404]:
# # displayig the data (looks this way because of normalization)
# batch_tensor = next(iter(train_loader))[0][:6,...]
# grid_img = torchvision.utils.make_grid(batch_tensor, nrow=3)

# # grid_img.shape
# plt.figure(figsize=(16,6))
# plt.imshow(grid_img.permute(1, 2, 0));

In [1405]:
model = models.resnet18_5().to(device) # good
# model = models.vgg().to(device) # good

# loss
criterion = nn.MSELoss()

# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

# learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

In [1406]:
def train_model(n_epochs=1,
                model=model,
                train_loader=train_loader,
                valid_loader=valid_loader,
                criterion=criterion,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler):
    
    total_time = time.time()
    print(f'================')
    print(f'started training...')
    
    for epoch in range(n_epochs):
        
        model.train()

        t0 = time.time()

        correct_on_epoch = 0 # train acc
        total_num_images = 0 # train acc
        epoch_loss = 0
        epoch_iou = []

        for batch, (images, labels) in enumerate(train_loader):
            # print(f'batch: {batch}')

            images = images.to(device, non_blocking=True)
            # print(f'image shape: {images.shape}')
            labels = labels.to(device, non_blocking=True)
            # print(f'labels shape: {labels.shape}')
            # print(f'labels shape: {labels.shape}')
            # print(labels)

            total_num_images += labels.size(0) # train acc

            optimizer.zero_grad()
            outputs = model(images)
            # print(f'outputs shape: {outputs.shape}')
            # print(f'outputs: {outputs[0]}')
            # print(outputs.shape)
            # print(outputs[:-1])
            # _, preds = torch.max(outputs, 1) # train acc
            preds = outputs[:,-1].round() # train acc
            # print(f'preds shape: {preds.shape}')
            # print(f'preds: {preds}')

            # print(f'outputs shape: {outputs.shape}')
            # print(f'labels shape: {labels.shape}')
            loss = criterion(outputs.float(), labels.float())
            # print(f'loss: {loss}')
            # print(f'loss shape: {loss.shape}')
            epoch_loss += loss

            # correct_on_epoch += (preds==labels).sum().item() # train acc
            correct_on_batch = (preds==labels[:,-1]).sum().item() # train acc
            # print(f'correct on batch: {correct_on_batch}')
            correct_on_epoch += correct_on_batch # train acc

            # batch iou
            batch_iou = iou(labels, outputs)
            epoch_iou.append(batch_iou)

            loss.backward()
            optimizer.step()
            # print(f'batch: {batch}')
        
        # train acc/loss
        train_epoch_acc = round((correct_on_epoch/total_num_images), 4) # train acc
        train_avg_epoch_loss = round(float(epoch_loss/len(train_loader)), 4)
        # valid acc/loss
        valid_avg_epoch_loss, valid_epoch_accuracy = test_model(model, valid_loader)
        # valid iou
        mean_iou = np.mean(epoch_iou)
        
        lr_scheduler.step()
        epoch_time = round(time.time() - t0)
        
        print(f'epoch: [{epoch+1}/{n_epochs}] | train loss: {train_avg_epoch_loss} | train acc: {train_epoch_acc} | valid loss: {valid_avg_epoch_loss} | valid acc: {valid_epoch_accuracy} | iou: {mean_iou} | time: {epoch_time//60:.0f}m {epoch_time%60:.0f}s')
    
    return model

In [1407]:
def test_model(model, test_loader):
    
    model.eval()
    
    correct_on_epoch = 0
    total_num_images = 0
    epoch_loss = 0

    all_batch_acc = []
    
    with torch.no_grad():
        
        for batch, (images, labels) in enumerate(test_loader):
            
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            
            total_num_images += labels.size(0)

            outputs = model(images)
            # _, preds = torch.max(outputs, 1)
            preds = outputs[:,-1].round()
            
            loss = criterion(outputs, labels)
            epoch_loss += loss

            # correct_on_epoch += (preds==labels).sum().item()
            correct_on_epoch += (preds==labels[:,-1]).sum().item()
    
            # test_batch_accuracy = accuracy_score(labels, preds) #temp
            # all_batch_acc.append(test_batch_accuracy) #temp

    test_epoch_accuracy = round((correct_on_epoch/total_num_images), 4)
    test_avg_epoch_loss = round(float(epoch_loss/len(test_loader)), 4)

    # total_acc = round(np.mean(all_batch_acc), 4) #temp
    
    # print(f'total acc with acc_score: {total_acc}') #temp
    return test_avg_epoch_loss, test_epoch_accuracy

In [1408]:
def iou(true_bb, pred_bb):

    xmin_t, ymin_t, xmax_t, ymax_t, _ = true_bb
    xmin_p, ymin_p, xmax_p, ymax_p, _ = pred_bb

    xmin_intersect = np.maximum(xmin_t, xmin_p)
    ymin_intersect = np.maximum(ymin_t, ymin_p)
    xmax_intersect = np.minimum(xmax_t, xmax_p)
    ymax_intersect = np.minimum(ymax_t, ymax_p)

    intersection_area = (xmax_intersect - xmin_intersect) * (ymax_intersect - ymin_intersect)
    union_area = (xmax_t - xmin_t) * (ymax_t - ymin_t) + (xmax_p - xmin_p) * (ymax_p - ymin_p) - intersection_area + 1e-6

    iou = intersection_area / union_area

    return round(iou, 4)

bb_1 = [0, 0, 0.5, 0.5, 1]
bb_2 = [0.25, 0, 0.75, 0.5, 1]

# test
iou(bb_1, bb_2)

0.3333

In [1409]:
# unfreeze all the params for training
def unfreeze(model=model):
    for param in model.parameters():
        param.requires_grad = True
    return model

In [1410]:
%%time

train_model(frozen)
unfreeze()
train_model(unfrozen)
print('')

started training...


ValueError: too many values to unpack (expected 5)

In [1363]:
checkpoint = {'model': model,
              'state_dict': model.state_dict()}

torch.save(checkpoint, config.MODEL_OUTPUT)

In [1364]:
%%time
# testing with train data
_, train_acc = test_model(model, train_loader)
print(f'test set acc: {train_acc}')

test set acc: 0.8903
Wall time: 44.1 s


In [1365]:
%%time
# testing with test data
_, test_acc = test_model(model, test_loader)
print(f'test set acc: {test_acc}')

test set acc: 0.9421
Wall time: 13.7 s


In [1366]:
%%time
# tta with test data
tta_crop = int(presize*0.9)
tta_model = tta.ClassificationTTAWrapper(model, tta.aliases.five_crop_transform(tta_crop, tta_crop))

tta_transforms = A.Compose([
    A.SmallestMaxSize(presize),
    A.Normalize(),
    albumentations.pytorch.ToTensorV2()
])

tta_dataset = dataset(valid_df, transform=tta_transforms)
tta_loader = DataLoader(tta_dataset, batch_size=1, shuffle=False) # num_workers=0 on cpu

_, tta_acc = test_model(tta_model, tta_loader)

print(f'TTA acc: {tta_acc}')

TTA acc: 0.9163
Wall time: 1min 25s


In [1367]:
total_time = time.time() - start
print(f'Total time: {total_time//60:.0f}m {total_time%60:.0f}s')

Total time: 9m 45s


In [1368]:
# BOX_COLOR = (255, 0, 0)  # Red
# TEXT_COLOR = (255, 255, 255)  # White


# def visualize_bbox(image, bboxes, class_name, color=BOX_COLOR, thickness=2):
#     """Visualizes a single bounding box on the image"""
#     x_min, y_min, x_max, y_max = bboxes
#     x_min, y_min, x_max, y_max = int(x_min*256), int(y_min*256), int(x_max*256), int(y_max*256)

#     cv2.rectangle(image, (x_min, y_min), (x_max, y_max),
#                   color=color, thickness=thickness)

#     ((text_width, text_height), _) = cv2.getTextSize(
#         class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
#     cv2.rectangle(image, (x_min, y_min - int(1.3 * text_height)),
#                   (x_min + text_width, y_min), BOX_COLOR, -1)
#     cv2.putText(
#         image,
#         text=class_name,
#         org=(x_min, y_min - int(0.3 * text_height)),
#         fontFace=cv2.FONT_HERSHEY_SIMPLEX,
#         fontScale=0.35,
#         color=TEXT_COLOR,
#         lineType=cv2.LINE_AA,
#     )
#     return image


# def visualize(image, bboxes, label, category_id_to_name):
#     image = image.copy()
#     for bboxes, category_id in zip(bboxes, label):
#         class_name = category_id_to_name[category_id]
#         image = visualize_bbox(image, bboxes, class_name)
#     plt.figure(figsize=(12, 12))
#     plt.axis('off')
#     plt.imshow(image)
#     # plt.show()

# image = train_dataset[0][0]
# image = np.array(image)
# print(f'image shape: {image.shape}')
# bbox = train_dataset[1][-1].flatten().tolist()
# bboxes = [bbox[:-1]]  # because there is for loop for the bboxes
# # print(f'bboxes: {bboxes}')
# label = [int(bbox[-1])]
# # print(f'label: {label}')
# category_id_to_name = {1: 'cat', 0: 'dog'}

# # transformed = train_transform(image=image)
# # transformed = train_transform(image=image, bboxes=bboxes)

# # visualize(
# #     transformed['image'],
# #     transformed['bboxes'])

# visualize(image, bboxes, label, category_id_to_name)
# # plt.imshow()