In [None]:
# Change the working directory to ../input; the following cells navigate relative to it.
%cd ../input

In [None]:
# Enter cvdl-task2 before importing abbyy_course_cvdl_t2 in the next cell
# (assumes the package is importable from this directory — TODO confirm).
%cd cvdl-task2

In [None]:
from abbyy_course_cvdl_t2.impl.data import CocoTextDetection, CocoDetectionPrepareTransform
from abbyy_course_cvdl_t2.convert import ObjectsToPoints
from abbyy_course_cvdl_t2.loss import CenterNetLoss
from abbyy_course_cvdl_t2.network import CenterNet

In [None]:
# List the contents of the task3 data directory.
%ls ../cv-task3-data/task3

In [None]:
# Move into the task3 directory; the annotation and image paths configured below are relative to it.
%cd ../cv-task3-data/task3/

In [None]:
# Show what is in the current working directory.
%ls

In [None]:
from pathlib import Path
import numpy as np
import torch
import math
import warnings
from tqdm import tqdm
from abbyy_course_cvdl_t3.coco_text import COCO_Text

In [None]:
# Locations of the COCO-Text annotation file and of the COCO train2014 images.
# For a local copy of the data, point both paths at your own directory, e.g.:
#   base = Path("D:\\data\\Coco")
#   anno_path = base / 'cocotext.v2.json'
#   images_path = base / 'train2014'
anno_path = Path("./abbyy_course_cvdl_t3/data/cocotext.v2.json")
images_path = Path("../../coco-2014-dataset-for-yolov3/coco2014/images/train2014")

# Fail early, with a hint, when either location is missing.
for _required_path, _hint in (
    (anno_path, "Set your own path to annotation"),
    (images_path, "Set your own path to images"),
):
    assert _required_path.exists(), _hint


`COCO_Text` взят из репозитория авторов датасета:
https://github.com/andreasveit/coco-text/

In [None]:
# Load the COCO-Text annotations through the API class taken from the dataset authors' repository.
ct = COCO_Text(anno_path)

#### Пример работы из репозитория coco-text
Взято из 
https://github.com/andreasveit/coco-text/blob/master/coco_text_Demo.ipynb

In [None]:
# Collect the ids of all training images that contain at least one legible text instance ...
legible_only = [('legibility', 'legible')]
imgIds = ct.getImgIds(imgIds=ct.train, catIds=legible_only)

# ... and load the metadata of one of them, chosen uniformly at random.
random_position = np.random.randint(0, len(imgIds))
img = ct.loadImgs(imgIds[random_position])[0]

In [None]:
from matplotlib import pyplot as plt
# Default figure size (width, height) used by the plots below.
plt.rcParams['figure.figsize'] = (10.0, 8.0)

In [None]:
# Display the randomly chosen image without axes.
sample_image = plt.imread(images_path / img['file_name'])
plt.imshow(sample_image)
plt.axis(False)
plt.title("Изображение из датасета COCO")

In [None]:
# Display the same image again, this time with its text annotations drawn on top.
annotated_image = plt.imread(images_path / img['file_name'])
plt.imshow(annotated_image)

annIds = ct.getAnnIds(imgIds=img['id'])
anns = ct.loadAnns(annIds)
ct.showAnns(anns)

plt.title("Изображение с GT детекциями текста")

### Подготовка датасетов

In [None]:
!pip install editdistance

In [None]:
from abbyy_course_cvdl_t3.coco_text import COCO_Text
from abbyy_course_cvdl_t3 import coco_evaluation


In [None]:
# Training dataset: every sample goes through the 640x640 preparation transform.
train_transforms = CocoDetectionPrepareTransform(size=(640, 640))
ds_train = CocoTextDetection(images_path, Path(anno_path), transforms=train_transforms)

In [None]:
# Validation dataset: same preparation transform, but built from the 'val' split.
val_transforms = CocoDetectionPrepareTransform(size=(640, 640))
ds_val = CocoTextDetection(
    images_path, Path(anno_path), transforms=val_transforms, split='val'
)

### Подготовка модели


In [None]:
# Converter from object annotations to point targets, shared by the loss objects.
obj_to_points = ObjectsToPoints(
    hw=160,
    num_classes=1,
    smooth_kernel_size=3,
)
loss = CenterNetLoss(obj_to_points=obj_to_points)

In [None]:
# Single-class CenterNet and the criterion that is later passed to train().
net = CenterNet(
    head_kwargs={'c_classes': 1},
    nms_kwargs={'kernel_size': 3},
)
crit = CenterNetLoss(obj_to_points=obj_to_points)

In [None]:
# Device handle for the first CUDA GPU; used below to move the network.
gpu = torch.device('cuda:0')

In [None]:
# Move the network to the selected device; the trailing ';' suppresses the cell output.
net = net.to(gpu);

### Тренировка

In [None]:
from abbyy_course_cvdl_t3.utils import dump_detections_to_cocotext_json
from abbyy_course_cvdl_t3.utils import evaluate_ap_from_cocotext_json

In [None]:
def _epoch_learning_rate(base_lr, epoch):
    """Return the learning rate for `epoch`: a tiny warm-up value first, then halving."""
    if epoch == 0:
        # train with a very small lr during the first epoch so the pretrained weights are not destroyed
        return base_lr / 1000
    # afterwards decrease the rate gradually
    return base_lr / 2 ** epoch


def _set_learning_rate(optimizer, value):
    """Write `value` into every parameter group.

    torch optimizers read the rate from `param_groups`; assigning to an
    `optimizer.lr` attribute has no effect on the update step.
    """
    for group in optimizer.param_groups:
        group['lr'] = value


def _collect_val_detections(net, valloader, val_dataset, postprocess, device):
    """Run `net` over `valloader` and return (per-image detections, image ids).

    Assumes `val_dataset.ids` lists the image ids in dataset order (the loader
    must not shuffle) and that iterating over the network output yields one
    prediction per image — TODO confirm against the dataset/network classes.
    Image sizes are looked up through the notebook-level `ct` object.
    """
    prepared_preds = []
    img_ids = []
    sample_idx = 0
    with torch.no_grad():
        for x, _target in valloader:
            if device is not None:
                x = x.to(device)
            for pred in net(x):
                img_id = int(val_dataset.ids[sample_idx])
                img_meta = ct.loadImgs(ids=[img_id])[0]
                prepared_preds.append(
                    postprocess(
                        pred,
                        size_src=[img_meta['width'], img_meta['height']],
                        size_current=[160, 160],
                    )
                )
                img_ids.append(img_id)
                sample_idx += 1
    return prepared_preds, img_ids


def _evaluate_ap(prepared_preds, img_ids, predictions_path):
    """Dump the detections to `predictions_path` and return the AP computed from that file."""
    # Concatenate once, after all images have been processed.
    scores = np.concatenate([u['scores'] for u in prepared_preds])
    boxes = np.concatenate([u['boxes'] for u in prepared_preds], axis=0)
    # One image id per detected box.
    image_ids = []
    for img_id, detections in zip(img_ids, prepared_preds):
        image_ids += [img_id] * len(detections['boxes'])

    dump_detections_to_cocotext_json(
        image_ids=image_ids,
        xlefts=boxes[:, 0].tolist(),
        ytops=boxes[:, 1].tolist(),
        widths=boxes[:, 2].tolist(),
        heights=boxes[:, 3].tolist(),
        scores=scores.tolist(),
        path=predictions_path
    )
    ap, _prec, _rec = evaluate_ap_from_cocotext_json(
        coco_text=ct,
        path=predictions_path
    )
    return ap


def train(train_dataset, val_dataset, *, net=None, criterion=None,
          train_batch_size=1, val_batch_size=1, lr=3e-4,
          epochs=1, image_size=(640, 640), device=None,
          postprocess=None, num_workers=2, checkpoint_dir='../../../working'):
    """Train `net` and, after every epoch, evaluate it on `val_dataset` and save a checkpoint.

    Args:
        train_dataset: dataset yielding (image, annotation) pairs for training.
        val_dataset: dataset yielding (image, annotation) pairs for validation.
        net: network to train; a pretrained CenterNet is created when None.
        criterion: callable(outputs, annotations) returning per-sample losses;
            a default CenterNetLoss is created when None.
        train_batch_size: batch size of the training loader.
        val_batch_size: batch size of the validation loader.
        lr: base learning rate. Epoch 0 runs at lr / 1000, epoch e > 0 at lr / 2**e.
        epochs: number of passes over `train_dataset`.
        image_size: currently unused; kept for backward compatibility.
        device: torch device for the network and the batches; None leaves them where they are.
        postprocess: callable(pred, size_src=[width, height], size_current=[160, 160])
            returning a dict with 'boxes' (rows of x-left, y-top, width, height) and
            'scores' arrays for one image. When None, AP evaluation is skipped.
        num_workers: worker processes used by both data loaders.
        checkpoint_dir: directory that receives predictions.json and the checkpoints.

    Returns:
        The trained network, left in training mode.
    """
    if net is None:
        net = CenterNet(pretrained=True)
    if criterion is None:
        criterion = CenterNetLoss()

    if device is not None:
        net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=num_workers
    )
    # shuffle=False keeps the batches aligned with val_dataset.ids during evaluation.
    valloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=val_batch_size, shuffle=False, num_workers=num_workers
    )
    stats_step = (len(train_dataset) // 500 // train_batch_size) + 1
    output_dir = Path(checkpoint_dir)

    for epoch in range(epochs):
        _set_learning_rate(optimizer, _epoch_learning_rate(lr, epoch))

        net.train()
        running_loss = 0.0
        running_count = 0
        for i, (inputs, anno) in enumerate(trainloader):
            if device is not None:
                inputs = inputs.to(device)
                anno = anno.to(device)
            optimizer.zero_grad()

            outputs = net(inputs)
            losses = criterion(outputs, anno).mean(axis=0)
            loss_value = losses.sum()
            if torch.isnan(loss_value):
                warnings.warn("nan loss! skip update")
                print(f"last loss: {[l.item() for l in losses.flatten()]}")
                continue

            loss_value.backward()
            optimizer.step()

            # .item() detaches the value so the autograd graph is not kept alive.
            running_loss += loss_value.item()
            running_count += 1
            if i % stats_step == 0:
                print(f"epoch {epoch}|{i}; total loss:{running_loss / running_count}")
                print(f"last losses: {[l.item() for l in losses.flatten()]}")
                running_loss = 0.0
                running_count = 0

        # Validation runs once per epoch, with the network in eval mode.
        net.eval()
        if postprocess is None:
            warnings.warn("postprocess is not set; skipping AP evaluation on val")
        else:
            prepared_preds, img_ids = _collect_val_detections(
                net, valloader, val_dataset, postprocess, device
            )
            if prepared_preds:
                ap = _evaluate_ap(
                    prepared_preds, img_ids, str(output_dir / 'predictions.json')
                )
                print(f"Итоговый скор AP на val: {ap}")

        # Name the checkpoint after the epoch, not after the last batch index.
        checkpoint_path = output_dir / f'centralnet_coco_text_third_try_epoch_{epoch}.pth'
        torch.save(net, str(checkpoint_path))
        print('saved')
        net.train()

    print('Finished Training')
    return net


### Запуск тренировки

In [None]:
# Train `net` on ds_train for one epoch with batch size 1 on the first CUDA device.
net = train(
    ds_train,
    ds_val,
    net=net,
    criterion=crit,
    train_batch_size=1,
    val_batch_size=1,
    epochs=1,
    device=torch.device('cuda:0'),
    lr=1e-3,
)

### Сохраняем модель на диск

In [None]:
# %ls ../../..

In [None]:
# torch.save(model, '../../../working/retina_coco_text_first_try_epoch_' + str(t) + '.pth')   

In [None]:
# model.eval();

In [None]:
# Disabled cell: the snippet is wrapped in a string literal, so it is shown as text, not executed.
# It would display image 101243 together with its annotations.
"""img_id = 101243
img_meta = ct.loadImgs(ids=[img_id])[0]
plt.imshow(plt.imread(images_path / img_meta['file_name']))
annIds = ct.getAnnIds(imgIds=img_meta['id'])
anns = ct.loadAnns(annIds)
ct.showAnns(anns)
plt.title(f"GT: {img_meta['id']}")"""

In [None]:
# img, anno = ds_val[ds_val.ids.index(str(img_id))]
# preds = model([img.to(gpu)])[0]

In [None]:
# Disabled cell (string literal, not executed): draws every predicted box on `img`.
# It depends on `preds`, which is only assigned in commented-out code.
"""import torchvision

plt.imshow(
    torchvision.utils.draw_bounding_boxes(
        (img * 255 ).type(torch.uint8), 
        preds['boxes'],
    ).permute(1, 2, 0),
)
plt.title("Pred: все боксы")"""

In [None]:
# Disabled cell (string literal, not executed): draws only the predicted boxes whose score exceeds 0.35.
# It depends on `preds`, which is only assigned in commented-out code.
"""high_confidence_scores = preds['scores'] > 0.35
high_confidence_boxes = preds['boxes'][high_confidence_scores]
plt.imshow(
    torchvision.utils.draw_bounding_boxes(
        (img * 255 ).type(torch.uint8), 
        high_confidence_boxes
    ).permute(1, 2, 0),
)
plt.title("Pred: боксы с score > 0.35")"""

In [None]:
# Disabled cell (string literal, not executed): runs `model` over every image of ds_val and
# collects the post-processed predictions together with their image ids.
# Neither `model` nor `postprocess` is defined in the visible part of this notebook.
"""prepared_preds = []
img_ids = []

for num, img_id in enumerate(tqdm(ds_val.ids)):
    img_id = int(img_id)
    img_meta = ct.loadImgs(ids=[img_id])[0]
    with torch.no_grad():
        x = ds_val[num][0]
        pred = model([
            x.to(gpu)
        ])[0]
        prepared_preds.append(
            postprocess(
                pred,
                size_src=[img_meta['width'], img_meta['height']], 
                size_current=[640, 640]
            )
        )
        img_ids.append(img_id)"""

In [None]:
# Disabled cell (string literal, not executed): concatenates the collected scores and boxes
# and builds an array holding one image id per box.
"""from abbyy_course_cvdl_t3.utils import dump_detections_to_cocotext_json
scores = np.concatenate([u['scores'] for u in prepared_preds])
boxes = np.concatenate([u['boxes'] for u in prepared_preds], axis=0)
image_ids = []
for num, i in enumerate(img_ids):
    image_ids += [i] * len(prepared_preds[num]['boxes'])
image_ids = np.array(image_ids)"""

In [None]:
# Disabled cell (string literal, not executed): writes the detections to ../../../working/predictions.json.
"""dump_detections_to_cocotext_json(
    image_ids = image_ids.tolist(),
    xlefts=boxes[:, 0].tolist(),
    ytops=boxes[:, 1].tolist(),
    widths=boxes[:, 2].tolist(),
    heights=boxes[:, 3].tolist(),
    scores=scores.tolist(),
    path='../../../working/predictions.json'
);"""

In [None]:
# Disabled cell (string literal, not executed): computes AP from the dumped predictions file and prints it.
"""from abbyy_course_cvdl_t3.utils import evaluate_ap_from_cocotext_json
ap, prec, rec = evaluate_ap_from_cocotext_json(
    coco_text=ct,
    path='../../../working/predictions.json'
)
print(f"Итоговый скор AP на val: {ap}")"""

In [None]:
# Disabled cell (string literal, not executed): plots `prec` against `rec` from the AP evaluation.
"""from matplotlib import pyplot as plt
plt.plot(prec, rec)
plt.xlabel('precision')
plt.ylabel('recall')
plt.title('PR curve')
plt.grid()"""