In [289]:
#!g1.1
# Environment setup cell. The commented-out installs below are kept as a record
# of the extra packages this notebook imports (cv2, editdistance, ml_collections,
# wandb, albumentations, ...); uncomment them on a fresh environment.
#%pip install opencv-python editdistance ml_collections wandb albumentations
#%pip install bezier
#%pip install typing_extensions --upgrade
#%pip install pandas -U
# Pin numpy to the 1.21 release.
# NOTE(review): if numpy was already imported in this kernel, a kernel restart is
# presumably needed for the pinned version to take effect — confirm.
%pip install numpy==1.21

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/local/bin/python3 -m pip install --upgrade pip' command.


In [1]:
#!g1.1
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF

import torchvision as tv

import numpy as np

import typing as tp
import os

import cv2

import matplotlib.pyplot as plt
import pandas as pd
import math
import itertools

from editdistance import eval as edit_distance

from tqdm import tqdm

import errno

from ml_collections import ConfigDict

import importlib
import wandb

import albumentations as A

import inspect

In [4]:
#!g1.1
from diploma_code.patched_hwb import HandWrittenBlot

# from diploma_code.model import (
#     ConvBlock, FusedInvertedBottleneck, ReduceBlock, Backbone, PositionalEncoding,
#     TransformerEncoder, CTCRawDecoder, CTCDecoderModel, ParallelModel, make_single_model, make_model
# )
from diploma_code.model_v2 import (
    Resnet34Backbone, BiLSTMEncoder, PositionalEncoding, TransformerEncoder, 
    CTCDecoderModel, ParallelModel, make_single_model_v2, make_model_v2
)
from diploma_code.transforms_torch import (
    VerticalRandomMasking, HorizontalResizeOnly, GaussianNoise, 
    RandomHorizontalStretch, RandomChoiceN
)
from diploma_code.char_encoder import (
    CharEncoder
)
# from diploma_code.data_loader.mjsynth import (
#     load_mjsynth_chars, load_mjsynth_samples
# )
from diploma_code.dataset import (
    BaseLTRDataset, LongLinesLTRDataset
)
from diploma_code.make_loader import (
    make_char_encoder, make_datasets, make_dataloaders
)
from diploma_code.optimizing import (
    pytorch_make_optimizer, StepLRWithWarmup, make_lr_scheduler
)
from diploma_code.utils import (
    log_metric_wandb, batch_to_device, seed_everything
)
from diploma_code.evaluation import (
    my_ctc_loss, my_dml_loss, decode_ocr_probs, get_edit_distance, 
    EpochValueProcessor, EpochDMLProcessor, CERProcessor
)
from diploma_code.trainer import (
    LTRTrainer
)

from diploma_code.config import default_diploma_config, get_config

from diploma_code.configs import BaseDatasetConfig
from diploma_code.configs import IamConfig



In [3]:
#!g1.1
# Refresh the project package after its sources were edited on disk, without
# restarting the kernel.
#
# Caveat: importlib.reload() re-executes a module but does not rebind names that
# were pulled in earlier with `from diploma_code.<module> import ...`; re-run the
# cell with those imports afterwards to pick up the refreshed objects.
import diploma_code
import diploma_code.data_loader

# Submodules to refresh, in the order they are reloaded.
_DIPLOMA_SUBMODULES = (
    'char_encoder',
    'config',
    'dataset',
    'data_loader',
    'evaluation',
    'optimizing',
    'model_v2',
    'utils',
    'trainer',
    'make_loader',
    'make_transforms',
    'transforms_functional',
    'configs',
)

for _submodule in _DIPLOMA_SUBMODULES:
    # Resolve through import_module() rather than attribute access on the
    # package: `diploma_code.make_transforms` raises AttributeError when that
    # submodule has not been imported yet in this kernel, whereas
    # import_module() imports it on demand and otherwise returns the module
    # already loaded.
    importlib.reload(importlib.import_module(f'diploma_code.{_submodule}'))





<module 'diploma_code.configs' from '/home/jupyter/work/resources/mutual_htr/mutual_htr/diploma_code/configs/__init__.py'>

In [5]:
#!g1.1
# Log in to Weights & Biases. The call is the last expression of the cell, so
# its return value is shown as the cell output.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mkafka_zhuk[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
#!g1.1
# Experiment configuration: start from the project defaults and override the
# fields specific to this run.

# Values used by more than one field are named once so they cannot drift apart.
dataset_root = '/home/jupyter/mnt/datasets/diploma/'
total_epochs = 100

cfg = get_config()

# Data locations: the generic root and the IAM path point at the same directory.
cfg.data.root_path = dataset_root
cfg.data.iam.path = dataset_root

# Model variant and compute device.
cfg.model.type = 'single'
cfg.device = 'cuda:0'

# Evaluation / snapshot intervals.
cfg.training.eval_epochs_interval = 1
cfg.training.eval_test_interval = 3
cfg.training.snapshot_epochs_interval = 3

# Optimisation settings; the scheduler is given the same epoch count as training.
cfg.training.grad_clip_value = 1
cfg.training.epochs = total_epochs
cfg.lr_scheduler.params.epochs = total_epochs
cfg.optimizer.weight_decay = 1e-2

# Where checkpoints for this run are written.
cfg.training.checkpoint_path = './checkpoints_lstm_single_42'

# NOTE(review): these two switches presumably resume an interrupted run (model
# state and the wandb run) — confirm against LTRTrainer before enabling.
# cfg.training.load_from_checkpoint = True
# cfg.wandb.resume = True

In [6]:
#!g1.1
# Create the checkpoint directory up front; exist_ok=True makes re-running this
# cell harmless when the directory already exists.
os.makedirs(cfg.training.checkpoint_path, exist_ok=True)

In [7]:
#!g1.1
# Fix the random seed (42) before the trainer is constructed.
# NOTE(review): seed_everything comes from diploma_code.utils; which RNGs it
# seeds is not visible from this notebook — confirm.
seed_everything(42)

In [8]:
#!g1.1
# Build the trainer from the run configuration. In the recorded run, this cell
# triggered a download of ResNet-34 weights (see the cell output).
ltr = LTRTrainer(cfg)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /tmp/xdg_cache/torch/hub/checkpoints/resnet34-b627a593.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=87319819.0), HTML(value='')))




In [9]:
#!g1.1
# Disabled check before training: uncomment to call the trainer's validate()
# with mode='valid' (presumably the validation split — confirm).
#ltr.validate(mode='valid')

In [10]:
#!g1.1
# Start training with the configuration held by the trainer.
# NOTE(review): the string argument looks like a run name — confirm against
# LTRTrainer.train.
ltr.train('happy_lstm_single_datasphere_42')

100%|██████████| 1207/1207 [06:45<00:00,  2.97it/s]
100%|██████████| 115/115 [00:36<00:00,  3.18it/s]
100%|██████████| 117/117 [00:36<00:00,  3.22it/s]
100%|██████████| 1207/1207 [06:45<00:00,  2.98it/s]
100%|██████████| 115/115 [00:35<00:00,  3.20it/s]
100%|██████████| 1207/1207 [06:44<00:00,  2.98it/s]
100%|██████████| 115/115 [00:36<00:00,  3.19it/s]
100%|██████████| 1207/1207 [06:44<00:00,  2.98it/s]
100%|██████████| 115/115 [00:36<00:00,  3.15it/s]
100%|██████████| 117/117 [00:36<00:00,  3.19it/s]
100%|██████████| 1207/1207 [06:45<00:00,  2.98it/s]
100%|██████████| 115/115 [00:36<00:00,  3.16it/s]
100%|██████████| 1207/1207 [06:45<00:00,  2.98it/s]
100%|██████████| 115/115 [00:36<00:00,  3.14it/s]
100%|██████████| 1207/1207 [06:45<00:00,  2.98it/s]
100%|██████████| 115/115 [00:36<00:00,  3.19it/s]
100%|██████████| 117/117 [00:36<00:00,  3.23it/s]
100%|██████████| 1207/1207 [06:45<00:00,  2.97it/s]
100%|██████████| 115/115 [00:36<00:00,  3.16it/s]


In [340]:
#!g1.1
# Shell escape: stage the project package directory for the next commit.
!git add diploma_code

On branch develop_datasphere
Your branch and 'origin/develop_datasphere' have diverged,
and have 1 and 1 different commits each, respectively.
  (use "git pull" to merge the remote branch into yours)

You have unmerged paths.
  (fix conflicts and run "git commit")
  (use "git merge --abort" to abort the merge)

Changes to be committed:

	modified:   diploma_code/config.py

Unmerged paths:
  (use "git add <file>..." to mark resolution)

	both modified:   diploma_ctc_torch.ipynb

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	checkpoints_single_lstm_42/train_state.pth
	wandb/

