In [None]:
import matplotlib.pyplot as plt

import numpy as np
import random
import seaborn as sns
import pandas as pd
import os
import pathlib
import shutil
import cv2
import sys

# Switch the working directory to the notebooks folder of the adas_system checkout.
# NOTE(review): the Colab branch further down clones the repository and runs this
# same %cd again, so on a fresh machine this directory presumably does not exist
# yet — confirm that a failing %cd here is expected.
%cd adas_system/notebooks

try:
    USE_TPU = bool(os.environ['COLAB_TPU_ADDR'])
except:
    USE_TPU = False

if USE_TPU:
    # !pip uninstall pytorch
    # !pip install cloud-tpu-client==0.10 torch==1.10.0
    # !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
    !pip install cloud-tpu-client==0.10 torch==1.9.0 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl
    import torch_xla
    import torch_xla.core.xla_model as xm
    USE_TPU = True

else:
    USE_TPU = False

import torch
from torch import nn

try:
    import google.colab
    IN_COLAB = True
    from google.colab import drive
    drive.mount('/content/drive')
    if not os.path.isfile('1_ClassifierResearch.ipynb'):
        print('already exist')
        !git clone --branch 9_SignDetector https://github.com/lsd-maddrive/adas_system.git
        %cd adas_system/notebooks
        !mkdir ../data/rtsd-frames
        !unzip -j -q /content/drive/MyDrive/USER_FULL_FRAMES.zip -d ./../data/rtsd-frames
        !pwd
        !ls

except:
    IN_COLAB = False

###
import nt_helper
from nt_helper.helper_utils import *
###

TEXT_COLOR = 'black'

# Зафиксируем состояние случайных чисел
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)
random.seed(RANDOM_STATE)
%matplotlib inline
plt.rcParams["figure.figsize"] = (17,10)

# Choose the compute device: the XLA device when a TPU was detected,
# otherwise CUDA if available, falling back to the CPU.
if not USE_TPU:
    _backend_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(_backend_name)
else:
    device = xm.xla_device()

# The project root is the parent of the current working directory.
# The former IN_COLAB branches both produced this same path
# (pathlib normalises './..' to '..'), so the conditional was redundant.
PROJECT_ROOT = pathlib.Path(os.pardir)

DATA_DIR = PROJECT_ROOT / 'data'
NOTEBOOKS_DIR = PROJECT_ROOT / 'notebooks'

# Read the raw ground-truth table, preferring a copy in the notebooks
# directory and otherwise taking the one in the data directory.
_full_gt_csv = NOTEBOOKS_DIR / 'full-gt.csv'
if not _full_gt_csv.is_file():
    _full_gt_csv = DATA_DIR / 'full-gt.csv'
full_gt = pd.read_csv(_full_gt_csv)

# Row count of the raw table and the file name of the formatted cache
FULL_GT_SRC_LEN = full_gt.shape[0]
FORMATED_GT_PATH = "formated_full_gt.csv"

# Build (or reload from cache) one row per frame:
#   filepath - path of the frame image
#   coords   - list with one converted box per annotation of that frame
#   size     - [width, height] of the image as reported by imagesize
if os.path.isfile(FORMATED_GT_PATH):
    print("FORMATED GT EXIST. LOAD IT")

    import ast

    formated_full_gt_df = pd.read_csv(FORMATED_GT_PATH, dtype=object)

    # The list columns come back from CSV as text. Normalise the whitespace and
    # line breaks of 'coords' so it parses as a Python literal. The result is
    # assigned back instead of using a chained inplace replace, which does not
    # modify the frame under pandas Copy-on-Write.
    formated_full_gt_df['coords'] = (
        formated_full_gt_df['coords']
        .replace({'\n ': ',', r' \s+': ' ', r'\[ ': '['}, regex=True)
        .apply(ast.literal_eval)
    )

    formated_full_gt_df['size'] = formated_full_gt_df['size'].apply(ast.literal_eval)

    # Use forward slashes regardless of the OS that wrote the cache
    formated_full_gt_df['filepath'] = formated_full_gt_df['filepath'].str.replace(
        '\\', '/', regex=False
    )
else:
    print("FORMATED GT DOESNT EXIST. CREATE IT")

    # Only needed when the cache has to be built
    import imagesize

    # get all original filenames
    full_gt_unique_filenames = set(full_gt['filename'])
    full_gt_unique_filenames_size = len(full_gt_unique_filenames)

    formated_full_gt_list = []
    bbox_columns = ['x_from', 'y_from', 'width', 'height']

    # groupby visits each frame once, in sorted filename order. Iterating over a
    # set of strings is order-unstable between interpreter runs, which made the
    # row order (and therefore the seeded split) non-reproducible, and re-scanning
    # the whole table per file was quadratic.
    for i, (src_filename, frame_rows) in enumerate(full_gt.groupby('filename', sort=True)):
        coord_data_arr = frame_rows[bbox_columns].to_numpy()

        filepath = DATA_DIR / "rtsd-frames" / src_filename
        origW, origH = imagesize.get(filepath)

        rel_coord = []
        for coord in coord_data_arr:
            # make from x, y, w, h -> x1, y1, x2, y2
            CV2RectangleCoords = ConvertAbsTLWH2CV2Rectangle(coord)

            # make from x1, y1, x2, y2 -> center x, y, w, h
            CV2CircleCoords = ConvertCV2Rectangle2CenterXYWH(CV2RectangleCoords)

            # make x, y, w, h -> relative x, y, w, h
            rel_coord.append(MakeRel(CV2CircleCoords, origW, origH))

        if i % 100 == 0:
            printProgressEnum(i, full_gt_unique_filenames_size)

        formated_full_gt_list.append([str(filepath), rel_coord, [origW, origH]])

    formated_full_gt_df = pd.DataFrame(formated_full_gt_list, columns=['filepath', 'coords', 'size'])
    formated_full_gt_df.to_csv(FORMATED_GT_PATH, index=False)

# Attach a 'set' column (train / valid / test) to every frame unless the cached
# table already carries one, then persist the table.
if 'set' in formated_full_gt_df.columns:
    print('SET ALREADY EXIST')
else:
    print('SET DOESNT EXIST. LETS CREATE IT')
    formated_full_gt_df_index_count = len(formated_full_gt_df.index)
    # 70 % train, 20 % valid, the remainder goes to test
    TRAIN_SIZE = round(0.7 * formated_full_gt_df_index_count)
    VALID_SIZE = round(0.2 * formated_full_gt_df_index_count)
    TEST_SIZE = formated_full_gt_df_index_count - TRAIN_SIZE - VALID_SIZE

    assert TRAIN_SIZE + VALID_SIZE + TEST_SIZE == formated_full_gt_df_index_count, 'wrong split'

    # Series.append no longer exists in pandas 2.x, so the label column is built
    # in one go. The shuffle is seeded explicitly so the split does not depend on
    # how much of the global NumPy random state earlier cells consumed.
    split_labels = ['test'] * TEST_SIZE + ['train'] * TRAIN_SIZE + ['valid'] * VALID_SIZE
    set_series = (
        pd.Series(split_labels, dtype=object)
        .sample(frac=1, random_state=RANDOM_STATE)
        .reset_index(drop=True)
    )
    # Assign by position; the labels are already in their final shuffled order
    formated_full_gt_df['set'] = set_series.to_numpy()
    formated_full_gt_df.to_csv(FORMATED_GT_PATH, index=False)

display(formated_full_gt_df.head())

In [None]:
import yaml

hyps_file = "hyp.scratch.yaml"
with open(hyps_file, errors='ignore') as f:
    hyp = yaml.safe_load(f)

import nt_helper
from nt_helper.helper_utils import *


IMG_SIZE = 640
batch_size = 160

%load_ext autoreload
%autoreload 2

from nt_helper.helper_utils import createDataLoaderAndDataSet

train_loader, train_dataset = createDataLoaderAndDataSet(formated_full_gt_df, 
                                                         'train',
                                                         hyp_arg=hyp,
                                                         imgsz=IMG_SIZE, 
                                                         batch_size=batch_size, 
                                                         augment=False)
train_dataset[0][0].shape