In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (25,16)
import numpy as np
import random
import torch
from torch import nn
import seaborn as sns
import pandas as pd
import os
import pathlib
import shutil
import cv2
import PIL
import sys

# Detect whether the notebook runs inside Google Colab by probing for the
# `google.colab` package. Only a failed import means "not in Colab"; the
# previous bare `except:` also swallowed KeyboardInterrupt and Drive mount
# errors, silently flipping IN_COLAB to False.
try:
    import google.colab  # noqa: F401  (imported only as an environment probe)
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    # Mount Google Drive; a failure here is raised rather than hidden.
    drive.mount('/content/drive')

# Colour name kept as a notebook-wide constant.
TEXT_COLOR = 'black'

In [None]:
# Project root: the parent of the current directory when running locally,
# the current directory ('') when running in Colab.
if IN_COLAB:
    PROJECT_ROOT = pathlib.Path('')
else:
    PROJECT_ROOT = pathlib.Path(os.pardir)

# Directories derived from the project root.
DATA_DIR = PROJECT_ROOT / 'data'
NOTEBOOKS_DIR = PROJECT_ROOT / 'notebooks'

YOLO_MODEL_HOME_DIR = DATA_DIR / 'YoloV5'
AUGMENT_HOME_DIR = YOLO_MODEL_HOME_DIR / 'utils'

# Make the YoloV5 directory importable. sys.path holds strings, so the
# membership test must use the string form; comparing the Path object itself
# never matches and would append a duplicate entry on every re-run of the cell.
_yolo_home = str(YOLO_MODEL_HOME_DIR)
if _yolo_home not in sys.path:
    sys.path.append(_yolo_home)
    
# Read the ground-truth table, preferring the copy in the notebooks directory
# and falling back to the one in the data directory.
full_gt = pd.read_csv(
    (NOTEBOOKS_DIR if (NOTEBOOKS_DIR / 'full-gt.csv').is_file() else DATA_DIR) / 'full-gt.csv'
)

# File name of the cached, reformatted ground truth.
FORMATED_GT_PATH = "formated_full_gt.csv"
# Number of rows in the source ground-truth table.
FULL_GT_SRC_LEN = len(full_gt)

# Distinct values of the 'filename' column, and how many there are.
full_gt_unique_filenames = set(full_gt['filename'])
full_gt_unique_filenames_size = len(full_gt_unique_filenames)
%run utils.ipynb
import ast
import re

# Load the cached, reformatted ground truth if it exists; otherwise build it
# from `full_gt` (one row per image file) and cache it as CSV.
# The conversion helpers and printProgressEnum are not defined in this cell --
# presumably they come from utils.ipynb; confirm.
if os.path.isfile(FORMATED_GT_PATH):
    print("FORMATED GT EXIST. LOAD IT")
    formated_full_gt_df = pd.read_csv(FORMATED_GT_PATH, dtype=object)

    # Rewrite the stored 'coords' text so ast.literal_eval can parse it.
    # The result is assigned back to the column: a chained
    # `df['coords'].replace(..., inplace=True)` only modifies a temporary
    # under pandas Copy-on-Write and would leave the frame unchanged.
    formated_full_gt_df['coords'] = formated_full_gt_df['coords'].replace(
        {'\n ': ',', r' \s+': ' ', r'\[ ': '['}, regex=True
    )

    # Turn the textual list representations back into Python lists.
    formated_full_gt_df['coords'] = formated_full_gt_df['coords'].apply(ast.literal_eval)
    formated_full_gt_df['size'] = formated_full_gt_df['size'].apply(ast.literal_eval)
else:
    print("FORMATED GT DOESNT EXIST. CREATE IT")

    # Only needed when the cache has to be built.
    import imagesize

    formated_full_gt_list = []

    # One pass over the table, grouped by file name in order of first
    # appearance, instead of re-scanning the whole table for every file.
    grouped_by_file = full_gt.groupby('filename', sort=False)
    for file_idx, (src_filename, file_rows) in enumerate(grouped_by_file):
        coord_data_arr = file_rows[['x_from', 'y_from', 'width', 'height']].to_numpy()

        filepath = DATA_DIR / "rtsd-frames" / src_filename
        origW, origH = imagesize.get(filepath)

        rel_coord = []
        for coord in coord_data_arr:
            # top-left x, y, width, height -> x1, y1, x2, y2
            CV2RectangleCoords = ConvertAbsTLWH2CV2Rectangle(coord)

            # x1, y1, x2, y2 -> center x, y, w, h
            CV2CircleCoords = ConvertCV2Rectangle2CenterXYWH(CV2RectangleCoords)

            # center x, y, w, h -> relative x, y, w, h
            rel_coord.append(MakeRel(CV2CircleCoords, origW, origH))

        # Report progress every 100 files.
        if file_idx % 100 == 0:
            printProgressEnum(file_idx, full_gt_unique_filenames_size)

        formated_full_gt_list.append([str(filepath), rel_coord, [origW, origH]])

    formated_full_gt_df = pd.DataFrame(formated_full_gt_list, columns=['filepath', 'coords', 'size'])
    # Write to the same path the load branch reads from.
    formated_full_gt_df.to_csv(FORMATED_GT_PATH, index=False)

formated_full_gt_df.head()

In [None]:
import yaml

    
# Hyper-parameter file inside the YoloV5 directory, parsed with the safe YAML loader.
# Undecodable bytes in the file are ignored while reading.
hyps_file = YOLO_MODEL_HOME_DIR / 'data/hyps' / "hyp.scratch.yaml"
hyp = yaml.safe_load(hyps_file.read_text(errors='ignore'))

# Image size and batch size passed to the loader factory.
IMG_SIZE = 1280
batch_size = 1

# Build the 'train' loader/dataset pair from the formatted ground truth,
# with augmentation switched off.
train_loader, train_dataset = createDataLoaderAndDataSet(
    formated_full_gt_df,
    'train',
    hyp_arg=hyp,
    imgsz=IMG_SIZE,
    batch_size=batch_size,
    augment=False,
)

In [None]:
# Take the first sample of the training dataset and display its image.
img, target, path, shape = train_dataset[0]
# Reorder axes 0,1,2 -> 1,2,0 before plotting.
# NOTE(review): assumes the dataset yields a channels-first tensor -- confirm.
img = img.permute(1, 2, 0)
plt.imshow(img)
