This notebook allows to install YOLOv5 requirements, reformat training and test data


In [1]:
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt
import ast
import glob
import shutil
import sys
import numpy as np
import imagesize
import cv2
from tqdm.notebook import tqdm
from typing import List
import torch
from torchvision.ops import box_iou
from typing import List
import torch
from torchvision.ops import box_iou
import warnings
warnings.filterwarnings('ignore')

In [2]:
import cv2
import torch
from PIL import Image


In [3]:
# !git clone https://github.com/ultralytics/yolov5
# %cd yolov5
# !pip install -r requirements.txt

# Train YOLOv5s on COCO128 for 1000 epochs using this command line 
# $ python train.py --img 640 --batch 16 --epochs 1000 --data starfish.yaml --weights yolov5s.pt
# starfish.yaml -> path to train/test data

In [4]:
# Model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) #autoshape=False for post-training with 1 class

Using cache found in /home/peter/.cache/torch/hub/ultralytics_yolov5_master
fatal: not a git repository (or any of the parent directories): .git
YOLOv5 🚀 2022-4-11 torch 1.10.1+cu102 CUDA:0 (GeForce GTX 1080, 8112MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.5 GFLOPs


In [5]:
# Check thatg everything is working 

# for f in 'zidane.jpg', 'bus.jpg':
#     torch.hub.download_url_to_file('https://ultralytics.com/images/' + f, f)  # download 2 images
# im1 = Image.open('zidane.jpg')  # PIL image
# im2 = cv2.imread('bus.jpg')[..., ::-1]  # OpenCV image (BGR to RGB)
# imgs = [im1, im2]  # batch of images

# # Inference
# results = model(imgs, size=640)  # includes NMS

# # Results
# results.print()  
# results.save()  # or .show()

# results.xyxy[0]  # im1 predictions (tensor)
# results.pandas().xyxy[0]  # im1 predictions (pandas)


In [3]:
ROOT_DIR  = '/media/peter/2TB/julien/mlproject/great-barrier-reef'
WORKING_DIR  = '/media/peter/2TB/julien/mlproject/great-barrier-reef'
def get_path(row):
    row['image_path'] = f'{ROOT_DIR}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['label_path'] = f'{ROOT_DIR}/datasets/labels/video_{row.video_id}_{row.video_frame}.txt'
    return row


In [4]:
df = pd.read_csv(f'{ROOT_DIR}/train.csv')
df = df.apply(get_path, axis=1)
df['annotations'] = df['annotations'].apply(lambda x: ast.literal_eval(x))
display(df.head(2))

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,image_path,label_path
0,0,40258,0,0,0-0,[],/media/peter/2TB/julien/mlproject/great-barrie...,/media/peter/2TB/julien/mlproject/great-barrie...
1,0,40258,1,1,0-1,[],/media/peter/2TB/julien/mlproject/great-barrie...,/media/peter/2TB/julien/mlproject/great-barrie...


In [5]:
df.shape

(23501, 8)

In [8]:
df['num_bbox'] = df['annotations'].apply(lambda x: len(x))
data = (df.num_bbox>0).value_counts()/len(df)*100
print('% images without annotations: {}'.format(data[0]))
print('% images with  annotations: {} '.format(data[1]))


% images without annotations: 79.06897578826432
% images with  annotations: 20.93102421173567 


In [9]:
df = df.query("num_bbox>0")

In [10]:
# COnverting annotations from coco to yolo
def coco2yolo(image_height, image_width, bboxes):
    """
    coco => [xmin, ymin, w, h]
    yolo => [xmid, ymid, w, h] (normalized)
    """
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    # normalizinig
    bboxes[..., [0, 2]]= bboxes[..., [0, 2]]/ image_width
    bboxes[..., [1, 3]]= bboxes[..., [1, 3]]/ image_height
    
    # conversion (xmin, ymin) => (xmid, ymid)
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] + bboxes[..., [2, 3]]/2
    
    return bboxes

def yolo2coco(image_height, image_width, bboxes):
    """
    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]
    
    """ 
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    # denormalizing
    bboxes[..., [0, 2]]= bboxes[..., [0, 2]]* image_width
    bboxes[..., [1, 3]]= bboxes[..., [1, 3]]* image_height
    
    # converstion (xmid, ymid) => (xmin, ymin) 
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] - bboxes[..., [2, 3]]/2
    
    return bboxes

def load_image(image_path):
    return cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)


def get_bbox(annots):
    bboxes = [list(annot.values()) for annot in annots]
    return bboxes

def get_imgsize(row):
    row['width'], row['height'] = imagesize.get(row['image_path'])
    return row

In [10]:
# df.iloc[100]['annotations']

[{'x': 276, 'y': 631, 'width': 116, 'height': 88}]

In [11]:
df['bboxes'] = df.annotations.apply(get_bbox)
df = df.apply(get_imgsize,axis=1)
display(df.width.unique(), df.height.unique())
display(df.head(2))

array([1280])

array([720])

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,...,label_path,num_bbox,bboxes,width,height
16,0,40258,16,16,0-16,...,/media/peter/2TB/julien/mlproject/great-barrie...,1,"[[559, 213, 50, 32]]",1280,720
17,0,40258,17,17,0-17,...,/media/peter/2TB/julien/mlproject/great-barrie...,1,"[[558, 213, 50, 32]]",1280,720


In [12]:
# from sklearn.model_selection import GroupKFold
# kf = GroupKFold(n_splits = 5) 
# df = df.reset_index(drop=True)
# df['fold'] = -1
# for fold, (train_idx, val_idx) in enumerate(kf.split(df, y = df.video_id.tolist(), groups=df.sequence)):
#     df.loc[val_idx, 'fold'] = fold
# display(df.fold.value_counts())

0    1100
3     970
2     968
4     945
1     936
Name: fold, dtype: int64

In [13]:
# val_df = df[df['fold']==2]
# train_df = df[df['fold']!=2]
# print(train_df.shape)
# print(val_df.shape)


(3951, 13)
(968, 13)


In [7]:
n_video_0 = 0
n_video_1 = 1
n_video_2 = 2

for i in range(df.shape[0]):
    if df['video_id'][i] == 0:
        n_video_0+= 1
    if df['video_id'][i] == 1:
        n_video_1+= 1
    if df['video_id'][i] == 2:
        n_video_2+= 1


In [8]:
cmp = 0
for i in range(df.shape[0]):
    if df.loc[i, 'video_id'] == 0:
        cmp+= 1
        if cmp > n_video_0*0.8:
            df.loc[i, 'fold'] = 2


In [9]:
cmp = 0
for i in range(df.shape[0]):
    if df.loc[i, 'video_id'] == 1:
        cmp+= 1
        if cmp > n_video_1*0.8:
            df.loc[i, 'fold'] = 2


In [10]:
cmp = 0
for i in range(df.shape[0]):
    if df.loc[i, 'video_id'] == 2:
        cmp+= 1
        if cmp > n_video_2*0.8:
            df.loc[i, 'fold'] = 2


In [25]:
# train_df.iloc[row_idx].label_path[:62] + 'training_data' + train_df.iloc[row_idx].label_path[61:]

'/media/peter/2TB/julien/mlproject/great-barrier-reef/datasets/training_data/labels/video_0_5048.txt'

In [27]:
# Write labels in correct format

cnt = 0
for row_idx in (range(train_df.shape[0])):
    row = train_df.iloc[row_idx]
    image_height = row.height
    image_width = row.width
    bboxes_coco = np.asarray(row.bboxes).astype(np.float32).copy()
    num_bbox = len(bboxes_coco)
    labels = [0]*num_bbox
  
    label_path = train_df.iloc[row_idx].label_path[:62] + 'training_data' + train_df.iloc[row_idx].label_path[61:]
    f = open(label_path, 'w')

    if num_bbox < 1:
        annot = ''
        f.write(annot)
        f.close()
        cnt += 1
        continue
  
    bboxes_yolo  = coco2yolo(image_height, image_width, bboxes_coco)

    for i in range(len(bboxes_yolo)):
        annot = [str(labels[i])] + list(bboxes_yolo[i].astype(str)) + (['\n'] if num_bbox!=(i+1) else [''])
        annot = ' '.join(annot)
        annot = annot.strip(' ')
        f.write(annot)
    f.close()



In [59]:
val_df = df[df['fold']==2]
train_df = df[df['fold']!=2]
print(train_df.shape)
print(val_df.shape)


(3951, 13)
(968, 13)


In [61]:
# # # Move labels from obj/ to test/ directory
# def mv_labels (row):
#     old_path = row.label_path
#     filename = row.label_path.split('/')[-1]
#     new_path = '/'.join(row.label_path.split('/')[:-2]) + '/test/' + filename
#     row['label_path'] = new_path
#     shutil.move(old_path, new_path)
#     return row

# val_df= val_df.apply(lambda x: mv_labels(x), axis=1)
# val_df.head(2)


In [28]:
def copy_images (row):
    old_path = row.image_path
    new_path = row.image_path[:53] + 'datasets/training_data/images' + row.image_path[65:73] +'_'+row.image_path[74:]
    shutil.copy(old_path, new_path)
train_df.apply(lambda x: copy_images(x), axis=1)
# train_df.apply(lambda x: copy_images(x), axis=1)


0       None
1       None
2       None
3       None
4       None
        ... 
4914    None
4915    None
4916    None
4917    None
4918    None
Length: 3951, dtype: object