<a href="https://colab.research.google.com/github/shawal-mbalire/18446744073709551616/blob/main/yolov5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.9/777.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m54.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import cv2
import glob
import yaml
import random
import shutil
import pathlib
import numpy as np
import pandas as pd
from PIL import Image
from ultralytics import YOLO
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.model_selection import train_test_split

In [None]:
! wget https://storage.googleapis.com/air-lab-hackathon/Cocoa/cocoa_new.zip
! rm -rf sample_data
! unzip cocoa_new.zip
! rm cocoa_new.zip
! rm -rf __MACOSX

In [None]:
# Directory holding the labelled training images and their annotation CSV.
data_dir   = pathlib.Path('cocoa_new/train')
# Sorted so the processing order does not depend on the filesystem's listing order.
image_list = sorted(data_dir.glob('*.jpg'))
# Annotation table; the code below reads its 'Image id', box-coordinate and 'Actual Label' columns.
label_data = pd.read_csv(data_dir / 'label_map.csv')

def extract_bounding_boxes_norm(data_f, img_name):
  """Return the normalised boxes (plus label) recorded for one image.

  Selects the rows of `data_f` whose 'Image id' equals `img_name` and returns
  them as a 2-D array with the columns xmin_norm, ymin_norm, xmax_norm,
  ymax_norm, 'Actual Label', in that order. An image with no matching rows
  yields an array with zero rows.
  """
  wanted_columns = ['xmin_norm', 'ymin_norm', 'xmax_norm', 'ymax_norm', 'Actual Label']
  belongs_to_image = data_f['Image id'] == img_name
  return data_f.loc[belongs_to_image, wanted_columns].values

def extract_bounding_boxes(df, img_id):
  """Return the boxes (plus label) recorded for one image.

  Selects the rows of `df` whose 'Image id' equals `img_id` and returns them
  as a 2-D array with the columns xmin, ymin, xmax, ymax, 'Actual Label',
  in that order. An image with no matching rows yields an array with zero rows.
  """
  wanted_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'Actual Label']
  belongs_to_image = df['Image id'] == img_id
  return df.loc[belongs_to_image, wanted_columns].values

def generate_yolo_label(df, img_id, save_dir = 'yolo-data/base/labels/', label_map = None):
  """Write the YOLO-format label file for one image.

  Each box of the image becomes one line of the form
  `<class index> <x_center> <y_center> <width> <height>`, computed from the
  normalised corner coordinates returned by `extract_bounding_boxes_norm`.
  An image with no boxes produces an empty file.

  Args:
    df: annotation table passed through to `extract_bounding_boxes_norm`.
    img_id: image file name (e.g. 'abc.jpg'); the label file reuses its stem
      with a '.txt' extension.
    save_dir: directory the label file is written to (must already exist).
    label_map: mapping from label name to integer class index. When omitted,
      the module-level `cocoa_label_map` is used.
  """
  if label_map is None:
    # Resolved at call time, so the global may be defined after this function.
    label_map = cocoa_label_map
  ext_boxes = extract_bounding_boxes_norm(df, img_id)
  # splitext copes with any extension length, unlike slicing off 3 characters.
  label_name = os.path.splitext(img_id)[0] + '.txt'
  with open(os.path.join(save_dir, label_name), 'w') as l_file:
    for x1, y1, x2, y2, img_label in ext_boxes:
      box_w = x2 - x1
      box_h = y2 - y1
      xc = x1 + box_w / 2
      yc = y1 + box_h / 2
      # YOLO expects centre + size, not the top-left corner + centre.
      l_file.write(f'{label_map[img_label]} {xc:.6f} {yc:.6f} {box_w:.6f} {box_h:.6f}\n')

# Create an images/ and labels/ folder for the unsplit pool ('base') and for each split.
for split_name in ('base', 'train', 'val'):
  for sub_dir in ('images', 'labels'):
    os.makedirs(f'yolo-data/{split_name}/{sub_dir}', exist_ok=True)

# Assign each distinct label an integer class index, in order of first appearance.
labels = label_data['Actual Label'].unique()
cocoa_label_map = {label_name: class_index for class_index, label_name in enumerate(labels)}

# Write one label file per training image into the default 'base' labels folder.
for image_path in image_list:
  generate_yolo_label(label_data, image_path.name)

cocoa_label_map

In [None]:
# glob returns files in filesystem order; sorting first makes the seeded split below
# pick the same files on every run and every machine.
base_labels = sorted(glob.glob('yolo-data/base/labels/*.txt'))
print(f'Found {len(base_labels)} files.')
# Hold out 10% of the label files for validation.
train, val = train_test_split(base_labels, test_size=0.1, random_state=42)
print(f'Train has {len(train)} samples')
print(f'Val has {len(val)} samples')

In [None]:
# Copy every label file, and the image it belongs to, into the folder of its split.
for split_name, label_files in (('train', train), ('val', val)):
  for label_path in label_files:
    label_name = os.path.basename(label_path)
    # The image shares the label's stem: swap the 'txt' extension for 'jpg'.
    image_name = label_name[:-3] + 'jpg'
    shutil.copyfile(label_path, f'yolo-data/{split_name}/labels/' + label_name)
    shutil.copyfile('cocoa_new/train/' + image_name, f'yolo-data/{split_name}/images/' + image_name)

In [None]:
# Dataset description file that is later passed to the trainer via `data=`.
yaml_dict = {
    # Absolute path derived from the working directory, so the notebook is not tied to /content.
    'path': os.path.abspath('yolo-data'),
    'train': 'train/images',
    'val': 'val/images',
    # Class index -> class name, in the same order as cocoa_label_map.
    # str() keeps the YAML free of non-plain scalar types.
    'names': {class_index: str(label_name) for class_index, label_name in enumerate(labels)},
}

# The file is only written here, so plain 'w' is sufficient.
with open('data_config.yaml', 'w') as yml_file:
    yaml.dump(yaml_dict, yml_file)

In [None]:
# Training settings; each one is forwarded to model.train() below.
batch = 16
epochs = 20
patience = 15
name = 'exp_v1.1'
optimizer = 'auto'
project = 'cocoa-training'
# Checkpoint name handed to the YOLO constructor.
model_version = 'yolov5xu.pt'
# data_config.yaml is written to the working directory, so resolve it from there
# rather than assuming the notebook runs in /content.
data_config = os.path.abspath('data_config.yaml')

model = YOLO(model_version)
hist = model.train(
    imgsz=640,
    name=name,
    batch=batch,
    epochs=epochs,
    project=project,
    data=data_config,
    patience=patience,
    optimizer=optimizer
  )

# Make Predictions

In [None]:
# Relative path, consistent with how the training data is addressed; sorted so the
# order of the predictions (and of the submission rows) is reproducible.
test_data = sorted(glob.glob('cocoa_new/test_new/*.jpg'))
assert test_data, 'No test images found under cocoa_new/test_new/'
results = model.predict(
    test_data,
)

# Flatten the per-image results into parallel per-box sequences in a single pass.
filenames = []
box_chunks, class_chunks, confidence_chunks = [], [], []
for r in results:
    image_boxes = r.boxes.xyxyn.cpu().numpy()
    # Repeat the file name once per detected box so every box row carries its image.
    filenames.extend([os.path.basename(r.path)] * len(image_boxes))
    box_chunks.append(image_boxes)
    class_chunks.append(r.boxes.cls.cpu().numpy())
    confidence_chunks.append(r.boxes.conf.cpu().numpy())
pred_boxes = np.concatenate(box_chunks)
pred_classes = np.concatenate(class_chunks)
pred_confidence = np.concatenate(confidence_chunks)

In [None]:
# Invert the label map (class index -> label name) and translate every predicted class index.
cocoa_label_map_rev = {class_index: label_name for label_name, class_index in cocoa_label_map.items()}
pred_labels = [cocoa_label_map_rev[class_index] for class_index in pred_classes.astype(int)]
cocoa_label_map_rev

In [None]:
# One row per predicted box: image, predicted label, confidence, then the four box coordinates.
box_columns = {
    column_name: pred_boxes[:, position]
    for position, column_name in enumerate(('xmin', 'ymin', 'xmax', 'ymax'))
}
submission_df = pd.DataFrame(
    {
        'Image id': filenames,
        'Class': pred_labels,
        'Confidence': pred_confidence,
        **box_columns,
    }
)
submission_df.head()

In [None]:
# Save the predictions without the DataFrame index column.
submission_path = '18446744073709551616.csv'
submission_df.to_csv(submission_path, index=False)