In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        file_path = os.path.join(root, name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# YOLOv5

## Config

In [None]:
# Standard library
import ast
import shutil
import subprocess
import sys
from enum import Enum
from glob import glob

# Third-party
import seaborn as sns
from colorama import Fore
from tqdm import trange, tqdm

# For Data preparation
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *

In [None]:
class Config(Enum):
    """
    Notebook-wide settings, read as ``Config.NAME.value``.

    NOTE: ``Enum`` treats members that share a value as aliases of the first
    one defined. ``.value`` lookups keep working in that case, but ``.name``
    and iteration will no longer tell the two settings apart.
    """
    def __str__(self):
        # __str__ must return a str; several settings below hold ints, so the
        # raw value cannot be returned directly.
        return str(self.value)
    
    # Input locations (Kaggle "global-wheat-detection" dataset)
    TRAIN_CSV = '../input/global-wheat-detection/train.csv'
    TEST_CSV = "../input/global-wheat-detection/sample_submission.csv"
    TRAIN_DIR = "../input/global-wheat-detection/train"
    TEST_DIR = "../input/global-wheat-detection/test"
    # Root folder for the generated YOLO dataset (images/ and labels/)
    OUTPUT_PATH = "./yolov5/output"
    # Used both to normalize box coordinates and as the --img training argument
    IMG_SHAPE = 1024
    # Base name (without extension) of the dataset YAML written into ./yolov5
    CONFIG_FILENAME = "ws_data"
    # Training hyper-parameters passed to yolov5/train.py
    EPOCHS = 20
    BATCH_SIZE = 8

In [None]:
def process_data(data_df, image_id_col, bbox_col, label_col, path_col, config_filename = 'data', test_size = 0.2):
    """
    Build a YOLOv5-style dataset (images, label files and dataset YAML).

    Every bounding box is read as ``[x_min, y_min, width, height]`` and written
    as ``label x_center y_center width height``, with all four coordinates
    divided by ``Config.IMG_SHAPE``. Files are written below
    ``Config.OUTPUT_PATH`` in ``images/<split>`` and ``labels/<split>``, and the
    dataset description goes to ``./yolov5/<config_filename>.yaml``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        One row per bounding box. The frame is not modified.
    image_id_col : str
        Column holding the image identifier (used as label-file name).
    bbox_col : str
        Column holding the box, either as a sequence or as its string form.
    label_col : str
        Column holding the class name of each box.
    path_col : str
        Column holding the path of the source image.
    config_filename : str
        Base name (without extension) of the dataset YAML.
    test_size : float
        Share of the images placed in the validation split.

    Returns
    -------
    (df_train, df_val) : tuple of pandas.DataFrame
        One row per image; ``label_col`` and ``bbox_col`` hold parallel lists.
    """
    output_root = Config.OUTPUT_PATH.value
    OUTPUT_FOLDER_NAME = output_root.split('/')[-1]
    img_size = Config.IMG_SHAPE.value

    # Cloning into an existing checkout fails, so only fetch the repo when needed.
    if not os.path.exists('./yolov5/train.py'):
        if os.system('git clone https://github.com/ultralytics/yolov5.git') != 0:
            print("[WARN] Could not clone the YOLOv5 repository")

    # exist_ok makes this safe to re-run and repairs a partially created tree.
    for kind in ('images', 'labels'):
        for split in ('train', 'validation'):
            os.makedirs(os.path.join(output_root, kind, split), exist_ok = True)

    # Work on a copy so the caller's frame stays untouched and the cell can be re-run.
    data_df = data_df.copy()

    # Convert string form of list to original form (already-parsed boxes pass through)
    data_df[bbox_col] = data_df[bbox_col].apply(
        lambda box: ast.literal_eval(box) if isinstance(box, str) else box
    )

    # Encoding all labels; sorting keeps the class ids stable between runs
    mapper = {name: class_id for class_id, name in enumerate(sorted(set(data_df[label_col])))}
    data_df[label_col] = data_df[label_col].apply(lambda name: int(mapper[name]))

    # One row per image, with parallel lists of labels and boxes. Grouping by
    # label as well would give a multi-class image several rows that overwrite
    # each other's label file and could end up in both splits.
    data_df = (
        data_df.groupby(by = [image_id_col, path_col])
        .agg({label_col: list, bbox_col: list})
        .reset_index()
    )

    # Divide data into train and val set
    df_train, df_val = train_test_split(data_df, test_size = test_size, random_state = 1234, shuffle = True)
    df_train = df_train.reset_index(drop = True)
    df_val = df_val.reset_index(drop = True)

    print(f"[INFO] Train_SHAPE : {df_train.shape}, VAL_SHAPE: {df_val.shape}")
    bar_format = "{l_bar}%s{bar:50}%s{r_bar}" % (Fore.CYAN, Fore.RESET)
    data_dict = {'train': df_train, 'validation': df_val}
    for data_type, data in data_dict.items():
        for idx in trange(len(data), desc=f'Processing {data_type}...', bar_format=bar_format, position=0, leave=True):
            row = data.iloc[idx]
            image_name = row[image_id_col]
            path = row[path_col]

            yolo_data = []
            for label, bbox in zip(row[label_col], row[bbox_col]):
                x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
                yolo_data.append([
                    label,
                    (x + w / 2) / img_size,
                    (y + h / 2) / img_size,
                    w / img_size,
                    h / img_size,
                ])

            # reshape keeps the array two-dimensional even when it holds no rows
            np.savetxt(
                f"{output_root}/labels/{data_type}/{image_name}.txt",
                np.array(yolo_data, dtype = float).reshape(-1, 5),
                fmt = ["%d", "%f", "%f", "%f", "%f"]
            )

            # Copy in-process instead of spawning one shell per image; a failed
            # copy is reported but does not abort the remaining images.
            try:
                shutil.copy(path, os.path.join(output_root, 'images', data_type, os.path.basename(path)))
            except OSError as err:
                tqdm.write(f"[WARN] Could not copy {path}: {err}")

    with open(f"./yolov5/{config_filename}.yaml", "w") as file_:
        file_.write(
            f"train: {OUTPUT_FOLDER_NAME}/images/train\n"
            f"val: {OUTPUT_FOLDER_NAME}/images/validation\n"
            f"nc: {len(mapper)}\n"
            f"names: {list(mapper.keys())}\n"
        )
    print("[INFO] Done with data processing")
    return df_train, df_val

## Load data

In [None]:
# Read the training annotations and the sample-submission table.
df = pd.read_csv(Config.TRAIN_CSV.value)
df_test = pd.read_csv(Config.TEST_CSV.value)

# Derive each image's file path from its id.
train_path_template = Config.TRAIN_DIR.value + '/{}.jpg'
test_path_template = Config.TEST_DIR.value + '/{}.jpg'
df['path'] = df.image_id.map(train_path_template.format)
df_test['path'] = df_test.image_id.map(test_path_template.format)

# Every annotation receives the same class label.
df['label'] = 'Wheat'

print(df.dtypes)
df.head()

## Train YOLO model

In [None]:
# Write the YOLO images/labels folders and the dataset YAML from the loaded table.
# The trailing semicolon keeps the cell from echoing anything the call returns.
process_data(
    data_df=df,
    image_id_col="image_id",
    bbox_col="bbox",
    label_col="label",
    path_col="path",
    config_filename=Config.CONFIG_FILENAME.value,
);

In [None]:
def train(model_name, config_filename, preTrainedWeights_path = None, model_cfg = None):
    """
    Run ``yolov5/train.py`` with the settings stored in ``Config``.

    Parameters
    ----------
    model_name : str
        Value passed as ``--name`` (name of the training run).
    config_filename : str
        Base name of the dataset YAML located at ``yolov5/<config_filename>.yaml``.
    preTrainedWeights_path : str, optional
        Passed as ``--weights`` when given; otherwise the flag is omitted.
    model_cfg : str, optional
        Path of the model YAML passed as ``--cfg``. When omitted, the files in
        ``yolov5/models`` are listed and the choice is read from ``input()``.
        Supplying it lets the notebook run without manual interaction.

    Raises
    ------
    FileNotFoundError
        If no model YAML is found while prompting.
    ValueError
        If the entered index is not one of the listed ones.
    subprocess.CalledProcessError
        If the training script exits with a non-zero status.
    """
    if model_cfg is None:
        # Sorted so that the printed indices are the same on every run.
        cfg_files = sorted(glob('yolov5/models/*yaml'))
        if not cfg_files:
            raise FileNotFoundError("No model configuration found in yolov5/models")

        mapper = {}
        for idx, model_ in enumerate(cfg_files):
            mapper[idx + 1] = model_
            print(f"{idx + 1} => {model_.split('/')[-1].split('.')[0]}")

        choice = int(input('Select model from idx'))
        if choice not in mapper:
            raise ValueError(f"Invalid model index {choice}; expected 1..{len(mapper)}")
        model_cfg = mapper[choice]

    # An argument list (no shell) keeps paths with spaces or special characters intact.
    command = [
        sys.executable or 'python', 'yolov5/train.py',
        '--img', str(Config.IMG_SHAPE.value),
        '--batch', str(Config.BATCH_SIZE.value),
        '--epochs', str(Config.EPOCHS.value),
        '--data', f'yolov5/{config_filename}.yaml',
        '--cfg', str(model_cfg),
        '--name', str(model_name),
    ]
    if preTrainedWeights_path:
        command += ['--weights', str(preTrainedWeights_path)]

    # check=True surfaces a failed training run instead of silently continuing.
    subprocess.run(command, check = True)
# Start training with the dataset YAML named by Config.CONFIG_FILENAME.
train(
    model_name='ws_yolov5',
    config_filename=Config.CONFIG_FILENAME.value,
)

In [None]:
def predict(images_path: str, weights_path: str):
    """
    Helper function to make predictions over images using Yolo

    Parameters
    ----------
    images_path : str
        Path to the test images, passed to ``yolov5/detect.py`` as ``--source``.
    weights_path : str
        Path to the weights file, passed as ``--weights``.

    Raises
    ------
    subprocess.CalledProcessError
        If the detection script exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths with spaces or special characters intact.
    command = [
        sys.executable or 'python', 'yolov5/detect.py',
        '--source', str(images_path),
        '--weights', str(weights_path),
    ]
    subprocess.run(command, check = True)

# Repeated training runs are stored in numbered folders (ws_yolov5, ws_yolov52, ...),
# so use the most recently written checkpoint instead of a hard-coded run number.
weight_candidates = glob("yolov5/runs/train/ws_yolov5*/weights/best.pt")
if not weight_candidates:
    raise FileNotFoundError("No trained weights found under yolov5/runs/train/ws_yolov5*/weights/best.pt")

predict(images_path = "../input/global-wheat-detection/test",
       weights_path = max(weight_candidates, key = os.path.getmtime))