In [1]:
# Install the packages listed in YOLOv5's requirements.txt (fetched from the master branch); -q keeps pip quiet.
# NOTE(review): neither this install nor the clone below is pinned to a tag or commit,
# so a rerun may pull different code — consider pinning both to the same revision.
!pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt -q
# Clone the YOLOv5 repository into ./yolov5; its train.py is invoked later in this notebook.
!git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...
remote: Enumerating objects: 7284, done.[K
remote: Counting objects: 100% (390/390), done.[K
remote: Compressing objects: 100% (228/228), done.[K
remote: Total 7284 (delta 245), reused 273 (delta 162), pack-reused 6894[K
Receiving objects: 100% (7284/7284), 9.19 MiB | 20.55 MiB/s, done.
Resolving deltas: 100% (4984/4984), done.


In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from sklearn.model_selection import train_test_split, StratifiedKFold, GroupKFold
import fastprogress
import shutil
import ast

In [3]:
def decodeString(BoxesString):
    """Parse a ``BoxesString`` annotation into an ``(n, 4)`` array of boxes.

    The accepted format is a ``;``-separated list of boxes, each box being
    four space-separated integers, or the literal ``"no_box"`` for an image
    without any box.

    Parameters
    ----------
    BoxesString : str
        Encoded boxes, e.g. ``"1 2 3 4;5 6 7 8"`` or ``"no_box"``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n, 4)`` on success. An empty float array of
        shape ``(0, 4)`` is returned for ``"no_box"`` and for any malformed
        input (a diagnostic is printed in the malformed case).
    """
    if BoxesString == "no_box":
        return np.zeros((0, 4))

    try:
        boxes = np.array([[int(token) for token in box.split(" ")]
                          for box in BoxesString.split(";")])
    except (ValueError, AttributeError, TypeError):
        # ValueError: non-integer token or ragged rows;
        # AttributeError/TypeError: input is not a string (e.g. a NaN cell).
        boxes = None

    # Reject anything that is not exactly four values per box, so callers that
    # wrap the result in a 4-column DataFrame never see a mis-shaped array.
    if boxes is None or boxes.ndim != 2 or boxes.shape[1] != 4:
        print(BoxesString)
        print("Submission is not well formatted. empty boxes will be returned")
        return np.zeros((0, 4))

    return boxes

In [4]:
# Read the wheat-x0 annotations and decode every `BoxesString` into a box array.
df = pd.read_csv('../input/wheat-x0/train.csv')
bboxs = df['BoxesString'].apply(decodeString)

# Build one small frame per CSV row (one line per box), tagged with that row's image name.
dfs = []
for row in fastprogress.progress_bar(range(len(df))):
    boxes_frame = pd.DataFrame(bboxs[row], columns=['x1', 'y1', 'x2', 'y2'])
    dfs.append(boxes_frame.assign(image_id=df.loc[row, 'image_name']))

In [5]:
# Read the global-wheat-detection annotations, discarding the columns this notebook does not use.
df = pd.read_csv('../input/global-wheat-detection/train.csv').drop(columns=['width', 'height', 'source'])

# `bbox` is stored as text; parse it back into a Python sequence.
df['bbox'] = df['bbox'].map(ast.literal_eval)


def _bbox_part(position):
    """Return element `position` of every parsed bbox, cast to int32."""
    return df['bbox'].map(lambda box: box[position]).astype(np.int32)


# Elements 0/1 are taken as the first corner; elements 2/3 are added to it to get the second corner.
df['x1'] = _bbox_part(0)
df['y1'] = _bbox_part(1)
df['x2'] = df['x1'] + _bbox_part(2)
df['y2'] = df['y1'] + _bbox_part(3)
df = df.drop(columns='bbox')

In [6]:
# Stack the per-row wheat-x0 frames together with the global-wheat-detection frame
# and switch to the x/y naming used for the derived columns below.
# NOTE: this cell rebinds `df`, so it is not safe to re-run without re-running the two load cells first.
df = pd.concat([*dfs, df], ignore_index=True).rename(columns={'x1': 'x', 'y1': 'y'})

# Box centre and size, still in pixels.
df['x_center'] = (df['x'] + df['x2']) / 2
df['y_center'] = (df['y'] + df['y2']) / 2
df['w'] = df['x2'] - df['x']
df['h'] = df['y2'] - df['y']

# Keep only the columns used downstream, ordered by image, and save a copy to disk.
# (The former `classes` column was dropped by this selection immediately after being
# created, so it is no longer assigned at all.)
df = (
    df[['image_id', 'x', 'y', 'w', 'h', 'x_center', 'y_center']]
    .sort_values('image_id')
    .reset_index(drop=True)
)
df.to_csv('train.csv', index=False)

In [7]:
# Source image folder and the temporary roots that will hold images and label files.
img_dir = '../input/wheat-x0/train'
temp_img_dir = '/tmp/wheat/images'
temp_label_dir = '/tmp/wheat/labels'

# Create the train/ and valid/ sub-folders under both roots (no error if they already exist).
for root in (temp_img_dir, temp_label_dir):
    for split in ('train', 'valid'):
        os.makedirs(f'{root}/{split}', exist_ok=True)

In [8]:
# Split the box rows into 5 folds, grouped by image so that all rows of an image share one fold.
gfs = GroupKFold(5)

# Collect the fold number of every row positionally, then attach it as a column.
fold_of_row = np.zeros(len(df), dtype=np.int64)
for k, (_, val_rows) in enumerate(gfs.split(df, groups=df['image_id'])):
    fold_of_row[val_rows] = k
df['fold'] = fold_of_row

In [9]:
# Collapse the per-box rows to the distinct (image_id, fold) pairs.
temp_df = df[['image_id', 'fold']].drop_duplicates().reset_index(drop=True)

In [10]:
# Fold whose images are held out for validation; every other fold is used for training.
fold = 0
in_validation = temp_df['fold'] == fold
train_ids = temp_df.loc[~in_validation, 'image_id'].tolist()
val_ids = temp_df.loc[in_validation, 'image_id'].tolist()

In [11]:
# Add to the training list every image file that has no row in `df`
# (such images get no label file when the labels are written later).
# - ids are appended in sorted order so `train_ids` is reproducible across runs
#   (plain set iteration order is not);
# - ids already present in `train_ids` are skipped, so re-running this cell does
#   not append duplicates;
# - the extension is stripped with splitext instead of assuming it is 4 characters long.
already_listed = set(df['image_id'].unique()) | set(train_ids)
for src_dir in ('../input/wheat-x0/train', '../input/global-wheat-detection/train'):
    stems = {os.path.splitext(file_name)[0] for file_name in os.listdir(src_dir)}
    unlabeled = sorted(stems - already_listed)
    train_ids.extend(unlabeled)
    already_listed.update(unlabeled)

In [12]:
# Ids that are exactly this many characters long are read as .jpg files from
# global-wheat-detection; every other id is read as a .png file from wheat-x0.
GWD_ID_LENGTH = 9

# Copy each split's images into its folder under `temp_img_dir`, keeping the file name.
for split, ids in (('train', train_ids), ('valid', val_ids)):
    for image_id in fastprogress.progress_bar(ids):
        if len(image_id) == GWD_ID_LENGTH:
            src_dir, ext = '../input/global-wheat-detection/train', 'jpg'
        else:
            src_dir, ext = '../input/wheat-x0/train', 'png'
        shutil.copyfile(f'{src_dir}/{image_id}.{ext}', f'{temp_img_dir}/{split}/{image_id}.{ext}')

In [13]:
# Create .yaml file 
import yaml

data_yaml = dict(
    train = '/tmp/wheat/images/train',
    val = '/tmp/wheat/images/valid',
    nc = 2,
    names = ['none', 'wheat_head']
)

with open('data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
%cat data.yaml

{names: [none, wheat_head], nc: 2, train: /tmp/wheat/images/train, val: /tmp/wheat/images/valid}


In [14]:
import pickle

# Save the train/validation id lists next to the notebook outputs.
for pkl_path, ids in (('train.pkl', train_ids), ('val.pkl', val_ids)):
    with open(pkl_path, 'wb') as f:
        pickle.dump(ids, f)

In [15]:
# Divisor that turns pixel coordinates into fractions of the image side; it equals
# the `--img 1024` value passed to train.py.
# NOTE(review): assumes every image is 1024x1024 — confirm for both datasets.
IMG_SIZE = 1024
COORD_COLS = ['x_center', 'y_center', 'w', 'h']

# Value written as the first field of every label line.
df['domain'] = 1

# Write one `<image_id>.txt` per image and split, one line per box:
# "<domain> <x_center> <y_center> <w> <h>", coordinates divided by IMG_SIZE.
for split, ids in (('train', train_ids), ('valid', val_ids)):
    for name, group in df[df['image_id'].isin(ids)].groupby('image_id'):
        # Work on an explicit copy: assigning into the groupby slice itself raises
        # SettingWithCopyWarning, and reusing the name `temp_df` would clobber the
        # per-image fold table built earlier in the notebook.
        label_rows = group[['domain'] + COORD_COLS].copy()
        label_rows[COORD_COLS] = label_rows[COORD_COLS] / IMG_SIZE
        cont = label_rows.astype(np.float32).values.astype(str)
        with open(f'{temp_label_dir}/{split}/{name}.txt', 'w') as label_file:
            label_file.writelines(' '.join(fields) + "\n" for fields in cont)

In [16]:
# Run YOLOv5 training from the cloned repo on the dataset described by data.yaml,
# starting from the yolov5x6.pt weights: image size 1024, batch size 5, 10 epochs.
# WANDB_MODE="dryrun" is set for this command only; the log below reports W&B syncing as offline.
!WANDB_MODE="dryrun" python yolov5/train.py --img 1024 --batch 5 --epochs 10 --data data.yaml --weights yolov5x6.pt

[34m[1mgithub: [0mskipping check (not a git repository), for updates see https://github.com/ultralytics/yolov5
[31m[1mrequirements:[0m /kaggle/working/requirements.txt not found, check failed.
2021-06-19 08:33:22.025748: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-06-19 08:33:27.143249: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-06-19 08:33:27.145591: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[34m[1mwandb[0m: W&B syncing is set to `offline` in this directory.  Run `wandb online` or set WANDB_MODE=online to enable cloud syncing.










































[34m[1mwandb[0m: Waiting for W&B process to finish, PID 185
[34m[1mwandb[0m: Program ended successfully.
[34m[1mwandb[0m:

In [17]:
!rm -rf yolov5
!rm -rf wandb