In [1]:
import pandas as pd
import numpy as np
import cv2
import os
import random
import ast
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

%matplotlib inline

In [6]:
DIR_RESULTS = '/kaggle/input/global-wheat-detection-public'

# Out-of-fold (OOF) prediction files: one "best" CSV per fold, folds 0-4.
VALID_RESULTS = [
    f"{DIR_RESULTS}/validation_results_fold{fold}_best.csv"
    for fold in range(5)
]

# Box-size buckets, expressed as pixel areas:
#   area <  AREA_SMALL                -> 'small'
#   AREA_SMALL <= area < AREA_MEDIUM  -> 'medium'
#   area >= AREA_MEDIUM               -> 'large'
AREA_SMALL = 56 ** 2
AREA_MEDIUM = 96 ** 2

# Margin (in pixels) used for the 'is_border' flag: a box is flagged when one
# of its sides lies closer than this to an edge of the image.
BORDER_SIZE = 2

# The experiments were run on 800px inputs, while the ground-truth boxes
# are given at 1024px, so predictions must be scaled back up by this factor.
SCALE = 1024 / 800

# Threshold at which the analysis is performed.
THRESHOLD = 0.5

# Input data locations.
DIR_INPUT = '/userhome/34/h3509807/wheat-data'
DIR_TRAIN = DIR_INPUT + '/train'
DIR_TEST = DIR_INPUT + '/test'

#### Training Data

In [7]:
# Load the ground-truth annotations.
train_df = pd.read_csv(f"{DIR_INPUT}/train.csv")

# From Andrew's kernel: parse the string-encoded 'bbox' column into four
# float32 columns, then discard the original string column.
bbox_values = np.stack(train_df['bbox'].apply(ast.literal_eval))
train_df[['x', 'y', 'w', 'h']] = pd.DataFrame(bbox_values).astype(np.float32)
train_df = train_df.drop(columns=['bbox'])

# Far corner of every box.
train_df['x2'] = train_df['x'] + train_df['w']
train_df['y2'] = train_df['y'] + train_df['h']

# Box area.
train_df['area'] = train_df['w'] * train_df['h']

# Flag boxes touching the image edge: the near corner lies within BORDER_SIZE
# of the origin, or the far corner lies within BORDER_SIZE of 1024.
# NOTE(review): 1024 is hard-coded as the image side length -- confirm that
# every image in train.csv really is 1024x1024.
far_edge = 1024 - BORDER_SIZE
border_filt = (
    train_df[['x', 'y']].lt(BORDER_SIZE).any(axis=1)
    | train_df[['x2', 'y2']].gt(far_edge).any(axis=1)
)
train_df['is_border'] = border_filt

# Size category. Everything starts as 'large'; the thresholds are then applied
# from the widest to the narrowest so that 'small' overrides 'medium'.
train_df['size'] = 'large'
for size_label, area_limit in (('medium', AREA_MEDIUM), ('small', AREA_SMALL)):
    train_df.loc[train_df['area'] < area_limit, 'size'] = size_label

# Every row loaded here is a ground-truth box.
train_df['is_gt'] = True

# Placeholder columns, initialised to 0.0.
for placeholder in ('brightness', 'contrast', 'overlap_iou'):
    train_df[placeholder] = 0.0

train_df = train_df.sort_values(by='image_id')