In [None]:
import os

from tqdm import tqdm_notebook as tqdm
import glob
from collections import defaultdict
from typing import Sequence
import pandas as pd

# Directory holding the per-batch outputs: it is globbed for `batch_*/submission.csv`
# files and is also where the merged submission CSV is written.
EXPERIMENT_DIRPATH = '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/kuba/models'
# Sample submission CSV; its ImageId column is used to add image ids that are
# missing from the merged submission.
SAMPLE_SUBMISSION_FILEPATH = '/mnt/ml-team/minerva/open-solutions/googleai-object-detection/data/sample_submission.csv'

In [None]:
def merge_submissions(submissions: Sequence[pd.DataFrame]) -> pd.DataFrame:
    """Merge several submission frames into one row per ``ImageId``.

    Prediction strings that belong to the same image are concatenated with a
    single space, in the order the frames (and their rows) are visited.

    Args:
        submissions: frames with ``ImageId`` and ``PredictionString`` columns.

    Returns:
        A DataFrame with columns ``ImageId`` and ``PredictionString``. Images are
        listed in order of first appearance; an image without any usable
        prediction gets an empty string. With no input rows the result is an
        empty frame that still has both columns.
    """
    predictions_by_image = defaultdict(list)
    for sub in submissions:
        for row in sub.itertuples(index=False):
            # Look the key up first so the image is registered even when it
            # contributes no predictions.
            image_predictions = predictions_by_image[row.ImageId]
            prediction = row.PredictionString
            # Test for a missing value directly: a substring test for 'nan' would
            # also discard real predictions that merely contain those letters.
            if pd.isna(prediction):
                continue
            text = str(prediction).strip()
            # Blank strings would introduce doubled spaces in the joined result;
            # a bare 'nan' is what a stringified missing value looks like.
            if not text or text.lower() == 'nan':
                continue
            image_predictions.append(text)
    return pd.DataFrame(
        {
            'ImageId': list(predictions_by_image.keys()),
            'PredictionString': [' '.join(parts) for parts in predictions_by_image.values()],
        },
        columns=['ImageId', 'PredictionString'],
    )

In [None]:
# Load every per-batch submission CSV found under EXPERIMENT_DIRPATH.
# glob.glob returns matches in arbitrary order, so the paths are sorted
# (lexicographically) to make the merge order, and therefore the merged CSV,
# reproducible from run to run.
batch_submissions = []
batch_filepaths = sorted(glob.glob('{}/batch_*/submission.csv'.format(EXPERIMENT_DIRPATH)))
for filepath in batch_filepaths:
    print('Processing {}'.format(filepath))
    batch_submission = pd.read_csv(filepath)
    batch_submissions.append(batch_submission)
if not batch_submissions:
    # Make an empty match visible instead of silently merging nothing.
    print('No batch submissions found under {}'.format(EXPERIMENT_DIRPATH))

In [None]:
# Combine all per-batch frames into a single frame with one row per ImageId.
submission = merge_submissions(batch_submissions)
# Last expression of the cell: preview the first rows of the merged frame.
submission.head()

# Remove predictions with degenerate bounding boxes (xmax <= xmin or ymax <= ymin)

In [None]:
def chunker(seq, size):
    """Yield consecutive slices of ``seq`` holding at most ``size`` items each.

    The slices are produced lazily by a generator; the final slice is shorter
    when ``len(seq)`` is not a multiple of ``size``.
    """
    offsets = range(0, len(seq), size)
    return (seq[start:start + size] for start in offsets)

def clean_submission(submission):
    """Remove degenerate bounding boxes from every prediction string.

    Each ``PredictionString`` is read as whitespace-separated groups of six
    tokens, ``label score x1 y1 x2 y2``. A group is kept only when ``x2 > x1``
    and ``y2 > y1``; kept groups are written back using their original tokens.

    Args:
        submission: DataFrame with ``ImageId`` and ``PredictionString`` columns.

    Returns:
        A new DataFrame with columns ``ImageId`` and ``PredictionString``. Rows
        whose prediction string is missing or blank are omitted; rows whose
        boxes were all rejected are kept with an empty prediction string.

    Raises:
        ValueError: if the number of tokens in a prediction string is not a
            multiple of six, or a coordinate cannot be parsed as a float.
    """
    tokens_per_box = 6  # label, score, x1, y1, x2, y2
    ids_cleaned, predictions_cleaned = [], []
    for row in tqdm(submission.itertuples(index=False), total=len(submission)):
        prediction_string = row.PredictionString
        # A missing value has no .split(); treat it like an empty prediction.
        if pd.isna(prediction_string):
            continue
        # split() with no argument collapses repeated and trailing whitespace,
        # which split(' ') would turn into empty tokens that shift the grouping.
        img_predictions = str(prediction_string).split()
        if not img_predictions:
            continue
        if len(img_predictions) % tokens_per_box != 0:
            raise ValueError(
                'PredictionString for image {} has {} tokens, expected a multiple of {}'.format(
                    row.ImageId, len(img_predictions), tokens_per_box))
        img_predictions_cleaned = []
        for pred in chunker(img_predictions, size=tokens_per_box):
            x1, y1, x2, y2 = (float(coord) for coord in pred[2:])
            # Written as a positive test so that NaN coordinates, for which
            # every comparison is False, are rejected as well.
            if x2 > x1 and y2 > y1:
                img_predictions_cleaned.extend(pred)
        predictions_cleaned.append(' '.join(img_predictions_cleaned))
        ids_cleaned.append(row.ImageId)
    submission_fixed = pd.DataFrame({'ImageId': ids_cleaned,
                                     'PredictionString': predictions_cleaned})
    return submission_fixed

In [None]:
# Rebind `submission` to the cleaned frame; boxes with x2 <= x1 or y2 <= y1 are removed.
submission = clean_submission(submission)

In [None]:
def add_missing_image_ids(submission, sample_submission):
    """Outer-join ``submission`` onto the image ids of ``sample_submission``.

    ``ImageId`` is compared as a string on both sides. Neither input frame is
    modified; the string conversion is applied to copies.

    Args:
        submission: DataFrame with an ``ImageId`` column plus prediction columns.
        sample_submission: DataFrame with an ``ImageId`` column; only that
            column is used.

    Returns:
        A new DataFrame containing every ``ImageId`` found in either frame.
        Ids that occur only in ``sample_submission`` have missing values in the
        columns coming from ``submission``.
    """
    # assign()/astype() return new objects, so the caller's frames keep their
    # original ImageId dtype.
    predictions = submission.assign(ImageId=submission['ImageId'].astype(str))
    expected_ids = sample_submission[['ImageId']].astype(str)
    fixed_submission = pd.merge(expected_ids, predictions, on=['ImageId'], how='outer')
    return fixed_submission

In [None]:
# Read the sample submission; its ImageId column supplies the ids to add.
sample_submission = pd.read_csv(SAMPLE_SUBMISSION_FILEPATH)
# Rebind `submission` to the frame that also contains the sample's image ids.
submission = add_missing_image_ids(submission, sample_submission)

In [None]:
# Write the final frame to EXPERIMENT_DIRPATH/merged_submission.csv, without the index column.
submission.to_csv(os.path.join(EXPERIMENT_DIRPATH,'merged_submission.csv'), index=False)