In [None]:
import sys
sys.path.append('../')

import os

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.externals import joblib
from tqdm import tqdm_notebook as tqdm

from common_blocks.utils import run_length_encoding
from common_blocks.metrics import compute_eval_metric


# Placeholder-looking locations ('YOUR/...'); presumably meant to be replaced
# with local paths when the notebook is run elsewhere -- TODO confirm.
METADATA_FILEPATH = 'YOUR/metadata.csv'
OUT_OF_FOLD_TRAIN_RESULTS_FILEPATH = 'YOUR/out_of_fold_train_predictions.pkl'

# NOTE: METADATA_FILEPATH is rebound here, so the placeholder value assigned
# above never takes effect.
METADATA_FILEPATH = '/mnt/ml-team/minerva/open-solutions/salt/files/metadata.csv'
MODEL_DIRPATH = '/mnt/ml-team/minerva/open-solutions/salt/files/out_of_fold_predictions'

# Sub-directories of MODEL_DIRPATH, one per experiment, whose predictions are
# averaged further down.
EXPERIMENTS = [
    'sal_1574_cv_825_lb_840',
    'sal_1890_cv_834_lb_840',
    'sal_1980_cv_846_lb_846',
    'sal_1976_cv_849_lb_847',
    'sal_1977_cv_849_lb_847',
    'sal_2003_cv_833_lb_840',
    'sal_2036_cv_850_lb_847',
    'sal_2148_cv_849_lb_848',
    'sal_2140_cv_850_lb_848',
    'sal_2151_cv_850_lb_848',
    'sal_2103_cv_836_lb_841',
    'sal_2173_cv_851_lb_848',
    'sal_2163_cv_851_lb_848',
]

# One pickled train-prediction file per experiment, in EXPERIMENTS order.
OUT_OF_FOLD_TRAIN_PREDICTIONS = [
    '{}/{}/out_of_fold_train_predictions.pkl'.format(MODEL_DIRPATH, name)
    for name in EXPERIMENTS
]

# One pickled test-prediction file per experiment, in EXPERIMENTS order.
OUT_OF_FOLD_TEST_PREDICTIONS = [
    '{}/{}/out_of_fold_test_predictions.pkl'.format(MODEL_DIRPATH, name)
    for name in EXPERIMENTS
]

In [None]:
# Show the entries found in MODEL_DIRPATH as a quick check that the
# prediction directory is reachable from this kernel.
display(os.listdir(MODEL_DIRPATH))

In [None]:
def load_img(path):
    """Load an image file into a NumPy array.

    Args:
        path: Location of the image file, in any form accepted by
            ``PIL.Image.open``.

    Returns:
        numpy.ndarray: The decoded pixel data of the image.
    """
    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released as soon as the pixels have been copied into
    # the array, which matters when this is called once per image in a loop.
    with Image.open(path) as img:
        return np.array(img)

In [None]:
# Load the metadata table; a later cell reads its 'id' and 'file_path_mask'
# columns to locate the ground-truth mask of each train image.
metadata = pd.read_csv(METADATA_FILEPATH)
# Preview the first rows.
metadata.head()

# Average out-of-fold train predictions

In [None]:
# Average the out-of-fold train predictions of all experiments, image by image.
#
# Running sums and contribution counts are keyed by image id. The sum buffer
# for an id is created from the first map seen for it, so no image size is
# hard-coded and every prediction file is read exactly once.
train_prediction_sums = {}
train_prediction_counts = {}

for filepath in tqdm(OUT_OF_FOLD_TRAIN_PREDICTIONS):
    # NOTE(review): joblib.load unpickles the file -- only point this at
    # prediction files you trust.
    oof_train = joblib.load(filepath)
    ids, images = oof_train['ids'], oof_train['images']
    for idx, image in zip(ids, images):
        # Index 1 along the first axis is the map that gets averaged
        # (presumably the positive-class probability -- TODO confirm).
        mask = image[1, :, :]
        if idx not in train_prediction_sums:
            train_prediction_sums[idx] = np.zeros(mask.shape)
            train_prediction_counts[idx] = 0
        train_prediction_sums[idx] += mask
        train_prediction_counts[idx] += 1

# Dividing by the per-id count is the same as dividing by the number of files
# when every file covers every id, and remains a true mean when one does not.
mean_train_predictions = {idx: total / train_prediction_counts[idx]
                          for idx, total in train_prediction_sums.items()}

In [None]:
# Cut-off applied to the averaged maps to obtain binary masks.
THRESHOLD = 0.3
# Masks with fewer positive pixels than this are blanked out entirely.
# With 0.0 the check below can never trigger (a pixel count is never negative).
MIN_SIZE = 0.0

# Build the id -> ground-truth mask path lookup once instead of filtering the
# whole metadata frame for every image. drop_duplicates keeps the first row
# per id, matching the previous "first match" behaviour.
mask_path_by_id = (metadata
                   .drop_duplicates(subset='id')
                   .set_index('id')['file_path_mask']
                   .to_dict())

# Score the thresholded ensemble against the ground truth, one image at a time.
iouts = []
for image_id, prediction_map in tqdm(mean_train_predictions.items()):
    mask = (prediction_map > THRESHOLD).astype(np.uint8)
    if mask.sum() < MIN_SIZE:
        mask = np.zeros_like(mask)
    ground_truth = load_img(mask_path_by_id[image_id])
    # Any non-zero pixel in the stored mask counts as foreground.
    ground_truth = (ground_truth > 0).astype(np.uint8)
    iout = compute_eval_metric(ground_truth, mask)
    iouts.append(iout)
# Mean of the per-image scores returned by compute_eval_metric.
print('IOUT {}'.format(np.mean(iouts)))

# Average test predictions

In [None]:
# Average the test predictions of all experiments, image by image.
#
# Running sums and contribution counts are keyed by image id. The sum buffer
# for an id is created from the first map seen for it, so no image size is
# hard-coded and every prediction file is read exactly once.
test_prediction_sums = {}
test_prediction_counts = {}

for filepath in tqdm(OUT_OF_FOLD_TEST_PREDICTIONS):
    # NOTE(review): joblib.load unpickles the file -- only point this at
    # prediction files you trust.
    oof_test = joblib.load(filepath)
    ids, images = oof_test['ids'], oof_test['images']
    for idx, image in zip(ids, images):
        # Index 1 along the first axis is the map that gets averaged
        # (presumably the positive-class probability -- TODO confirm).
        mask = image[1, :, :]
        if idx not in test_prediction_sums:
            test_prediction_sums[idx] = np.zeros(mask.shape)
            test_prediction_counts[idx] = 0
        test_prediction_sums[idx] += mask
        test_prediction_counts[idx] += 1

# Dividing by the per-id count is the same as dividing by the number of files
# when every file covers every id, and remains a true mean when one does not.
mean_test_predictions = {idx: total / test_prediction_counts[idx]
                         for idx, total in test_prediction_sums.items()}

In [None]:
# Turn every averaged test map into one submission row: [image id, RLE string].
output = []
for image_id, prediction_map in tqdm(mean_test_predictions.items()):
    binary_mask = (prediction_map > THRESHOLD).astype(np.uint8)
    # Blank out masks that have fewer positive pixels than MIN_SIZE.
    too_small = binary_mask.sum() < MIN_SIZE
    if too_small:
        binary_mask = np.zeros_like(binary_mask)
    # Space-separated string form of whatever run_length_encoding yields.
    runs = run_length_encoding(binary_mask)
    output.append([image_id, ' '.join(map(str, runs))])

# Both columns are stored as strings.
submission = pd.DataFrame(output, columns=['id', 'rle_mask'])
submission = submission.astype(str)

In [None]:
# Write the submission CSV into MODEL_DIRPATH. index=None is falsy, so the row
# index is presumably left out of the file -- TODO confirm for your pandas version.
# NOTE(review): 'lb_xxx' in the file name looks like a placeholder -- confirm
# and rename once the real score is known.
submission.to_csv(os.path.join(MODEL_DIRPATH, 'prediction_average_cv_850_lb_xxx.csv'),index=None)