In [44]:
import os, math
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from os.path import join
import boto3
from botocore.exceptions import ClientError

In [45]:
# S3 bucket that download_file() pulls result files from.
s3_bucket = "raster-vision"
# Key prefix inside the bucket; download_file() appends "<model>/<file name>".
s3_prefix = "results/tagging"
# Local cache root; downloaded files are stored under "<working_dir>/<model>/".
working_dir = "/opt/data/datasets/planet_kaggle/majority_vote_ensemble_working_dir/"

def download_file(model, end_path, force=False):
    """Return the local path of a model's result file, downloading it if needed.

    The file is looked up at ``<working_dir>/<model>/<end_path>``. When it is
    missing (or ``force`` is true) it is fetched from
    ``s3://<s3_bucket>/<s3_prefix>/<model>/<end_path>``.

    Args:
        model: Model identifier, used as a sub-path both locally and on S3.
        end_path: File name (or relative path) of the result file.
        force: When true, download again even if a local copy already exists.

    Returns:
        The local filesystem path of the file.

    Raises:
        Whatever the boto3 download raises when the object cannot be fetched;
        callers in this notebook catch ``ClientError`` for a missing key.
    """
    local_path = join(working_dir, model, end_path)
    if force or not os.path.exists(local_path):
        # Create the file's actual parent directory so that an end_path
        # containing sub-directories also works.
        parent_dir = os.path.dirname(local_path)
        if not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)

        key = join(s3_prefix, model, end_path)
        s3 = boto3.resource('s3')
        s3.Bucket(s3_bucket).download_file(key, local_path)
    return local_path

def get_test_preds(model_name):
    """Load one model's test predictions as a DataFrame.

    Tries ``test_preds.csv`` first and falls back to ``test_predictions.csv``
    when the first download raises ``ClientError``. The ``tags`` column is
    renamed to ``model_name`` so that frames from several models can be merged
    side by side.
    """
    try:
        csv_path = download_file(model_name, "test_preds.csv")
    except ClientError:
        csv_path = download_file(model_name, "test_predictions.csv")
    predictions = pd.read_csv(csv_path)
    return predictions.rename(columns={'tags': model_name})


In [46]:
# Ensemble members mapped to their vote weight (every member currently votes
# with weight 1).
models = {name: 1 for name in [
    "7_5_17/ensemble/0",
    "7_5_17/ensemble/1",
    "7_5_17/ensemble/2",
    "7_5_17/ensemble/3",
    "7_5_17/ensemble/4",
    "7_5_17/ensemble/avg",
    "6_30_17/rerun_best-test_aug",
    "7_3_17/baseline-branch-tiffdrop",
    "7_3_17/dual_resnet-stage4-moretrain",
    "7_3_17/dual_resnet-stage5-moretrain",
    "7_7_17/tif_ensemble_0/0",
    "7_7_17/tif_ensemble_0/1",
    "7_7_17/tif_ensemble_0/2",
    "7_7_17/tif_ensemble_0/3",
    "7_7_17/jpg_ensemble_0/0",
    "7_7_17/jpg_ensemble_0/1",
    "7_7_17/jpg_ensemble_0/2",
    "7_7_17/jpg_ensemble_0/3",
    "7_7_17/baseline_cyclic_1",
    "7_7_17/baseline_cyclic_1200steps",
]}

# Additional members, also registered with weight 1.
l = [
    "7_7_17/baseline_cyclic_2",
    "7_6_17/baseline_cyclic",
    "7_2_17/IRRGtiff_v5actual_dense121_3x10epochs_0",
    "6_30_17/dense121_dropout_1",
    "6_30_17/dense121_dropout_2",
    "6_30_17/dense121_dropout_3",
    "6_29_17/dense121_3x10epochs_0",
    "6_29_17/IRRGBtiff_v5_dense121_3x10epochs_0",
    "6_28_17/baseline_testrot_0",
]
models.update(dict.fromkeys(l, 1))

In [47]:
# Build one wide frame: a row per image, a column of predicted tags per model.
model_list = list(models.items())

df = get_test_preds(model_list[0][0])
for entry in model_list[1:]:
    # Echo the (model, weight) pair as a progress indicator.
    print(entry)
    df = df.merge(get_test_preds(entry[0]), on='image_name')

('7_7_17/tif_ensemble_0/3', 1)
('6_29_17/dense121_3x10epochs_0', 1)
('7_7_17/jpg_ensemble_0/3', 1)
('7_5_17/ensemble/1', 1)
('7_2_17/IRRGtiff_v5actual_dense121_3x10epochs_0', 1)
('7_3_17/dual_resnet-stage4-moretrain', 1)
('7_7_17/baseline_cyclic_2', 1)
('7_7_17/jpg_ensemble_0/0', 1)
('7_7_17/tif_ensemble_0/2', 1)
('6_29_17/IRRGBtiff_v5_dense121_3x10epochs_0', 1)
('7_6_17/baseline_cyclic', 1)
('7_7_17/baseline_cyclic_1', 1)
('7_5_17/ensemble/2', 1)
('7_7_17/jpg_ensemble_0/2', 1)
('7_5_17/ensemble/3', 1)
('7_5_17/ensemble/0', 1)
('7_5_17/ensemble/avg', 1)
('6_28_17/baseline_testrot_0', 1)
('7_3_17/baseline-branch-tiffdrop', 1)
('7_7_17/tif_ensemble_0/0', 1)
('7_7_17/tif_ensemble_0/1', 1)
('7_5_17/ensemble/4', 1)
('7_7_17/jpg_ensemble_0/1', 1)
('7_3_17/dual_resnet-stage5-moretrain', 1)
('6_30_17/rerun_best-test_aug', 1)
('6_30_17/dense121_dropout_1', 1)
('6_30_17/dense121_dropout_2', 1)
('6_30_17/dense121_dropout_3', 1)


In [48]:
# Atmosphere labels, spelled exactly as they occur in the prediction CSVs
# (the data uses "haze", not "hazy").
atmos_tags = ["cloudy", "haze", "clear", "partly_cloudy"]
from collections import Counter, defaultdict
def mapper(row):
    """Combine one image's per-model tag strings into a weighted majority vote.

    ``row`` is indexed by the keys of the module-level ``models`` mapping; each
    cell is expected to hold a whitespace-separated tag string. Every tag a
    model predicts receives that model's weight (``models[m]``), and a tag is
    kept when it collects at least half of the total weight.

    Returns:
        The kept tags, sorted alphabetically and joined by single spaces
        (an empty string when no tag reaches the threshold).

    NOTE: nothing here restricts the result to a single atmosphere tag; an
    image can come out with several (or none) if the votes fall that way.
    """
    counts = defaultdict(int)
    for m, weight in models.items():
        cell = row[m]
        # An empty tags field comes back from pd.read_csv as NaN, which has no
        # .split(); treat it as "this model predicted no tags".
        if pd.isnull(cell):
            continue
        # set(): a model gets one vote per tag even if a tag is repeated, and
        # split() without arguments ignores stray/double whitespace.
        for tag in set(cell.split()):
            counts[tag] += weight

    # Compare against the total weight rather than the number of models so
    # that non-unit weights are honoured. With unit weights this is identical
    # to the former ``counts[tag] >= ceil(len(models) / 2.0)`` test.
    total_weight = sum(models.values())
    results = [tag for tag, votes in counts.items()
               if votes * 2 >= total_weight]

    return ' '.join(sorted(results))
# Debug aid: uncomment the next line to try the vote on a 10-row sample first.
#df = df.sample(10)
# Run the vote row by row and store the result in a new 'tags' column of df.
df['tags'] = df.apply(mapper, axis=1)

In [36]:
# Preview only the first rows; the merged frame has one wide column per model.
df.head(10)

Unnamed: 0,image_name,7_7_17/tif_ensemble_0/2,7_7_17/jpg_ensemble_0/1,7_3_17/baseline-branch-tiffdrop,7_7_17/tif_ensemble_0/0,7_7_17/jpg_ensemble_0/3,7_5_17/ensemble/1,7_3_17/dual_resnet-stage4-moretrain,7_7_17/tif_ensemble_0/3,7_7_17/baseline_cyclic_1200steps,...,7_5_17/ensemble/4,7_3_17/dual_resnet-stage5-moretrain,7_7_17/jpg_ensemble_0/0,7_5_17/ensemble/0,6_30_17/rerun_best-test_aug,7_7_17/baseline_cyclic_1,7_5_17/ensemble/2,7_7_17/jpg_ensemble_0/2,7_5_17/ensemble/3,tags
0,test_14234,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,...,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary,haze partly_cloudy primary
1,test_25618,agriculture bare_ground clear habitation prima...,bare_ground clear primary road water,agriculture artisinal_mine bare_ground clear h...,agriculture bare_ground clear habitation prima...,bare_ground clear primary road water,artisinal_mine bare_ground clear primary road ...,agriculture bare_ground clear conventional_min...,agriculture bare_ground clear conventional_min...,agriculture bare_ground clear primary road water,...,bare_ground clear primary road water,agriculture bare_ground clear habitation prima...,artisinal_mine bare_ground clear primary road ...,artisinal_mine bare_ground clear primary road ...,artisinal_mine bare_ground clear primary road ...,agriculture bare_ground clear primary road water,artisinal_mine bare_ground clear primary road ...,bare_ground clear primary road water,bare_ground clear primary road water,bare_ground clear primary road water
2,file_8418,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,...,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary
3,test_7039,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,...,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary
4,file_17312,agriculture cultivation partly_cloudy primary ...,agriculture cultivation partly_cloudy primary ...,agriculture cultivation partly_cloudy primary ...,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road water,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road water,...,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road water,agriculture partly_cloudy primary road water,agriculture cultivation partly_cloudy primary ...,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road water,agriculture cultivation partly_cloudy primary ...,agriculture partly_cloudy primary road water,agriculture partly_cloudy primary road water,agriculture cultivation partly_cloudy primary ...
5,file_6891,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,...,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary
6,test_33554,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,...,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary,clear primary
7,test_30053,partly_cloudy primary water,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,...,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary,partly_cloudy primary
8,file_4455,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture bare_ground clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture bare_ground clear primary water,agriculture bare_ground clear primary water,agriculture clear primary water,...,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water,agriculture clear primary water
9,file_10405,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary slash_burn,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,...,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary slash_burn,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary,agriculture clear cultivation primary


In [49]:
# Write the submission file: only the image name and the ensembled tags.
df[['image_name', 'tags']].to_csv(
    "/opt/data/datasets/planet_kaggle/test_preds/ensemble-majority-5.csv",
    index=False,
    header=True,
)

In [14]:
# What's going on with atmosphere?
# Load two previously written ensemble submissions and count the images whose
# tag strings differ between them.
# NOTE(review): 'with' / 'without' presumably refer to the atmosphere-tag
# handling in mapper() -- confirm which run produced which file.
dfa = pd.read_csv("/opt/data/datasets/planet_kaggle/test_preds/ensemble-majority.csv") \
        .rename(columns={'tags': 'with'})
dfb = pd.read_csv("/opt/data/datasets/planet_kaggle/test_preds/ensemble-majority-2.csv") \
        .rename(columns={'tags': 'without'})
# The former `df2 = pd.read_csv(join(dd, ...))` line was dropped: `dd` is only
# defined in a later cell (NameError on a fresh kernel) and df2 was never used.
dfm = pd.merge(dfa, dfb, on='image_name')
dfm.loc[dfm['with'] != dfm['without']].count()

image_name    3110
with          3110
without       3110
dtype: int64

In [None]:
# Alternative loader: read each model's predictions from a local directory
# and merge them into one frame keyed by image name.
dd = "/opt/data/datasets/planet_kaggle/test_preds/"
model_list = list(models.items())

df = pd.read_csv(join(dd, model_list[0][0])).rename(columns={'tags': model_list[0][0]})
for entry in model_list[1:]:
    # Each model's 'tags' column is renamed to the model's own name.
    per_model = pd.read_csv(join(dd, entry[0])).rename(columns={'tags': entry[0]})
    df = df.merge(per_model, on='image_name')