In [1]:
import pandas as pd
import numpy as np
import boto3
import os
import shutil
import numpy as np
import pandas as pd

from ast import literal_eval

In [2]:
def map_per_image(label, predictions, k=5):
    """Computes the precision score of one image.

    The score is the reciprocal rank of the true label among the first `k`
    predictions: 1.0 if it is the first prediction, 1/2 if it is the second,
    and so on. If the label does not occur within the first `k` predictions
    the score is 0.0.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            A list of predicted elements (order does matter, only the first
            `k` predictions are taken into account)
    k : int, optional
            Number of leading predictions that are scored. Defaults to 5,
            which reproduces the original MAP@5 behaviour.

    Returns
    -------
    score : double

    Raises
    ------
    ValueError
            If `k` is smaller than 1.
    """
    # A zero or negative k would turn the slice below into "everything but the
    # last |k| items" instead of "the first k items", so reject it up front.
    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))
    try:
        # .index() returns the 0-based position of the first match; +1 gives the rank.
        return 1 / (predictions[:k].index(label) + 1)
    except ValueError:
        # Label is not among the first k predictions.
        return 0.0

def map_per_set(labels, predictions):
    """Computes the average over multiple images.

    Parameters
    ----------
    labels : list
             A list of the true labels. (Only one true label per image allowed!)
    predictions : list of list
             A list of predicted elements (order does matter, 5 predictions allowed per image).
             Must have the same length as `labels`; entry i is scored against label i.

    Returns
    -------
    score : double
             Mean of the per-image scores. For empty input this is whatever
             `np.mean` returns for an empty list (nan).

    Raises
    ------
    ValueError
             If `labels` and `predictions` differ in length.
    """
    # Materialise both inputs so their lengths can be compared.
    labels = list(labels)
    predictions = list(predictions)
    # zip() would silently drop the surplus entries of the longer input and
    # return a plausible-looking but wrong score, so fail loudly instead.
    if len(labels) != len(predictions):
        raise ValueError(
            "labels and predictions must have the same length "
            "(got {} labels and {} prediction lists)".format(len(labels), len(predictions))
        )
    return np.mean([map_per_image(l, p) for l, p in zip(labels, predictions)])



# Module-level S3 resource handle; download_s3_folder reads this global.
# Assumes credentials & configuration are handled outside Python
# (in the .aws directory or via environment variables).
s3 = boto3.resource('s3')

def download_s3_folder(bucket_name, s3_folder, local_dir=None):
    """
    Download the contents of a folder directory
    Args:
        bucket_name: the name of the s3 bucket
        s3_folder: the folder path in the s3 bucket; a trailing '/' is added
            if missing so that only keys inside the folder are matched
            (an empty string selects the whole bucket)
        local_dir: a relative or absolute directory path in the local file system;
            when None, each object is written to a path equal to its key

    Uses the module-level `s3` resource. Keys ending in '/' (folder
    placeholder objects) are not downloaded.
    """
    bucket = s3.Bucket(bucket_name)
    # S3 prefixes are plain string matches, not directory matches: the prefix
    # 'holdout' also matches the sibling key 'holdout_y.csv', which would then
    # be written outside local_dir via a '..' relative path. Anchoring the
    # prefix with a trailing slash restricts the listing to the folder itself.
    prefix = s3_folder
    if prefix and not prefix.endswith('/'):
        prefix += '/'
    for obj in bucket.objects.filter(Prefix=prefix):
        if local_dir is None:
            target = obj.key
        else:
            target = os.path.join(local_dir, os.path.relpath(obj.key, prefix))
        parent = os.path.dirname(target)
        # dirname is '' for a key without any directory component, and
        # os.makedirs('') raises; exist_ok avoids the check-then-create race.
        if parent:
            os.makedirs(parent, exist_ok=True)
        if obj.key.endswith('/'):
            # Folder placeholder object: nothing to download.
            continue
        bucket.download_file(obj.key, target)
        

# Read the predictions

In [3]:
# The `predictions` column is stored as the string form of a Python list;
# literal_eval turns each cell back into a real list while loading.
predictions = pd.read_csv(
    'final_predictions.csv',
    converters={'predictions': literal_eval},
)

# Read the holdout set

In [5]:
# Local directory for the holdout files. exist_ok keeps this cell re-runnable:
# a plain os.mkdir raises FileExistsError on the second execution.
os.makedirs('holdout', exist_ok=True)
s3 = boto3.resource('s3') # assumes credentials & configuration are handled outside python in .aws directory or environment variables
bucket = s3.Bucket('hackathon-whale-dolphin-compressed')
# Ground-truth labels for the holdout images.
bucket.download_file('holdout_y.csv', 'holdout_y_final.csv')
holdout = pd.read_csv('holdout_y_final.csv')
# Trailing slash on the folder prefix: S3 prefixes are plain string matches,
# so 'holdout' would also match the sibling object 'holdout_y.csv'.
download_s3_folder('hackathon-whale-dolphin-compressed', 'holdout/', local_dir='holdout')

In [5]:
# Preview the first rows to check that the `predictions` column holds lists of ids.
predictions.head()

Unnamed: 0,image,predictions
0,57cae124515b2d.jpg,"[f195c38bcf17, a9f87526d3ef, 24abce9b3f11, 4e1..."
1,094d2dfb60370f.jpg,"[66423582531a, 9f3613b5c45b, 3e5426121fce, 2e2..."
2,40c97c14da3308.jpg,"[a265c977dc30, 76b5aad6b790, 7ca409a5c15b, 910..."
3,ea79e90cf8e8c4.jpg,"[7924af72aa50, 4b228615a4b1, ffbb4e585ff2, 03c..."
4,1f430013db7d8b.jpg,"[8deb2171580e, 559af6cf3cc5, 25645f54d219, 208..."


In [9]:
# Rows whose `count` is 1 get the placeholder label 'new_individual';
# every other row keeps its original individual_id.
seen_once = holdout['count'] == 1
holdout['individual_id'] = np.where(
    seen_once,
    'new_individual',
    holdout['individual_id'],
)

In [10]:
# Preview the holdout labels (image, individual_id and count are the columns used for scoring).
holdout.head()

Unnamed: 0,image,species,individual_id,class,path,count,cumcount
0,57cae124515b2d.jpg,bottlenose_dolphin,005cab4fa315,dolphin,./train/57cae124515b2d.jpg,2,0
1,094d2dfb60370f.jpg,short_finned_pilot_whale,new_individual,whale,./train/094d2dfb60370f.jpg,1,0
2,40c97c14da3308.jpg,bottlenose_dolphin,018aaba90625,dolphin,./train/40c97c14da3308.jpg,30,0
3,ea79e90cf8e8c4.jpg,spotted_dolphin,new_individual,dolphin,./train/ea79e90cf8e8c4.jpg,1,0
4,1f430013db7d8b.jpg,short_finned_pilot_whale,01fe58b26fa5,whale,./train/1f430013db7d8b.jpg,3,0


# Final MAP@5 score

In [7]:
# Pair every holdout label with the predictions made for the *same* image.
# Scoring by row position only works if both frames happen to share the exact
# same order and length; otherwise labels and predictions are silently
# mismatched and the score is meaningless.
scored = holdout[['image', 'individual_id']].merge(
    predictions[['image', 'predictions']],
    on='image',
    how='inner',
    validate='one_to_one',  # raises if an image is duplicated in either frame
)
assert len(scored) == len(holdout), "some holdout images have no predictions"
# NOTE(review): the recorded execution counts show this cell (In [7]) last ran
# before the 'new_individual' relabeling cell (In [9]); restart the kernel and
# run all cells top to bottom so the displayed score reflects the relabeled data.
map_per_set(scored['individual_id'].to_list(), scored['predictions'].to_list())

0.0008896302474284126