In [1]:
def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.
    This function computes the average precision at k between two lists of
    items.
    Parameters
    ----------
    actual : list
             A list of elements that are to be predicted (order doesn't matter)
    predicted : list
                A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements
    Returns
    -------
    score : double
            The average precision at k over the input lists; 0.0 when
            `actual` is empty (or None) or when `k` is not positive
    """
    # Nothing relevant to find, or no prediction slots to score: the result
    # is defined as 0.0. len() is used instead of truthiness so that numpy
    # arrays (whose boolean value is ambiguous) are accepted as `actual`.
    if actual is None or len(actual) == 0 or k <= 0:
        return 0.0

    # Only the first k predictions are scored.
    predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        # A relevant item counts only the first time it is predicted;
        # repeats further down the ranking are ignored.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            # Precision at this rank = hits so far / number of items seen.
            score += num_hits / (i + 1.0)

    # Normalise by the best achievable number of hits within k slots.
    return score / min(len(actual), k)

def mapk(actual, predicted, k=10):
    """
    Computes the mean average precision at k.
    Each entry of `actual` is paired with the entry of `predicted` at the
    same position, scored with `apk`, and the scores are averaged.
    Parameters
    ----------
    actual : list
             A list of lists of elements that are to be predicted
             (order doesn't matter in the lists)
    predicted : list
                A list of lists of predicted elements
                (order matters in the lists)
    k : int, optional
        The maximum number of predicted elements
    Returns
    -------
    score : double
            The mean average precision at k over the input lists
    """
    per_query_scores = []
    # zip pairs the two sequences position by position and stops at the
    # shorter one.
    for relevant, ranked in zip(actual, predicted):
        per_query_scores.append(apk(relevant, ranked, k))
    return np.mean(per_query_scores)

In [2]:
import pandas as pd
import numpy as np
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; model_selection is its replacement. Fall back to the old module only
# on installs that predate it.
try:
    from sklearn.model_selection import train_test_split, cross_val_score
except ImportError:
    from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
# import ml_metrics as metrics
%matplotlib inline

In [3]:
# Load the training data; 'train.csv' is resolved relative to the notebook's working directory.
df = pd.read_csv('train.csv')

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=41)

In [5]:
# Split each partition into its target column (place_id) and the remaining feature columns.
y_train = train['place_id']
X_train = train.drop('place_id', axis=1)

y_test = test['place_id']
X_test = test.drop('place_id', axis=1)

In [6]:
# One row per place_id: the mean x and mean y of that place's training rows.
places = train.groupby('place_id')[['x', 'y']].mean()

In [7]:
# Move place_id out of the index into an ordinary column, leaving a plain positional index.
places = places.reset_index()

In [9]:
# Nearest-centroid baseline: fit a 1-nearest-neighbour classifier on the per-place mean (x, y),
# so a query point is labelled with the place_id of the closest centroid.
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(places[['x', 'y']], places.place_id)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

In [11]:
# Predict one place_id for every held-out row using only its (x, y) coordinates.
preds = clf.predict(X_test[['x', 'y']])

In [22]:
# NOTE(review): apk scores ONE query, so this call treats the entire test set as a single list
# of relevant items and only looks at the first k=3 predictions. The 1.0 displayed for this cell
# therefore only says that those three predictions are distinct and each occurs somewhere in
# y_test; it is not an accuracy figure. Per-sample scoring needs mapk with one row per sample.
apk(list(y_test.values), list(preds), k=3)

1.0

In [18]:
# Number of held-out rows whose predicted place_id is exactly right.
# The vectorised .sum() avoids walking the boolean array element by element in Python.
(y_test.values == preds).sum()

628201

In [20]:
# Top-1 accuracy, computed from the data instead of from a count pasted out of another cell's
# output (a pasted number silently goes stale when the split or the model changes).
(y_test.values == preds).mean()

0.10787149883963627

In [24]:
# Scratch check: reshape((n, 1)) turns a flat vector into a column, i.e. one single-element row per entry.
np.array([1,2,3]).reshape((3,1))

array([[1],
       [2],
       [3]])

In [27]:
# mapk expects one list of predictions / relevant items per sample, so turn both flat vectors
# into (n_samples, 1) columns.
preds_list = preds.reshape((preds.shape[0], 1))
# y_test is a pandas Series; Series.reshape was deprecated in pandas 0.19 and later removed,
# so reshape the underlying ndarray instead.
y_test_list = y_test.values.reshape((y_test.shape[0], 1))

In [28]:
# With exactly one prediction per sample, MAP@k reduces to the fraction of exact matches,
# so this is expected to agree with the top-1 accuracy.
mapk(y_test_list, preds_list)

0.10787149883963627

In [37]:
# Inspect the 3 nearest centroids for the first three held-out rows.
# The result is a (distances, indices) pair; the indices are presumably row positions in the
# frame the classifier was fitted on (places) — confirm against the scikit-learn docs.
clf.kneighbors(X_test[['x', 'y']].iloc[0:3], 3)

(array([[ 0.01500046,  0.01851305,  0.0215449 ],
        [ 0.01134724,  0.0227848 ,  0.05186886],
        [ 0.01226417,  0.02663006,  0.02802244]]),
 array([[25275, 96552, 80211],
        [80850, 95254,  8562],
        [65946, 19041, 98035]], dtype=int64))

In [32]:
# Coordinates of the first held-out row, for comparison with its nearest centroids.
X_test[['x', 'y']].iloc[0]

x    4.7021
y    2.5319
Name: 18890777, dtype: float64

In [36]:
# NOTE(review): the neighbour listing for the first held-out row reports index 80211, but this
# looks up position 80212 — confirm which row was intended.
places[['x', 'y']].iloc[80212]

x    1.128425
y    7.362539
Name: 80212, dtype: float64