In [None]:
from pandas import DataFrame, Series
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np

In [None]:
#APK Metric Function
def apk(actual, predicted, k=3):
    """Average precision at k for a single query.

    Args:
        actual: Collection of relevant items (list, tuple, set or NumPy array).
        predicted: Ordered sequence of predictions, best guess first.
        k: Maximum number of leading predictions that are scored.

    Returns:
        The average-precision score as a float. 0.0 is returned when
        `actual` is empty (or None) or when `k` is not positive.
    """
    # Use len() rather than truthiness: `not actual` raises ValueError for a
    # NumPy array holding more than one element. Checking up front also
    # avoids scoring predictions against nothing.
    if actual is None or len(actual) == 0:
        return 0.0
    # k == 0 would otherwise divide by zero in the final normalisation.
    if k <= 0:
        return 0.0

    # Slicing is a no-op when there are k or fewer predictions.
    predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0
    for i, p in enumerate(predicted):
        # A repeated prediction is only credited the first time it appears.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    return score / min(len(actual), k)

In [None]:
#Reading in Data
# Load the training set from 'train.csv', resolved relative to the current
# working directory. Later cells read the columns x, y, time, accuracy and
# place_id from this frame.
train = pd.read_csv('train.csv')

In [None]:
#Preprocessing time into date
# `train.time` is interpreted as whole minutes elapsed since an arbitrary
# origin, so the resulting timestamps are only meaningful relative to one
# another (hour of day, weekday, ...), not as real calendar dates.
# The minute resolution is inferred from the string itself; `dtype=` is not
# part of np.datetime64's documented (value, unit) signature, so it is dropped.
initial_date = np.datetime64('2014-01-01T01:01')   #Arbitrary decision
# Vectorised: cast the whole column to minute timedeltas in one step instead
# of building one np.timedelta64 per row in a Python-level generator.
# The int64 cast truncates any fractional minutes, as int(mn) did.
minute_offsets = train.time.values.astype(np.int64).astype('timedelta64[m]')
d_times = pd.DatetimeIndex(initial_date + minute_offsets)

In [None]:
#Creating our grid
# Cell size in raw (unweighted) coordinate units.
x_range = 0.5
y_range = 0.25

fw = [500, 1000] #feature weights
x_weights = fw[0]
y_weights = fw[1]

# Cell edges are expressed in *weighted* units so they can be compared
# directly against the weighted x / y columns.
# The factor 10 presumably is the extent of the raw coordinates - TODO confirm.
# np.arange excludes its stop value, so one extra edge is appended a full
# cell beyond it; with the strict `<` upper bound used when selecting a cell,
# this keeps samples lying exactly on the outer boundary in the last cell.
xpoints = np.arange(0, x_weights*10, x_range*x_weights)
xpoints = np.append(xpoints, x_weights*10 + x_range*x_weights)
ypoints = np.arange(0, y_weights*10, y_range*y_weights)
# The padding must be in weighted units like every other edge; the raw
# `y_range` alone mixed units and was inconsistent with the x axis.
ypoints = np.append(ypoints, y_weights*10 + y_range*y_weights)
# Default 'xy' indexing: xs[i][j] == xpoints[j] and ys[i][j] == ypoints[i].
xs, ys = np.meshgrid(xpoints, ypoints)

In [None]:
#Feature Engineering
# Stretch the coordinates by their feature weights. The columns are
# overwritten, so running this cell twice applies the weights twice.
train['x'] = train['x'].mul(fw[0])
train['y'] = train['y'].mul(fw[1])

# Calendar components read off the DatetimeIndex `d_times`, added as columns
# in this order.
for part in ('hour', 'weekday', 'day', 'month', 'year'):
    train[part] = getattr(d_times, part)

In [None]:
#Train Test Split by Time
# Rows with a time strictly below the cut-off form the local training set;
# rows at or after it are held out for local evaluation.
split_t = 730000
features = ['x','y','hour','day','weekday','month','year','accuracy','place_id']

before_split = train.time < split_t
from_split_on = train.time >= split_t

local_train = train.loc[before_split, features]
local_test = train.loc[from_split_on, features]

In [None]:
#First, train on one grid
# Running totals for the evaluation that follows.
score, total = 0.0, 0
cum_scores = []

# Grid cell to inspect: i selects the meshgrid row (y), j the column (x).
i, j = 0, 0

# Half-open bounds [lo, hi) of the chosen cell on each axis.
x_lo, x_hi = xs[i][j], xs[i][j+1]
y_lo, y_hi = ys[i][j], ys[i+1][j]

in_train_cell = ((local_train.x >= x_lo) & (local_train.x < x_hi) &
                 (local_train.y >= y_lo) & (local_train.y < y_hi))
in_test_cell = ((local_test.x >= x_lo) & (local_test.x < x_hi) &
                (local_test.y >= y_lo) & (local_test.y < y_hi))

subset = local_train[in_train_cell]
test_subset = local_test[in_test_cell]

In [None]:
# Fit a k-NN classifier on the training rows of the selected cell and get
# per-class probabilities for the cell's held-out rows.
# Select predictors by name so the code does not rely on 'place_id' being
# the last entry of `features`.
feature_cols = [col for col in features if col != 'place_id']
y = subset['place_id']
# Querying with n_neighbors larger than the number of fitted samples raises
# ValueError, so cap the neighbourhood size for sparsely populated cells.
clf = KNeighborsClassifier(n_neighbors=min(40, len(subset)))
clf.fit(subset[feature_cols], y)
all_preds = clf.predict_proba(test_subset[feature_cols])

In [None]:
# Sanity check on the probability matrix: predict_proba yields one row per
# held-out sample and one column per label in clf.classes_.
all_preds.shape

In [None]:
# Score every held-out row of the cell: pick the three most probable classes
# (most probable first) and add that row's AP@3 to the running totals.
true_places = test_subset.place_id
for record, probs in enumerate(all_preds):
    best_first = probs.argsort()[::-1]
    preds = clf.classes_[best_first[:3]]
    score += apk([true_places.iloc[record]], preds, 3)
    total += 1

print(score/total)

In [None]:
#Iterate through all grids
# Fit one k-NN model per grid cell, score its held-out rows with AP@3 and
# print the running mean after each evaluated cell.
score = 0.0
total = 0
feature_cols = features[:-1]  # every column except the trailing 'place_id'
max_neighbors = 40

for i in range(len(ypoints)-1):
    # ys[i][j] is the same for every j (meshgrid 'xy' indexing), so the
    # y band can be selected once per row instead of once per cell.
    y_lo, y_hi = ys[i][0], ys[i+1][0]
    train_band = local_train[(local_train.y >= y_lo) & (local_train.y < y_hi)]
    test_band = local_test[(local_test.y >= y_lo) & (local_test.y < y_hi)]

    for j in range(len(xpoints)-1):
        x_lo, x_hi = xs[i][j], xs[i][j+1]
        subset = train_band[(train_band.x >= x_lo) & (train_band.x < x_hi)]
        test_subset = test_band[(test_band.x >= x_lo) & (test_band.x < x_hi)]
        if len(test_subset)==0:
            print('Moving on from j=%d' % j)
            continue

        if len(subset)==0:
            # Nothing to learn from in this cell, so no prediction is
            # possible. Count the held-out rows as misses; dropping them
            # would inflate the reported mean.
            total += len(test_subset)
            continue

        y = subset['place_id']
        # Querying with more neighbours than fitted samples raises
        # ValueError, so cap the neighbourhood size in sparse cells.
        clf = KNeighborsClassifier(n_neighbors=min(max_neighbors, len(subset)))
        clf.fit(subset[feature_cols], y)
        all_preds = clf.predict_proba(test_subset[feature_cols])

        for record in range(len(all_preds)):
            # Three most probable classes, most probable first.
            top3_idx = all_preds[record].argsort()[-3:][::-1]
            preds = clf.classes_[top3_idx]
            apk_score = apk([test_subset.place_id.iloc[record]],preds,3)
            score += apk_score
            total += 1
        print(j)
        print(score/total)
    print(i)