In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold, GroupKFold, cross_val_score

In [2]:
# One seed shared by Python's and NumPy's global RNGs; the KFold splitter
# created later in the notebook reuses the same value.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

In [3]:
# Directories holding the two precomputed WiFi feature sets; the file
# discovery cell looks for train/*_train.csv inside each of them.
wiFiDtFeatureDir = "referencePublicNotebooks/waypt_WiFiDt1000Feat/"
wiFiIDFeatureDir = "referencePublicNotebooks/waypt_WiFiID1000Feat/"
# Number of splits handed to KFold for cross-validation.
numFolds = 5

In [4]:
# the metric used in this competition
def comp_metric(xhat, yhat, fhat, x, y, f):
    """Return the average per-sample positioning error.

    For every sample the error is the Euclidean distance between the
    predicted position (xhat, yhat) and the true position (x, y), plus
    15 times the absolute difference between predicted floor ``fhat`` and
    true floor ``f``. The summed error is divided by ``xhat.shape[0]``,
    so ``xhat`` must be an object exposing ``.shape`` (e.g. a NumPy array).
    """
    dx_squared = np.power(xhat - x, 2)
    dy_squared = np.power(yhat - y, 2)
    planar_error = np.sqrt(dx_squared + dy_squared)
    floor_penalty = 15 * np.abs(fhat - f)
    sample_count = xhat.shape[0]
    return (planar_error + floor_penalty).sum() / sample_count

# Collect the training feature CSVs of both feature sets (only the train
# split is listed here). Each list is sorted by path.
wiFiIDFeatureTrainFiles, wiFiDtFeatureTrainFiles = (
    sorted(glob.glob(os.path.join(featureDir, 'train/*_train.csv')))
    for featureDir in (wiFiIDFeatureDir, wiFiDtFeatureDir)
)

In [5]:
# Show how many training files were found for each feature set.
tuple(map(len, (wiFiIDFeatureTrainFiles, wiFiDtFeatureTrainFiles)))

(24, 24)

In [6]:
# Position in the sorted train-file lists of the CSV pair that the next
# cell loads (0 selects the first file of each list).
e = 0

In [7]:
# Load the e-th training CSV of each feature set into its own DataFrame.
wiFiIDdf, wiFiDtdf = (
    pd.read_csv(trainFiles[e])
    for trainFiles in (wiFiIDFeatureTrainFiles, wiFiDtFeatureTrainFiles)
)

In [8]:
# Preview the first three rows of the ID-feature frame.
wiFiIDdf.head(n=3)

Unnamed: 0,000840e5c600de293cea57f13326f273c86c3988,00ad587dcb9c7ce3788b92e22777a22ee0efea31,00af060fc145ee6a6a50475efa57b91cbf54237f,00bcc61bdea4d52d050822d66952dd707c2fcdf3,00f0904087c01d922d6ebf3005607dfdeaf6687b,011e20ebf721a1c6dfec42e8ed1e2ac566073a2a,01d2f676abab6ec03ec5dc696bfd49d66e392ea1,01e25e4a25acd32baf5137b3031151f751fadbb4,026c2f057932da75680b21ecdbd23bf9cb9350f3,028a310e23177c3747d37971678dd964ee28ce17,...,fdc189e5a19850397f37201f4acc378cfddcf0d6,fdc19f011587b75c11a6c30d8ca06d90107b6bde,fdf37fa13679f581bdfaae3b99e368633e0a144b,fdfe926caf5f49a88a9bcab8d025e887f422128b,fe3211f90e4ab1f500e10fe175ae6142f4b13130,ffa41c79865d7fb336f586e0dec8b080db1027fb,x,y,f,path
0,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,224.45633,160.10567,-1,5e157323a280850006f3d017
1,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,232.462,164.41673,-1,5e157323a280850006f3d017
2,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,225.08406,165.7717,-1,5e157323a280850006f3d017


In [9]:
# Preview the first three rows of the Dt-feature frame.
wiFiDtdf.head(n=3)

Unnamed: 0,000840e5c600de293cea57f13326f273c86c3988,00ad587dcb9c7ce3788b92e22777a22ee0efea31,00af060fc145ee6a6a50475efa57b91cbf54237f,00bcc61bdea4d52d050822d66952dd707c2fcdf3,00f0904087c01d922d6ebf3005607dfdeaf6687b,011e20ebf721a1c6dfec42e8ed1e2ac566073a2a,01d2f676abab6ec03ec5dc696bfd49d66e392ea1,01e25e4a25acd32baf5137b3031151f751fadbb4,026c2f057932da75680b21ecdbd23bf9cb9350f3,028a310e23177c3747d37971678dd964ee28ce17,...,fd179c5e4fd5e33493ae290adbbda2950ecf0427,fd1a502adb446e835797a88fad8e79d1e0bf4b4a,fd977a3af7be241a9ed0213acb3aa75e5dc00253,fdb1ad87bd6fb08014267f2586faeed0edc7412b,fdc189e5a19850397f37201f4acc378cfddcf0d6,fdc19f011587b75c11a6c30d8ca06d90107b6bde,fdf37fa13679f581bdfaae3b99e368633e0a144b,fdfe926caf5f49a88a9bcab8d025e887f422128b,fe3211f90e4ab1f500e10fe175ae6142f4b13130,ffa41c79865d7fb336f586e0dec8b080db1027fb
0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
1,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
2,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0


In [None]:
# The last four columns of the ID frame are x, y, f and path (see the preview
# above); everything before them is used as features. The Dt frame is used
# in full.
wiFiDtFeature = wiFiDtdf.to_numpy()
wiFiIDFeature = wiFiIDdf.iloc[:, :-4].to_numpy()

# Targets are picked by position counted from the end of the ID frame:
# -4 -> x, -3 -> y, -2 -> f.
y_trainx = wiFiIDdf.iloc[:, -4].values
y_trainy = wiFiIDdf.iloc[:, -3].values
y_trainf = wiFiIDdf.iloc[:, -2].values

In [None]:
# Sanity check: print the shape of both feature arrays and the three targets.
print(*(arr.shape for arr in (wiFiDtFeature, wiFiIDFeature, y_trainx, y_trainy, y_trainf)))

In [None]:
# Rescale both feature blocks. Each array is rebuilt from its source
# DataFrame rather than divided in place, so re-running this cell cannot
# shrink already-scaled values a second time (the previous form reassigned
# each array onto itself and was not idempotent).
# In the previewed rows the Dt values are 1000.0 and the ID values are -999,
# which these divisors map to 1.0 and -1.0 respectively.
wiFiDtFeature = wiFiDtdf.to_numpy().astype(float) / 1000.0
wiFiIDFeature = wiFiIDdf.iloc[:, :-4].to_numpy().astype(float) / 999.0

In [None]:
# Place the ID-based and Dt-based feature columns side by side so every row
# carries both feature sets. Both inputs come from DataFrame.to_numpy() and
# are therefore 2-D, so joining along axis 1 stacks them column-wise.
x_train = np.concatenate((wiFiIDFeature, wiFiDtFeature), axis=1)
print(x_train.shape)

In [None]:
# Shuffled K-fold splitter, seeded so the fold assignment is repeatable.
# NOTE(review): rows carry a `path` column and GroupKFold is imported but
# unused; a plain shuffled KFold can put rows of one path in both train and
# validation folds. Confirm whether grouping by path is wanted.
kfold = KFold(n_splits=numFolds, shuffle=True, random_state=SEED)

# One estimator per target: regressors for the x / y coordinates and a
# classifier for the floor, all with the same tree budget.
modelx = lgb.LGBMRegressor(num_leaves=90, n_estimators=125)
modely = lgb.LGBMRegressor(num_leaves=90, n_estimators=125)
modelf = lgb.LGBMClassifier(num_leaves=90, n_estimators=125)

In [None]:
# Cross-validate the y-coordinate regressor; one score per fold is returned.
results = cross_val_score(estimator=modely, X=x_train, y=y_trainy, cv=kfold)
print(f"Cross val score for y coordinate is {results}")

In [None]:
# Cross-validate the x-coordinate regressor. This uses modelx, the estimator
# created for the x target; the earlier version passed modely here, which
# only gave the same numbers because both regressors share hyperparameters.
results = cross_val_score(modelx, x_train, y_trainx, cv=kfold)
print(f"Cross val score for x coordinate is {results}")

In [None]:
# Floor is a discrete label, so evaluate it with the classifier modelf rather
# than the y-coordinate regressor. With no explicit `scoring`,
# cross_val_score falls back to the estimator's own score method: accuracy
# for a classifier, R^2 for a regressor. The previous call therefore printed
# R^2 of a regression on floor numbers instead of floor accuracy.
results = cross_val_score(modelf, x_train, y_trainf, cv=kfold)
print(f"Cross val score for floor is {results}")