**This notebook only changes the number of estimators of the lgb model. Please upvote the original [notebook](https://www.kaggle.com/devinanzelmo/wifi-features-lightgbm-starter).**

**Hats off to [Devin](https://www.kaggle.com/devinanzelmo) for his generous sharing!**

### Simple benchmark using wifi features and lightgbm 

This notebook shows how to use the wifi features I made to predict phone position. There is a lot of room for improvement, and for people interested in hyperparameter tuning etc., these features are an easy way to get started on this competition. The wifi features are available in [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features). See this [forum post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215445) for information on the approach. The code to generate the features is available in [this notebook](https://www.kaggle.com/devinanzelmo/wifi-features).

Proper cross-validation should be added. The path file names for the individual training examples are currently not available, which means it is not possible to stratify by path file.


In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import glob
import os

In [None]:
# Directory holding the precomputed wifi feature CSVs; the `*_train.csv` and
# `*_test.csv` files inside it are discovered with glob further down.
feature_dir = "../input/indoor-navigation-and-location-wifi-features/wifi_features"

In [None]:
# the metric used in this competition
def comp_metric(xhat, yhat, fhat, x, y, f):
    """Return the mean competition error over all samples.

    The per-sample error is the Euclidean distance between the predicted and
    the true (x, y) position plus a penalty of 15 per unit of absolute floor
    error. The result is the sum of those errors divided by the number of
    samples (the length of the first axis of ``xhat``).

    Parameters
    ----------
    xhat, yhat, fhat : array-like
        Predicted x coordinate, y coordinate and floor.
    x, y, f : array-like
        Ground-truth x coordinate, y coordinate and floor.

    Returns
    -------
    float
        Mean per-sample error. Empty inputs yield ``nan`` (0 / 0).
    """
    # Coerce everything to plain arrays: this lets lists/tuples through and,
    # for pandas Series, makes the arithmetic positional instead of silently
    # aligning on index labels (which would introduce NaNs that .sum() skips).
    xhat, yhat, fhat, x, y, f = (
        np.asarray(values) for values in (xhat, yhat, fhat, x, y, f)
    )

    position_error = np.sqrt(np.power(xhat - x, 2) + np.power(yhat - y, 2))
    floor_penalty = 15 * np.abs(fhat - f)
    return (position_error + floor_penalty).sum() / xhat.shape[0]

# Discover the train/test feature files (sorted by path so both lists come out
# in the same order) and load the sample submission, indexed by its first column.
train_files = glob.glob(os.path.join(feature_dir, '*_train.csv'))
train_files.sort()

test_files = glob.glob(os.path.join(feature_dir, '*_test.csv'))
test_files.sort()

ssubm = pd.read_csv(
    '../input/indoor-location-navigation/sample_submission.csv',
    index_col=0,
)

In [None]:
predictions = list()

# Seed for the train/hold-out shuffle so the printed scores are reproducible.
SPLIT_SEED = 42

# change 75 to 150
LGB_PARAMS = dict(n_estimators=150, num_leaves=127)

# Train and test files are paired by position in the two sorted lists, so the
# lists must describe exactly the same set of sites.
if len(train_files) != len(test_files):
    raise ValueError(
        "found {} train files but {} test files".format(
            len(train_files), len(test_files)))

for file, test_file in zip(train_files, test_files):
    # Both lists come from '<prefix>_train.csv' / '<prefix>_test.csv' globs;
    # make sure the pair belongs to the same prefix before using one file's
    # models to predict the other file's rows.
    site = os.path.basename(file)[:-len('_train.csv')]
    if os.path.basename(test_file) != site + '_test.csv':
        raise ValueError(
            "train/test file mismatch: {} vs {}".format(file, test_file))

    data = pd.read_csv(file, index_col=0)
    test_data = pd.read_csv(test_file, index_col=0)

    # Shuffle the rows before splitting. np.random.shuffle(data.values) is not
    # reliable for this: when the columns do not share one dtype, `.values` is
    # a temporary copy and the frame itself stays in file order.
    data = data.sample(frac=1.0, random_state=SPLIT_SEED)

    # 95% of the rows for training, the remaining 5% as a hold-out set.
    cutoff = int(data.shape[0] * 0.95)

    # The last three columns are used as the targets (x, y, floor in that
    # order); everything before them is fed to the models as features.
    x_train = data.iloc[:cutoff, :-3]
    y_trainx = data.iloc[:cutoff, -3]
    y_trainy = data.iloc[:cutoff, -2]
    y_trainf = data.iloc[:cutoff, -1]

    x_valid = data.iloc[cutoff:, :-3]
    y_validx = data.iloc[cutoff:, -3]
    y_validy = data.iloc[cutoff:, -2]
    y_validf = data.iloc[cutoff:, -1]

    # One regressor per coordinate, one classifier for the floor.
    modely = lgb.LGBMRegressor(**LGB_PARAMS)
    modely.fit(x_train, y_trainy)

    modelx = lgb.LGBMRegressor(**LGB_PARAMS)
    modelx.fit(x_train, y_trainx)

    modelf = lgb.LGBMClassifier(**LGB_PARAMS)
    modelf.fit(x_train, y_trainf)

    predsx = modelx.predict(x_valid)
    predsy = modely.predict(x_valid)
    predsf = modelf.predict(x_valid)

    # Report the hold-out score for this file.
    print(file)
    print(data.shape)
    print(comp_metric(predsx, predsy, predsf,
                      y_validx.values, y_validy.values, y_validf.values))

    # Every column except the last one is used as a feature; the
    # "site_path_timestamp" column supplies the submission row ids.
    # NOTE(review): this assumes "site_path_timestamp" is the last column.
    test_features = test_data.iloc[:, :-1]
    test_predsx = modelx.predict(test_features)
    test_predsy = modely.predict(test_features)
    test_predsf = modelf.predict(test_features)

    # Columns are stacked as (floor, x, y) and then labelled with the sample
    # submission's column names.
    # NOTE(review): assumes ssubm.columns is in that same order - confirm.
    test_preds = pd.DataFrame(
        np.column_stack((test_predsf, test_predsx, test_predsy)))
    test_preds.columns = ssubm.columns
    test_preds.index = test_data["site_path_timestamp"]
    # Stacking with the float coordinates turned the floor labels into floats.
    test_preds["floor"] = test_preds["floor"].astype(int)
    predictions.append(test_preds)

In [None]:
# Build the submission: stack the per-file predictions, put the rows in the
# order of the sample submission's index, and write the result to disk.
all_preds = pd.concat(predictions).reindex(ssubm.index)
all_preds.to_csv('submission.csv')