In [1]:
import numpy as np
import pandas as pd
from sklearn import neighbors
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import math
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
data_dir = "data/indoor-location-navigation/"
train_dir = data_dir + "train/"
test_dir = data_dir + "test/"

In [3]:
from dataclasses import dataclass

import numpy as np


@dataclass
class ReadData:
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    wifi: np.ndarray
    ibeacon: np.ndarray
    waypoint: np.ndarray


def read_data_file(data_filename):
    acce = []
    acce_uncali = []
    gyro = []
    gyro_uncali = []
    magn = []
    magn_uncali = []
    ahrs = []
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for line_data in lines:
        line_data = line_data.strip()
        if not line_data or line_data[0] == '#':
            continue

        line_data = line_data.split('\t')

        if line_data[1] == 'TYPE_ACCELEROMETER':
            acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE':
            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD':
            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_ROTATION_VECTOR':
            ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            continue

        if line_data[1] == 'TYPE_WIFI':
            sys_ts = line_data[0]
            ssid = line_data[2]
            bssid = line_data[3]
            rssi = line_data[4]
            lastseen_ts = line_data[6]
            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts]
            wifi.append(wifi_data)
            continue

        if line_data[1] == 'TYPE_BEACON':
            ts = line_data[0]
            uuid = line_data[2]
            major = line_data[3]
            minor = line_data[4]
            rssi = line_data[6]
            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi]
            ibeacon.append(ibeacon_data)
            continue

        if line_data[1] == 'TYPE_WAYPOINT':
            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

    acce = np.array(acce)
    acce_uncali = np.array(acce_uncali)
    gyro = np.array(gyro)
    gyro_uncali = np.array(gyro_uncali)
    magn = np.array(magn)
    magn_uncali = np.array(magn_uncali)
    ahrs = np.array(ahrs)
    wifi = np.array(wifi)
    ibeacon = np.array(ibeacon)
    waypoint = np.array(waypoint)

    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)

In [13]:
train = "data/5a0546857ecc773753327266_train.csv"
test = "data/5a0546857ecc773753327266_test.csv"

In [6]:
data = pd.read_csv("wifi_features/wifi_features/train/5a0546857ecc773753327266_1000_train.csv", index_col=0)

train_data, test_data = train_test_split(data, test_size=0.3, random_state = 42) 

x_train = train_data.iloc[:,:-4]
y_trainy = train_data.iloc[:,-3]
y_trainx = train_data.iloc[:,-4]
y_trainf = train_data.iloc[:,-2]

# x_train.head()

In [11]:
modely = XGBRegressor(n_estimators=125)
modely.fit(x_train, y_trainy)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=125, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [12]:
pred = modely.predict(test_data.iloc[:,:-4]) #make prediction on test set
print(pred)
y_test = test_data[['y']]
error = math.sqrt(mean_squared_error(y_test, pred)) #calculate rmse
# rmse_val.append(error) #store rmse values
print('RMSE value for k= ' , 5 , 'is:', error)

[ 16.03268  178.00539   93.811584 ... 158.8153   114.9935   161.55223 ]
RMSE value for k=  5 is: 4.80924212069156


In [2]:
pd.read_csv("wifi_features/wifi_features/train/5a0546857ecc773753327266_1000_train.csv").head()

Unnamed: 0.1,Unnamed: 0,000840e5c600de293cea57f13326f273c86c3988,00ad587dcb9c7ce3788b92e22777a22ee0efea31,00af060fc145ee6a6a50475efa57b91cbf54237f,00bcc61bdea4d52d050822d66952dd707c2fcdf3,00f0904087c01d922d6ebf3005607dfdeaf6687b,011e20ebf721a1c6dfec42e8ed1e2ac566073a2a,01d2f676abab6ec03ec5dc696bfd49d66e392ea1,01e25e4a25acd32baf5137b3031151f751fadbb4,026c2f057932da75680b21ecdbd23bf9cb9350f3,...,fdc189e5a19850397f37201f4acc378cfddcf0d6,fdc19f011587b75c11a6c30d8ca06d90107b6bde,fdf37fa13679f581bdfaae3b99e368633e0a144b,fdfe926caf5f49a88a9bcab8d025e887f422128b,fe3211f90e4ab1f500e10fe175ae6142f4b13130,ffa41c79865d7fb336f586e0dec8b080db1027fb,x,y,f,path
0,4,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,155.65668,89.40598,-1,5e158edff4c3420006d52172
1,4,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,155.65668,89.40598,-1,5e158edff4c3420006d52172
2,4,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,154.68399,81.80792,-1,5e158edff4c3420006d52172
3,4,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,154.68399,81.80792,-1,5e158edff4c3420006d52172
4,4,-999,-999,-999,-999,-999,-999,-999,-999,-999,...,-999,-999,-999,-999,-999,-999,154.68399,81.80792,-1,5e158edff4c3420006d52172


In [14]:
train_df = pd.read_csv(train)
train_df.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99,x,y,floor,path
0,db01605eac3f33540038bd9722aba25774871d43,965f254a2e8d05bbb40bd2413ff61de3ad6c4151,0b64e537cc3d1818ec46f94f8dc14043a98d0089,922e582c66016a2b9f64e38f89ebe82f66eefb24,dc4c46287575c45f3e32c022d868d047b485ed4c,93e20595eeef175d3aa3c3381f6a22ee792d48d9,b2b0ddbb5a2aadfc6ab2f388db584b6c280d3f82,8c936564ea4b4300576f53136505527eb5972c07,61c3aaf1a526f808c05952ea3f098e37354a674a,3f564032c7eebc173b38aee35225e323d4389faf,...,-79,-79,-79,-79,-79,-79,107.85044,161.89262,-1,5e1580adf4c3420006d520d4
1,965f254a2e8d05bbb40bd2413ff61de3ad6c4151,db01605eac3f33540038bd9722aba25774871d43,1f37bbb3f42125f665b83584d0376b21ec3eb43c,922e582c66016a2b9f64e38f89ebe82f66eefb24,dc4c46287575c45f3e32c022d868d047b485ed4c,93e20595eeef175d3aa3c3381f6a22ee792d48d9,5c10b343d767a30515e6015de25751a2883328f8,3f564032c7eebc173b38aee35225e323d4389faf,46c934893439700099d03a6892ea934ecb2729d6,16374260af7d03b10f167358a4f6a70620e131f4,...,-79,-79,-79,-79,-80,-80,107.85044,161.89262,-1,5e1580adf4c3420006d520d4
2,965f254a2e8d05bbb40bd2413ff61de3ad6c4151,db01605eac3f33540038bd9722aba25774871d43,dc4c46287575c45f3e32c022d868d047b485ed4c,922e582c66016a2b9f64e38f89ebe82f66eefb24,93e20595eeef175d3aa3c3381f6a22ee792d48d9,61c3aaf1a526f808c05952ea3f098e37354a674a,ce28608c3d091ac0d25d84459ebad253edf83e1f,1bb0e992cff45a54d29e97f47a7d1281435a5e3b,1f37bbb3f42125f665b83584d0376b21ec3eb43c,ca86c5b074c5768e481e069b751bf22c6d95bd48,...,-77,-77,-78,-78,-78,-78,98.33065,163.34334,-1,5e1580adf4c3420006d520d4
3,61c3aaf1a526f808c05952ea3f098e37354a674a,922e582c66016a2b9f64e38f89ebe82f66eefb24,93e20595eeef175d3aa3c3381f6a22ee792d48d9,db01605eac3f33540038bd9722aba25774871d43,965f254a2e8d05bbb40bd2413ff61de3ad6c4151,0f5daed11a61e0d6941a1a42ff428ca216d61003,ce28608c3d091ac0d25d84459ebad253edf83e1f,40d99a3e5214aa704f637b7d72631e69550ee256,2aa08d092d0199c06d22684642ef1c79d9722adb,149c09a117b9851201c75f97b4a7cc94b75fdcb4,...,-75,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4
4,965f254a2e8d05bbb40bd2413ff61de3ad6c4151,93e20595eeef175d3aa3c3381f6a22ee792d48d9,61c3aaf1a526f808c05952ea3f098e37354a674a,51782c2fabefa97e99dca895fd36f1a47e214610,db01605eac3f33540038bd9722aba25774871d43,0f5daed11a61e0d6941a1a42ff428ca216d61003,ce28608c3d091ac0d25d84459ebad253edf83e1f,4c83a7a1e51bfa8a5fa20e854ab3feec057c52c9,599fa96d549ed870671d6bc1927aaa8bbaacca12,dc9fd0f591e9bfc22748106f31d72a23c1d294fd,...,-75,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4


In [15]:
test_df = pd.read_csv(test)
test_df.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99
0,eebf5db207eec2f3e041f92153d789270f346821,323607d8444900d64151ee06d164738ac727bbce,7805f319f3f591986effe78c5b41143180278f2d,02a1be3a5dab38320f879489d8a1e0f2a72768b3,b26914599f6d9ba16b43975394e1eeb9d82f4bab,6bc91b3951089c3a225396608b138ca178479924,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,b2546cae6e588d38618eacc557dd0385812197cf,8464ea586ee5479e1250f938d7c01e9bc68cefe8,...,-60,-60,-60,-60,-61,-61,-61,-61,-61,-61
1,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,bd9bc0a2092c040bfe6ba12f8aafac24e83b312a,d771612396c3e2e557e986fafd9fc2c56a99d3cd,13b7aeaf441f2161481481fe67eace721cff07ab,c48db7f3ed1858bb4fc191230e3d79d5eb178604,b4dbb0b30caa1d0f21b7b4185ba061556cada67a,b2546cae6e588d38618eacc557dd0385812197cf,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,6bc91b3951089c3a225396608b138ca178479924,...,-61,-61,-61,-61,-61,-61,-61,-61,-61,-62
2,6bc91b3951089c3a225396608b138ca178479924,b26914599f6d9ba16b43975394e1eeb9d82f4bab,b2546cae6e588d38618eacc557dd0385812197cf,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,7805f319f3f591986effe78c5b41143180278f2d,b4dbb0b30caa1d0f21b7b4185ba061556cada67a,bd9bc0a2092c040bfe6ba12f8aafac24e83b312a,d771612396c3e2e557e986fafd9fc2c56a99d3cd,...,-60,-60,-61,-61,-61,-61,-61,-62,-62,-62
3,de53ffe7e3c71c9ed5c845fa50e0521efa5f3685,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,bccd6a9054f8649ad43fe96b766687fb769b064f,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,7590cf109f6ff3277fd18d10f4727a8777d675ce,7129f110688db020946105b359cae2e59338135b,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,6915ad24a2edf8047f749233e19e9853f5dc17fd,12911a64fecf13f2e9fb0aaed554621e3b0bacde,a929157f3cc32a433b02ad7d7876e9a1678d3944,...,-59,-59,-60,-60,-60,-60,-61,-61,-61,-61
4,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,a929157f3cc32a433b02ad7d7876e9a1678d3944,6bc91b3951089c3a225396608b138ca178479924,000840e5c600de293cea57f13326f273c86c3988,662791f44cd61d0426634cf093bf0ff1bfd88c2c,c729e2e4f5a2888583cfebcd98b3178023f58b8e,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,d5dad1fcdae9e773ede884b3b4d781d5ee1ec90e,6915ad24a2edf8047f749233e19e9853f5dc17fd,...,-63,-63,-63,-63,-63,-63,-63,-63,-63,-64


In [23]:
train_data = train_df.sample(frac=0.8, random_state=25)
test_data = train_df.drop(train_data.index)

train_set = train_data.copy()
for i in range(0, 100):
    train_set['bssid_' + str(i)] = train_set['bssid_' + str(i)].astype('category')
    train_set['bssid_' + str(i)] = train_set['bssid_' + str(i)].cat.codes

train_set = train_set.drop("x", axis=1)
train_set = train_set.drop("y", axis=1)
train_set = train_set.drop("floor", axis=1)
train_set = train_set.drop("path", axis=1)

train_set.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99
5098,580,1365,1305,1133,595,1327,1196,942,908,997,...,-69,-69,-70,-70,-70,-70,-70,-70,-70,-70
5064,880,241,668,462,299,1084,483,151,220,315,...,-69,-69,-69,-69,-69,-69,-69,-69,-69,-69
3081,528,1002,1050,428,1528,331,1437,1040,70,853,...,-69,-69,-70,-70,-70,-70,-70,-70,-70,-70
543,195,607,1547,1126,440,1010,1025,568,459,1003,...,-86,-86,-86,-86,-87,-87,-87,-88,-88,-89
5560,1023,1330,445,1516,289,271,1218,853,1301,1688,...,-72,-72,-72,-72,-72,-73,-73,-73,-73,-73


In [17]:
# train, test = train_test_split(train_set, test_size=0.3, random_state = 42)
# test_df.head()
test_set = train_data[['floor']]
test_set.head()


Unnamed: 0,floor
5098,1
5064,1
3081,0
543,-1
5560,1


In [18]:
test_df.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99
0,eebf5db207eec2f3e041f92153d789270f346821,323607d8444900d64151ee06d164738ac727bbce,7805f319f3f591986effe78c5b41143180278f2d,02a1be3a5dab38320f879489d8a1e0f2a72768b3,b26914599f6d9ba16b43975394e1eeb9d82f4bab,6bc91b3951089c3a225396608b138ca178479924,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,b2546cae6e588d38618eacc557dd0385812197cf,8464ea586ee5479e1250f938d7c01e9bc68cefe8,...,-60,-60,-60,-60,-61,-61,-61,-61,-61,-61
1,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,bd9bc0a2092c040bfe6ba12f8aafac24e83b312a,d771612396c3e2e557e986fafd9fc2c56a99d3cd,13b7aeaf441f2161481481fe67eace721cff07ab,c48db7f3ed1858bb4fc191230e3d79d5eb178604,b4dbb0b30caa1d0f21b7b4185ba061556cada67a,b2546cae6e588d38618eacc557dd0385812197cf,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,6bc91b3951089c3a225396608b138ca178479924,...,-61,-61,-61,-61,-61,-61,-61,-61,-61,-62
2,6bc91b3951089c3a225396608b138ca178479924,b26914599f6d9ba16b43975394e1eeb9d82f4bab,b2546cae6e588d38618eacc557dd0385812197cf,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,d84cce12fbfba61bf930123050f61a11e2a29310,5b225e187d0dec3110683a74d0c9a5a4cb2022f5,7805f319f3f591986effe78c5b41143180278f2d,b4dbb0b30caa1d0f21b7b4185ba061556cada67a,bd9bc0a2092c040bfe6ba12f8aafac24e83b312a,d771612396c3e2e557e986fafd9fc2c56a99d3cd,...,-60,-60,-61,-61,-61,-61,-61,-62,-62,-62
3,de53ffe7e3c71c9ed5c845fa50e0521efa5f3685,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,bccd6a9054f8649ad43fe96b766687fb769b064f,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,7590cf109f6ff3277fd18d10f4727a8777d675ce,7129f110688db020946105b359cae2e59338135b,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,6915ad24a2edf8047f749233e19e9853f5dc17fd,12911a64fecf13f2e9fb0aaed554621e3b0bacde,a929157f3cc32a433b02ad7d7876e9a1678d3944,...,-59,-59,-60,-60,-60,-60,-61,-61,-61,-61
4,1d1d62dcf72481cc9580fed3b724f0d27015aaf1,a929157f3cc32a433b02ad7d7876e9a1678d3944,6bc91b3951089c3a225396608b138ca178479924,000840e5c600de293cea57f13326f273c86c3988,662791f44cd61d0426634cf093bf0ff1bfd88c2c,c729e2e4f5a2888583cfebcd98b3178023f58b8e,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,d5dad1fcdae9e773ede884b3b4d781d5ee1ec90e,6915ad24a2edf8047f749233e19e9853f5dc17fd,...,-63,-63,-63,-63,-63,-63,-63,-63,-63,-64


In [20]:
pred_df = test_data.copy()
for i in range(0, 100):
    pred_df['bssid_' + str(i)] = pred_df['bssid_' + str(i)].astype('category')
    pred_df['bssid_' + str(i)] = pred_df['bssid_' + str(i)].cat.codes

pred_df = pred_df.drop("path", axis=1)
pred_df = pred_df.drop("x", axis=1)
pred_df = pred_df.drop("y", axis=1)
pred_df = pred_df.drop("floor", axis=1)

pred_df.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99
3,319,498,545,807,544,59,803,267,165,85,...,-75,-75,-75,-75,-75,-75,-76,-76,-77,-77
14,409,862,151,666,550,376,593,779,80,118,...,-71,-71,-71,-71,-71,-72,-72,-72,-72,-72
23,239,720,912,743,646,229,322,189,112,171,...,-75,-75,-75,-75,-75,-76,-76,-76,-76,-76
25,239,704,760,662,223,906,900,1006,760,892,...,-75,-75,-75,-75,-75,-75,-75,-75,-75,-75
32,115,702,665,139,221,897,362,534,926,74,...,-81,-81,-81,-82,-82,-83,-83,-83,-83,-83


In [24]:
model = neighbors.KNeighborsClassifier(n_neighbors = 35)

model.fit(train_set, test_set)  #fit the model
pred = model.predict(pred_df.values) #make prediction on test set
print(pred)
y_test = test_data[['floor']]
error = math.sqrt(mean_squared_error(y_test, pred)) #calculate rmse
# rmse_val.append(error) #store rmse values
print('RMSE value for k= ' , 5 , 'is:', error)

  return self._fit(X, y)


[ 0 -1 -1 ... -1 -1 -1]
RMSE value for k=  5 is: 2.244591850794731


In [32]:
t = y_test['floor'] == pred
t.value_counts()

False    1477
True      382
Name: floor, dtype: int64

In [None]:
y_test

In [33]:
# train_set.head()
# from sklearn.ensemble import GradientBoostingRegressor
model = XGBRegressor(eval_metric ='rmse', n_estimators=500, eta=0.1,
colsample_bytree=0.4, reg_lambda=2, reg_alpha=8)
# fit model
model.fit(train_set.values, test_set.values)

pred = model.predict(pred_df.values) #make prediction on test set
print(pred)
y_test = test_data[['x']]
error = math.sqrt(mean_squared_error(y_test, pred)) #calculate rmse
# rmse_val.append(error) #store rmse values
print('RMSE value for k= ' , 5 , 'is:', error)

[105.36975   97.92057   69.29567  ...  97.38879   74.102715  59.766552]
RMSE value for k=  5 is: 48.79697289377855


In [38]:
# train_set.head()
# from sklearn.ensemble import GradientBoostingRegressor

lgb_params = {'objective': 'root_mean_squared_error',
              'boosting_type': 'gbdt',
              'n_estimators': 50000,
              'learning_rate': 0.1,
              'num_leaves': 90,
              'colsample_bytree': 0.4,
              'subsample': 0.6,
              'subsample_freq': 2,
              'bagging_seed': 42,
              'reg_alpha': 8,
              'reg_lambda': 2,
              'random_state': 42,
              'n_jobs': -1
              }

model = LGBMRegressor(**lgb_params)
# fit model
model.fit(train_set.values, test_set.values)

pred = model.predict(pred_df.values) #make prediction on test set
print(pred)
y_test = test_data[['x']]
error = math.sqrt(mean_squared_error(y_test, pred)) #calculate rmse
# rmse_val.append(error) #store rmse values
print('RMSE value for k= ' , 5 , 'is:', error)

  y = column_or_1d(y, warn=True)


[94.19092361 88.05430355 61.67235026 ... 73.93105703 63.70671616
 72.37796199]
RMSE value for k=  5 is: 50.866361572008586
