In [1]:
import math
import pandas as pd
import numpy as np
from collections import Counter
import mpld3 
mpld3.enable_notebook()
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from scipy import stats

import xgboost as xgb
import lightgbm as lgb

import os
%config IPCompleter.greedy=True 
import warnings

os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.chdir("/Users/mirbek/Desktop/predicting rent price")

In [33]:
print(os.listdir("/Users/mirbek/Desktop/predicting rent price"))
DF = pd.read_csv('df_pre_processed.csv', header ='infer')

['Model_mistakes_sq_mt_built.jpg', 'june2.csv', 'results_KNN.csv', 'june1.csv', '.DS_Store', 'results_LGB.csv', 'images', 'Untitled.ipynb', 'Error Analysis.ipynb', 'results_XGB.csv', 'EDA_Hola Lucas_rental price.ipynb', 'df_pre_processed.csv', '.ipynb_checkpoints', 'Model_mistakes_by_price.jpg', 'Rent_price_models.ipynb', 'neigh.csv']


In [34]:
#Preprocess data for modeliing

DF = DF.drop(['Neighborhood', 'District', 'rent_price_area'], axis = 1)
#Function for splitting into clusters
def group_types(df):
    group8 = df.loc[df['Neigh_cluster'] == 8]
    group7 = df.loc[df['Neigh_cluster'] == 7]
    group6 = df.loc[df['Neigh_cluster'] == 6]
    group5 = df.loc[df['Neigh_cluster'] == 5]
    group4 = df.loc[df['Neigh_cluster'] == 4]
    group3 = df.loc[df['Neigh_cluster'] == 3]
    group2 = df.loc[df['Neigh_cluster'] == 2]
    group1 = df.loc[df['Neigh_cluster'] == 1]
    group0 = df.loc[df['Neigh_cluster'] == 0]
    return group8, group7, group6,group5, group4, group3, group2, group1, group0  

group8, group7, group6,group5, group4, group3, group2, group1, group0 = group_types(DF)  

#Dropping outliars from each group. After boxplot we set these treshholds.
group0 = group0.loc[group0['rent_price'] < 1200]
group1 = group1.loc[group1['rent_price'] < 1370]
group2 = group2.loc[group2['rent_price'] < 2000]
group3 = group3.loc[group3['rent_price'] < 1800]
group4 = group4.loc[group4['rent_price'] < 2650]
group5 = group5.loc[group5['rent_price'] < 2200]
group6 = group6.loc[group6['rent_price'] < 2750]
group7 = group7.loc[group7['rent_price'] < 3375]
group8 = group8.loc[group8['rent_price'] < 4900]

In [36]:
DF = pd.concat([group0, group1, group2, group3, group4, group5, group6, group7, group8], ignore_index=True, sort=False)

In [35]:
import plotly.express as px
fig = px.box(group0, y="rent_price")
fig.show()

In [37]:
#XGB function
def XGB(df):
    #DF['rent_price'] = np.log(DF['rent_price'])
    #DF['Avg_price_neigh'] = np.log(DF['Avg_price_neigh'])
    #Split data into train and test set + label target value
    y = df.rent_price
    X = df.drop(['rent_price'], axis=1)
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.25, random_state=0)
    #Saving test_y rows index
    test_index = test_y.index
    my_imputer = SimpleImputer()
    train_X = my_imputer.fit_transform(train_X)
    test_X = my_imputer.transform(test_X)

    #Scaling features with Standart Scaler
    scaler = StandardScaler()
    train_sc =scaler.fit_transform(train_X)
    test_sc = scaler.transform(test_X)

    dtrain = xgb.DMatrix(train_sc, label=train_y)
    dtest = xgb.DMatrix(test_sc, label=test_y)
    #Default parameters was changed after applying 'hypermarameters tunning'.
    params = {
    'colsample_bytree': 1,
    'eta': 0.01,
    'eval_metric': 'mae',
    'max_depth': 11,
    'min_child_weight': 5,
    'objective': 'reg:linear',
    'subsample': 0.9}
    #Fitting the model.
    model = xgb.train(params, dtrain, num_boost_round= 3000, evals=[(dtest, "Test")], early_stopping_rounds=50)
    
    #Predicting results
    tuned_xgb = model.predict(dtest)
    tunned_xgb_tr = model.predict(dtrain)
    #Now we will create a new dataframe for evaluation using test_index
    evaluation = DF.loc[test_index]
    evaluation['predictions_tuned_Xgb'] = tuned_xgb.astype(int)
    evaluation['residuals_tuned_Xgb_abs'] = abs(evaluation['rent_price'] - evaluation['predictions_tuned_Xgb'])
    evaluation['residuals_tuned_Xgb'] = evaluation['rent_price'] - evaluation['predictions_tuned_Xgb']
    evaluation['resid_percent_Xgb'] = evaluation['residuals_tuned_Xgb'] *100/evaluation['rent_price']
    evaluation['resid_percent_Xgb_abs'] = evaluation['residuals_tuned_Xgb_abs'] *100/evaluation['rent_price']
    
    return evaluation

#Function LGB
def LGB(df):
    #df['rent_price'] = np.log(df['rent_price'])   
    #Separating our data into features dataset x and our target dataset y 
    x=df.drop(['rent_price'], axis=1)
    y=df.rent_price 
    #Now splitting our dataset into test and train 
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25, random_state=0)
    
    #Saving test_y rows index
    test_index = y_test.index

    # Scaling features with Standart Scaler
    scaler = StandardScaler()
    train_sc =scaler.fit_transform(x_train)
    test_sc = scaler.transform(x_test)

    # create dataset for lightgbm
    lgb_train = lgb.Dataset(x_train, y_train)
    lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)
    
    # specify your configurations as a dict
    params = {}
    params['max_bin'] = 10
    params['learning_rate'] = 0.0021
    params['num_iterations'] = 4000
    params['boosting_type'] = 'gbdt'
    params['objective'] = 'regression'
    params['metric'] = 'l1'          
    params['sub_feature'] = 0.5      
    params['bagging_fraction'] = 0.85 
    params['num_leaves'] = 1000            
    params['min_hessian'] = 0.05     
    params['verbose'] = 0

    # Fitting the model
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=3000,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=50)
    
    #Predicting results
    y_pred = gbm.predict(x_test, num_iteration=gbm.best_iteration)
    
    #Saving test_y rows index
    evaluation = DF.loc[test_index]
    evaluation['predictions_lgb'] = y_pred.tolist()
    evaluation['residuals_lgb_abs'] = abs(evaluation['rent_price'] - evaluation['predictions_lgb'])
    evaluation['residuals_lgb'] = evaluation['rent_price'] - evaluation['predictions_lgb']
    evaluation['resid_percent_lgb'] = evaluation['residuals_lgb'] *100/evaluation['rent_price']
    evaluation['resid_percent_lgb_abs'] = evaluation['residuals_lgb_abs'] *100/evaluation['rent_price']

    results = evaluation['resid_percent_lgb'].describe()
    
    return evaluation

#Function for KNN
def KNN(df):
    #df['rent_price'] = np.log(df['rent_price'])
    #df['Avg_price_neigh'] = np.log(df['Avg_price_neigh'])
    #Separating our data into features dataset x and our target dataset y 
    X=df.drop(['rent_price'], axis=1)
    y=df.rent_price 
    #Now splitting our dataset into test and train 
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.25)  
    
    #Saving test_y rows index
    test_index = y_test.index
    
    steps = [('scaler', MinMaxScaler()), 
             ('model', KNeighborsRegressor())]
    pipe = Pipeline(steps)


    params = {'model__n_neighbors': range(2, 3),
              'model__metric': ['minkowski', 'manhattan'], 
              'scaler': [MinMaxScaler(), MaxAbsScaler()]}        

    gs = GridSearchCV(pipe, param_grid=params, cv=10, 
                      return_train_score=False, n_jobs=4, 
                      scoring = 'neg_mean_squared_error')

    gs.fit(X_train, y_train)
    gs_results = pd.DataFrame(gs.cv_results_)
    
    y_pred_train = gs.predict(X_train)
    y_pred_test = gs.predict(X_test)

    train_MAE = mean_absolute_error((y_train), (y_pred_train))
    test_MAE = mean_absolute_error((y_test),(y_pred_test))
    print(f'Train MAE: {train_MAE:.3f}')
    print(f'Test MAE: {train_MAE:.3f}')
    
        #Saving test_y rows index
    evaluation = DF.loc[test_index]
    evaluation['predictions_knn'] = y_pred_test.tolist()
    evaluation['residuals_knn_abs'] = abs(evaluation['rent_price'] - evaluation['predictions_knn'])
    evaluation['residuals_knn'] = evaluation['rent_price'] - evaluation['predictions_knn']
    evaluation['resid_percent_knn_abs'] = evaluation['residuals_knn_abs'] *100/evaluation['rent_price']
    evaluation['resid_percent_knn'] = evaluation['residuals_knn'] *100/evaluation['rent_price']
    results = evaluation['resid_percent_knn'].describe()
    
    return evaluation

In [38]:
#XGB MODEL
results_XGB  = XGB(df)

[0]	Test-mae:1462.16882
Will train until Test-mae hasn't improved in 50 rounds.
[1]	Test-mae:1447.54932
[2]	Test-mae:1433.05481
[3]	Test-mae:1418.71167
[4]	Test-mae:1404.57629
[5]	Test-mae:1390.55664
[6]	Test-mae:1376.66150
[7]	Test-mae:1362.92224
[8]	Test-mae:1349.29053
[9]	Test-mae:1335.81030
[10]	Test-mae:1322.41773
[11]	Test-mae:1309.18274
[12]	Test-mae:1296.13391
[13]	Test-mae:1283.21606
[14]	Test-mae:1270.40552
[15]	Test-mae:1257.71008
[16]	Test-mae:1245.14197
[17]	Test-mae:1232.72498
[18]	Test-mae:1220.40112
[19]	Test-mae:1208.26672
[20]	Test-mae:1196.20056
[21]	Test-mae:1184.26807
[22]	Test-mae:1172.44873
[23]	Test-mae:1160.74085
[24]	Test-mae:1149.12952
[25]	Test-mae:1137.62341
[26]	Test-mae:1126.28296
[27]	Test-mae:1115.10852
[28]	Test-mae:1103.95606
[29]	Test-mae:1093.02637
[30]	Test-mae:1082.11890
[31]	Test-mae:1071.32019
[32]	Test-mae:1060.58557
[33]	Test-mae:1050.03162
[34]	Test-mae:1039.62451
[35]	Test-mae:1029.29382
[36]	Test-mae:1019.06690
[37]	Test-mae:1008.89679
[38]

[323]	Test-mae:201.22041
[324]	Test-mae:201.14940
[325]	Test-mae:201.09181
[326]	Test-mae:201.03580
[327]	Test-mae:200.96287
[328]	Test-mae:200.87579
[329]	Test-mae:200.77794
[330]	Test-mae:200.69002
[331]	Test-mae:200.61687
[332]	Test-mae:200.55502
[333]	Test-mae:200.48898
[334]	Test-mae:200.41614
[335]	Test-mae:200.34312
[336]	Test-mae:200.28247
[337]	Test-mae:200.21640
[338]	Test-mae:200.15367
[339]	Test-mae:200.08727
[340]	Test-mae:200.03253
[341]	Test-mae:199.95732
[342]	Test-mae:199.88675
[343]	Test-mae:199.83566
[344]	Test-mae:199.77562
[345]	Test-mae:199.70799
[346]	Test-mae:199.65877
[347]	Test-mae:199.63200
[348]	Test-mae:199.59164
[349]	Test-mae:199.52881
[350]	Test-mae:199.47925
[351]	Test-mae:199.43016
[352]	Test-mae:199.37831
[353]	Test-mae:199.32086
[354]	Test-mae:199.28955
[355]	Test-mae:199.24641
[356]	Test-mae:199.22789
[357]	Test-mae:199.16554
[358]	Test-mae:199.12758
[359]	Test-mae:199.10494
[360]	Test-mae:199.07738
[361]	Test-mae:199.04182
[362]	Test-mae:199.02512


In [39]:
#LGB MODEL:
results_LGB  = LGB(df)


Found `num_iterations` in params. Will use it instead of argument



[1]	valid_0's l1: 555.601
Training until validation scores don't improve for 50 rounds
[2]	valid_0's l1: 554.747
[3]	valid_0's l1: 554.233
[4]	valid_0's l1: 553.278
[5]	valid_0's l1: 552.726
[6]	valid_0's l1: 551.79
[7]	valid_0's l1: 550.889
[8]	valid_0's l1: 550.385
[9]	valid_0's l1: 549.435
[10]	valid_0's l1: 548.465
[11]	valid_0's l1: 547.553
[12]	valid_0's l1: 546.7
[13]	valid_0's l1: 545.75
[14]	valid_0's l1: 544.937
[15]	valid_0's l1: 544.004
[16]	valid_0's l1: 543.244
[17]	valid_0's l1: 542.294
[18]	valid_0's l1: 541.461
[19]	valid_0's l1: 541.022
[20]	valid_0's l1: 540.257
[21]	valid_0's l1: 539.428
[22]	valid_0's l1: 538.511
[23]	valid_0's l1: 537.694
[24]	valid_0's l1: 536.761
[25]	valid_0's l1: 535.895
[26]	valid_0's l1: 534.981
[27]	valid_0's l1: 534.106
[28]	valid_0's l1: 533.443
[29]	valid_0's l1: 532.531
[30]	valid_0's l1: 531.645
[31]	valid_0's l1: 530.835
[32]	valid_0's l1: 530.026
[33]	valid_0's l1: 529.597
[34]	valid_0's l1: 528.701
[35]	valid_0's l1: 528.249
[36]	va

[312]	valid_0's l1: 363.098
[313]	valid_0's l1: 362.653
[314]	valid_0's l1: 362.438
[315]	valid_0's l1: 362.023
[316]	valid_0's l1: 361.639
[317]	valid_0's l1: 361.196
[318]	valid_0's l1: 360.819
[319]	valid_0's l1: 360.359
[320]	valid_0's l1: 359.902
[321]	valid_0's l1: 359.647
[322]	valid_0's l1: 359.413
[323]	valid_0's l1: 358.98
[324]	valid_0's l1: 358.526
[325]	valid_0's l1: 358.105
[326]	valid_0's l1: 357.756
[327]	valid_0's l1: 357.295
[328]	valid_0's l1: 356.853
[329]	valid_0's l1: 356.429
[330]	valid_0's l1: 356.031
[331]	valid_0's l1: 355.591
[332]	valid_0's l1: 355.346
[333]	valid_0's l1: 354.94
[334]	valid_0's l1: 354.629
[335]	valid_0's l1: 354.185
[336]	valid_0's l1: 353.762
[337]	valid_0's l1: 353.321
[338]	valid_0's l1: 353.095
[339]	valid_0's l1: 352.659
[340]	valid_0's l1: 352.434
[341]	valid_0's l1: 352.012
[342]	valid_0's l1: 351.619
[343]	valid_0's l1: 351.245
[344]	valid_0's l1: 350.826
[345]	valid_0's l1: 350.412
[346]	valid_0's l1: 349.997
[347]	valid_0's l1: 34

[621]	valid_0's l1: 276.683
[622]	valid_0's l1: 276.514
[623]	valid_0's l1: 276.301
[624]	valid_0's l1: 276.131
[625]	valid_0's l1: 275.969
[626]	valid_0's l1: 275.759
[627]	valid_0's l1: 275.568
[628]	valid_0's l1: 275.391
[629]	valid_0's l1: 275.181
[630]	valid_0's l1: 275.001
[631]	valid_0's l1: 274.831
[632]	valid_0's l1: 274.65
[633]	valid_0's l1: 274.459
[634]	valid_0's l1: 274.289
[635]	valid_0's l1: 274.086
[636]	valid_0's l1: 273.94
[637]	valid_0's l1: 273.749
[638]	valid_0's l1: 273.535
[639]	valid_0's l1: 273.383
[640]	valid_0's l1: 273.188
[641]	valid_0's l1: 273.039
[642]	valid_0's l1: 272.848
[643]	valid_0's l1: 272.742
[644]	valid_0's l1: 272.647
[645]	valid_0's l1: 272.464
[646]	valid_0's l1: 272.273
[647]	valid_0's l1: 272.096
[648]	valid_0's l1: 271.944
[649]	valid_0's l1: 271.794
[650]	valid_0's l1: 271.597
[651]	valid_0's l1: 271.397
[652]	valid_0's l1: 271.194
[653]	valid_0's l1: 271.031
[654]	valid_0's l1: 270.874
[655]	valid_0's l1: 270.705
[656]	valid_0's l1: 27

[917]	valid_0's l1: 239.964
[918]	valid_0's l1: 239.884
[919]	valid_0's l1: 239.807
[920]	valid_0's l1: 239.736
[921]	valid_0's l1: 239.663
[922]	valid_0's l1: 239.583
[923]	valid_0's l1: 239.493
[924]	valid_0's l1: 239.423
[925]	valid_0's l1: 239.349
[926]	valid_0's l1: 239.271
[927]	valid_0's l1: 239.191
[928]	valid_0's l1: 239.097
[929]	valid_0's l1: 239.007
[930]	valid_0's l1: 238.912
[931]	valid_0's l1: 238.829
[932]	valid_0's l1: 238.781
[933]	valid_0's l1: 238.708
[934]	valid_0's l1: 238.617
[935]	valid_0's l1: 238.566
[936]	valid_0's l1: 238.486
[937]	valid_0's l1: 238.415
[938]	valid_0's l1: 238.329
[939]	valid_0's l1: 238.245
[940]	valid_0's l1: 238.155
[941]	valid_0's l1: 238.067
[942]	valid_0's l1: 238.02
[943]	valid_0's l1: 237.953
[944]	valid_0's l1: 237.885
[945]	valid_0's l1: 237.807
[946]	valid_0's l1: 237.72
[947]	valid_0's l1: 237.638
[948]	valid_0's l1: 237.573
[949]	valid_0's l1: 237.492
[950]	valid_0's l1: 237.444
[951]	valid_0's l1: 237.363
[952]	valid_0's l1: 23

[1209]	valid_0's l1: 224.377
[1210]	valid_0's l1: 224.331
[1211]	valid_0's l1: 224.292
[1212]	valid_0's l1: 224.269
[1213]	valid_0's l1: 224.237
[1214]	valid_0's l1: 224.201
[1215]	valid_0's l1: 224.167
[1216]	valid_0's l1: 224.129
[1217]	valid_0's l1: 224.1
[1218]	valid_0's l1: 224.067
[1219]	valid_0's l1: 224.039
[1220]	valid_0's l1: 224.013
[1221]	valid_0's l1: 223.979
[1222]	valid_0's l1: 223.952
[1223]	valid_0's l1: 223.917
[1224]	valid_0's l1: 223.901
[1225]	valid_0's l1: 223.865
[1226]	valid_0's l1: 223.825
[1227]	valid_0's l1: 223.789
[1228]	valid_0's l1: 223.757
[1229]	valid_0's l1: 223.718
[1230]	valid_0's l1: 223.681
[1231]	valid_0's l1: 223.64
[1232]	valid_0's l1: 223.611
[1233]	valid_0's l1: 223.574
[1234]	valid_0's l1: 223.541
[1235]	valid_0's l1: 223.503
[1236]	valid_0's l1: 223.47
[1237]	valid_0's l1: 223.434
[1238]	valid_0's l1: 223.402
[1239]	valid_0's l1: 223.368
[1240]	valid_0's l1: 223.334
[1241]	valid_0's l1: 223.299
[1242]	valid_0's l1: 223.272
[1243]	valid_0's l

[1508]	valid_0's l1: 217.227
[1509]	valid_0's l1: 217.205
[1510]	valid_0's l1: 217.19
[1511]	valid_0's l1: 217.174
[1512]	valid_0's l1: 217.155
[1513]	valid_0's l1: 217.14
[1514]	valid_0's l1: 217.123
[1515]	valid_0's l1: 217.104
[1516]	valid_0's l1: 217.093
[1517]	valid_0's l1: 217.08
[1518]	valid_0's l1: 217.066
[1519]	valid_0's l1: 217.05
[1520]	valid_0's l1: 217.04
[1521]	valid_0's l1: 217.025
[1522]	valid_0's l1: 217.002
[1523]	valid_0's l1: 216.988
[1524]	valid_0's l1: 216.978
[1525]	valid_0's l1: 216.957
[1526]	valid_0's l1: 216.943
[1527]	valid_0's l1: 216.929
[1528]	valid_0's l1: 216.912
[1529]	valid_0's l1: 216.896
[1530]	valid_0's l1: 216.885
[1531]	valid_0's l1: 216.873
[1532]	valid_0's l1: 216.851
[1533]	valid_0's l1: 216.837
[1534]	valid_0's l1: 216.823
[1535]	valid_0's l1: 216.812
[1536]	valid_0's l1: 216.804
[1537]	valid_0's l1: 216.787
[1538]	valid_0's l1: 216.774
[1539]	valid_0's l1: 216.765
[1540]	valid_0's l1: 216.751
[1541]	valid_0's l1: 216.736
[1542]	valid_0's l1

[1792]	valid_0's l1: 213.853
[1793]	valid_0's l1: 213.846
[1794]	valid_0's l1: 213.841
[1795]	valid_0's l1: 213.831
[1796]	valid_0's l1: 213.821
[1797]	valid_0's l1: 213.811
[1798]	valid_0's l1: 213.804
[1799]	valid_0's l1: 213.795
[1800]	valid_0's l1: 213.792
[1801]	valid_0's l1: 213.782
[1802]	valid_0's l1: 213.771
[1803]	valid_0's l1: 213.766
[1804]	valid_0's l1: 213.759
[1805]	valid_0's l1: 213.752
[1806]	valid_0's l1: 213.743
[1807]	valid_0's l1: 213.735
[1808]	valid_0's l1: 213.727
[1809]	valid_0's l1: 213.715
[1810]	valid_0's l1: 213.712
[1811]	valid_0's l1: 213.707
[1812]	valid_0's l1: 213.694
[1813]	valid_0's l1: 213.686
[1814]	valid_0's l1: 213.68
[1815]	valid_0's l1: 213.675
[1816]	valid_0's l1: 213.664
[1817]	valid_0's l1: 213.654
[1818]	valid_0's l1: 213.645
[1819]	valid_0's l1: 213.642
[1820]	valid_0's l1: 213.633
[1821]	valid_0's l1: 213.623
[1822]	valid_0's l1: 213.615
[1823]	valid_0's l1: 213.606
[1824]	valid_0's l1: 213.599
[1825]	valid_0's l1: 213.589
[1826]	valid_0'

[2087]	valid_0's l1: 211.924
[2088]	valid_0's l1: 211.917
[2089]	valid_0's l1: 211.914
[2090]	valid_0's l1: 211.913
[2091]	valid_0's l1: 211.911
[2092]	valid_0's l1: 211.906
[2093]	valid_0's l1: 211.903
[2094]	valid_0's l1: 211.903
[2095]	valid_0's l1: 211.897
[2096]	valid_0's l1: 211.894
[2097]	valid_0's l1: 211.893
[2098]	valid_0's l1: 211.893
[2099]	valid_0's l1: 211.887
[2100]	valid_0's l1: 211.881
[2101]	valid_0's l1: 211.88
[2102]	valid_0's l1: 211.877
[2103]	valid_0's l1: 211.871
[2104]	valid_0's l1: 211.868
[2105]	valid_0's l1: 211.861
[2106]	valid_0's l1: 211.86
[2107]	valid_0's l1: 211.853
[2108]	valid_0's l1: 211.85
[2109]	valid_0's l1: 211.841
[2110]	valid_0's l1: 211.833
[2111]	valid_0's l1: 211.825
[2112]	valid_0's l1: 211.827
[2113]	valid_0's l1: 211.819
[2114]	valid_0's l1: 211.813
[2115]	valid_0's l1: 211.81
[2116]	valid_0's l1: 211.805
[2117]	valid_0's l1: 211.801
[2118]	valid_0's l1: 211.794
[2119]	valid_0's l1: 211.788
[2120]	valid_0's l1: 211.786
[2121]	valid_0's l

[2378]	valid_0's l1: 210.798
[2379]	valid_0's l1: 210.796
[2380]	valid_0's l1: 210.79
[2381]	valid_0's l1: 210.792
[2382]	valid_0's l1: 210.789
[2383]	valid_0's l1: 210.783
[2384]	valid_0's l1: 210.783
[2385]	valid_0's l1: 210.782
[2386]	valid_0's l1: 210.781
[2387]	valid_0's l1: 210.775
[2388]	valid_0's l1: 210.773
[2389]	valid_0's l1: 210.769
[2390]	valid_0's l1: 210.767
[2391]	valid_0's l1: 210.761
[2392]	valid_0's l1: 210.756
[2393]	valid_0's l1: 210.751
[2394]	valid_0's l1: 210.747
[2395]	valid_0's l1: 210.746
[2396]	valid_0's l1: 210.746
[2397]	valid_0's l1: 210.743
[2398]	valid_0's l1: 210.739
[2399]	valid_0's l1: 210.735
[2400]	valid_0's l1: 210.731
[2401]	valid_0's l1: 210.726
[2402]	valid_0's l1: 210.723
[2403]	valid_0's l1: 210.718
[2404]	valid_0's l1: 210.715
[2405]	valid_0's l1: 210.71
[2406]	valid_0's l1: 210.706
[2407]	valid_0's l1: 210.699
[2408]	valid_0's l1: 210.694
[2409]	valid_0's l1: 210.694
[2410]	valid_0's l1: 210.692
[2411]	valid_0's l1: 210.684
[2412]	valid_0's

[2665]	valid_0's l1: 210.051
[2666]	valid_0's l1: 210.053
[2667]	valid_0's l1: 210.049
[2668]	valid_0's l1: 210.043
[2669]	valid_0's l1: 210.044
[2670]	valid_0's l1: 210.043
[2671]	valid_0's l1: 210.039
[2672]	valid_0's l1: 210.038
[2673]	valid_0's l1: 210.037
[2674]	valid_0's l1: 210.032
[2675]	valid_0's l1: 210.031
[2676]	valid_0's l1: 210.025
[2677]	valid_0's l1: 210.025
[2678]	valid_0's l1: 210.021
[2679]	valid_0's l1: 210.014
[2680]	valid_0's l1: 210.013
[2681]	valid_0's l1: 210.013
[2682]	valid_0's l1: 210.01
[2683]	valid_0's l1: 210.007
[2684]	valid_0's l1: 210.007
[2685]	valid_0's l1: 210.005
[2686]	valid_0's l1: 210.004
[2687]	valid_0's l1: 209.997
[2688]	valid_0's l1: 209.993
[2689]	valid_0's l1: 209.995
[2690]	valid_0's l1: 209.992
[2691]	valid_0's l1: 209.994
[2692]	valid_0's l1: 209.99
[2693]	valid_0's l1: 209.984
[2694]	valid_0's l1: 209.981
[2695]	valid_0's l1: 209.978
[2696]	valid_0's l1: 209.975
[2697]	valid_0's l1: 209.972
[2698]	valid_0's l1: 209.969
[2699]	valid_0's

[2972]	valid_0's l1: 209.429
[2973]	valid_0's l1: 209.423
[2974]	valid_0's l1: 209.42
[2975]	valid_0's l1: 209.418
[2976]	valid_0's l1: 209.418
[2977]	valid_0's l1: 209.419
[2978]	valid_0's l1: 209.42
[2979]	valid_0's l1: 209.418
[2980]	valid_0's l1: 209.416
[2981]	valid_0's l1: 209.415
[2982]	valid_0's l1: 209.413
[2983]	valid_0's l1: 209.41
[2984]	valid_0's l1: 209.409
[2985]	valid_0's l1: 209.409
[2986]	valid_0's l1: 209.409
[2987]	valid_0's l1: 209.41
[2988]	valid_0's l1: 209.408
[2989]	valid_0's l1: 209.409
[2990]	valid_0's l1: 209.408
[2991]	valid_0's l1: 209.408
[2992]	valid_0's l1: 209.405
[2993]	valid_0's l1: 209.405
[2994]	valid_0's l1: 209.406
[2995]	valid_0's l1: 209.405
[2996]	valid_0's l1: 209.407
[2997]	valid_0's l1: 209.406
[2998]	valid_0's l1: 209.404
[2999]	valid_0's l1: 209.403
[3000]	valid_0's l1: 209.402
[3001]	valid_0's l1: 209.397
[3002]	valid_0's l1: 209.395
[3003]	valid_0's l1: 209.394
[3004]	valid_0's l1: 209.392
[3005]	valid_0's l1: 209.39
[3006]	valid_0's l1

[3267]	valid_0's l1: 209.09
[3268]	valid_0's l1: 209.089
[3269]	valid_0's l1: 209.09
[3270]	valid_0's l1: 209.085
[3271]	valid_0's l1: 209.083
[3272]	valid_0's l1: 209.081
[3273]	valid_0's l1: 209.076
[3274]	valid_0's l1: 209.074
[3275]	valid_0's l1: 209.073
[3276]	valid_0's l1: 209.074
[3277]	valid_0's l1: 209.071
[3278]	valid_0's l1: 209.073
[3279]	valid_0's l1: 209.072
[3280]	valid_0's l1: 209.068
[3281]	valid_0's l1: 209.068
[3282]	valid_0's l1: 209.064
[3283]	valid_0's l1: 209.06
[3284]	valid_0's l1: 209.061
[3285]	valid_0's l1: 209.058
[3286]	valid_0's l1: 209.054
[3287]	valid_0's l1: 209.052
[3288]	valid_0's l1: 209.052
[3289]	valid_0's l1: 209.052
[3290]	valid_0's l1: 209.052
[3291]	valid_0's l1: 209.051
[3292]	valid_0's l1: 209.05
[3293]	valid_0's l1: 209.05
[3294]	valid_0's l1: 209.049
[3295]	valid_0's l1: 209.049
[3296]	valid_0's l1: 209.048
[3297]	valid_0's l1: 209.047
[3298]	valid_0's l1: 209.047
[3299]	valid_0's l1: 209.047
[3300]	valid_0's l1: 209.045
[3301]	valid_0's l1

[3564]	valid_0's l1: 208.837
[3565]	valid_0's l1: 208.838
[3566]	valid_0's l1: 208.837
[3567]	valid_0's l1: 208.835
[3568]	valid_0's l1: 208.834
[3569]	valid_0's l1: 208.833
[3570]	valid_0's l1: 208.834
[3571]	valid_0's l1: 208.834
[3572]	valid_0's l1: 208.834
[3573]	valid_0's l1: 208.83
[3574]	valid_0's l1: 208.829
[3575]	valid_0's l1: 208.829
[3576]	valid_0's l1: 208.83
[3577]	valid_0's l1: 208.827
[3578]	valid_0's l1: 208.829
[3579]	valid_0's l1: 208.83
[3580]	valid_0's l1: 208.827
[3581]	valid_0's l1: 208.827
[3582]	valid_0's l1: 208.828
[3583]	valid_0's l1: 208.827
[3584]	valid_0's l1: 208.826
[3585]	valid_0's l1: 208.828
[3586]	valid_0's l1: 208.828
[3587]	valid_0's l1: 208.825
[3588]	valid_0's l1: 208.822
[3589]	valid_0's l1: 208.822
[3590]	valid_0's l1: 208.819
[3591]	valid_0's l1: 208.818
[3592]	valid_0's l1: 208.816
[3593]	valid_0's l1: 208.81
[3594]	valid_0's l1: 208.808
[3595]	valid_0's l1: 208.808
[3596]	valid_0's l1: 208.809
[3597]	valid_0's l1: 208.809
[3598]	valid_0's l

[3863]	valid_0's l1: 208.649
[3864]	valid_0's l1: 208.649
[3865]	valid_0's l1: 208.642
[3866]	valid_0's l1: 208.644
[3867]	valid_0's l1: 208.641
[3868]	valid_0's l1: 208.64
[3869]	valid_0's l1: 208.638
[3870]	valid_0's l1: 208.638
[3871]	valid_0's l1: 208.639
[3872]	valid_0's l1: 208.639
[3873]	valid_0's l1: 208.638
[3874]	valid_0's l1: 208.633
[3875]	valid_0's l1: 208.632
[3876]	valid_0's l1: 208.631
[3877]	valid_0's l1: 208.631
[3878]	valid_0's l1: 208.631
[3879]	valid_0's l1: 208.632
[3880]	valid_0's l1: 208.63
[3881]	valid_0's l1: 208.626
[3882]	valid_0's l1: 208.627
[3883]	valid_0's l1: 208.628
[3884]	valid_0's l1: 208.624
[3885]	valid_0's l1: 208.623
[3886]	valid_0's l1: 208.621
[3887]	valid_0's l1: 208.621
[3888]	valid_0's l1: 208.623
[3889]	valid_0's l1: 208.623
[3890]	valid_0's l1: 208.622
[3891]	valid_0's l1: 208.621
[3892]	valid_0's l1: 208.621
[3893]	valid_0's l1: 208.624
[3894]	valid_0's l1: 208.622
[3895]	valid_0's l1: 208.62
[3896]	valid_0's l1: 208.619
[3897]	valid_0's 

In [40]:
# KNN MODEL:
results_KNN  = KNN(df)

Train MAE: 188.676
Test MAE: 188.676


In [19]:
#Saving csv file with all predictions
results_XGB.to_csv('results_XGB.csv', index=False)
results_LGB.to_csv('results_LGB.csv', index=False)
results_KNN.to_csv('results_KNN.csv', index=False)