## Import libraries

In [7]:
import numpy as np
import pandas as pd
from  matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
from sklearn.feature_selection import SequentialFeatureSelector
import xgboost
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
from sklearn.inspection import permutation_importance

## Import data

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/M2/PIP 2023/Données/Data_development/train_v5.csv')
test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/M2/PIP 2023/Données/Data_development/test_v5.csv')
validation = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/M2/PIP 2023/Données/Data_development/validation_v5.csv')

In [4]:
Y_train = train[['loc_initiator_x', 'loc_initiator_y']]
X_train = train.drop(["loc_initiator_x",'loc_initiator_y'], axis=1, inplace=False)

Y_test = test[['loc_initiator_x', 'loc_initiator_y']]
X_test = test.drop(["loc_initiator_x",'loc_initiator_y'], axis=1, inplace=False)

Y_val = validation[['loc_initiator_x', 'loc_initiator_y']]
X_val = validation.drop(["loc_initiator_x",'loc_initiator_y'], axis=1, inplace=False)

## Cross validation

In [6]:
l_n_nei = []
for elem in np.linspace(70,300,15):
    l_n_nei.append(int(elem))
l_weigths = ['uniform', 'distance']
l_algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']
l_leaf_size = l_n_nei.copy()

parameters = {'n_neighbors':l_n_nei,
             'weights': l_weigths,
             'algorithm': l_algorithm,
             'leaf_size' : l_leaf_size}
             
reg = KNeighborsRegressor()
clf = GridSearchCV(reg, parameters)
clf.fit(X_train,Y_train)

GridSearchCV(estimator=KNeighborsRegressor(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [70, 86, 102, 119, 135, 152, 168, 185,
                                       201, 217, 234, 250, 267, 283, 300],
                         'n_neighbors': [70, 86, 102, 119, 135, 152, 168, 185,
                                         201, 217, 234, 250, 267, 283, 300],
                         'weights': ['uniform', 'distance']})

In [8]:
params_opti = clf.cv_results_['params'][clf.best_index_]
params_opti

{'algorithm': 'auto',
 'leaf_size': 70,
 'n_neighbors': 70,
 'weights': 'distance'}

## Predict

In [9]:
knn = KNeighborsRegressor(algorithm = params_opti['algorithm'], n_neighbors= params_opti['n_neighbors'], 
                          weights = params_opti['weights'], leaf_size = params_opti['leaf_size'])
knn = knn.fit(X_train,Y_train)
pred_test = knn.predict(X_test)
pred_val = knn.predict(X_val)

## Score

In [5]:
def score_distanceM(Xobserve,Yobserve,Xprevu,Yprevu):
    X=(Xobserve-Xprevu)**2
    Y=(Yobserve-Yprevu)**2
    return np.mean(np.sqrt(X+Y))

In [10]:
print("mae test : ", mean_absolute_error(Y_test, pred_test))
print("rmse test : ", np.sqrt(mean_squared_error(Y_test,pred_test)))
print("\n")
print("mae validation : ", mean_absolute_error(Y_val, pred_val))
print("rmse validation : ", np.sqrt(mean_squared_error(Y_val,pred_val)))

mae test :  1.5934578598848046
rmse test :  2.014218445226948


mae validation :  0.3677190578868672
rmse validation :  0.6518460819284185


In [11]:
print("score distance test : ",
      score_distanceM(Xobserve = Y_test['loc_initiator_x'],
                      Yobserve = Y_test['loc_initiator_y'],
                      Xprevu = pred_test[:,0],
                      Yprevu = pred_test[:,1]))

score distance test :  2.3440139437385907


In [12]:
print("score distance validation : ",
      score_distanceM(Xobserve = Y_val['loc_initiator_x'],
                      Yobserve = Y_val['loc_initiator_y'],
                      Xprevu = pred_val[:,0],
                      Yprevu = pred_val[:,1]))

score distance validation :  0.5409339464980442
