In [40]:
import sklearn
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from PyRadioLoc.Utils.GeoUtils import GeoUtils
%run utils.ipynb

In [41]:
def get_distance_array(y_pred, y_test):
    """Return the per-sample distance between predicted and reference points.

    Parameters
    ----------
    y_pred : array-like, one row per sample
        Predicted coordinates. Entries 0 and 1 of each row are passed to
        ``GeoUtils.distanceInKm`` as its third and fourth arguments
        (presumably latitude and longitude; confirm against GeoUtils).
    y_test : array-like, one row per sample
        Reference coordinates. Entries 0 and 1 of each row are passed as the
        first and second arguments.

    Returns
    -------
    list
        One ``GeoUtils.distanceInKm`` result per paired row, in input order.

    Notes
    -----
    Rows are paired with ``zip``, so surplus rows of the longer input are
    ignored.
    """
    # Iterating a DataFrame yields its column labels, not its rows; converting
    # first lets callers pass DataFrames, arrays or nested lists alike.
    predicted_points = np.asarray(y_pred)
    reference_points = np.asarray(y_test)
    # Distinct loop names: the original loop variable rebound the y_pred parameter.
    return [
        GeoUtils.distanceInKm(reference[0], reference[1], predicted[0], predicted[1])
        for reference, predicted in zip(reference_points, predicted_points)
    ]

In [42]:
def get_quality_metrics(y_pred, y_test):
    """Print summary statistics of the localization error.

    The per-sample distances come from ``get_distance_array(y_pred, y_test)``.
    Their minimum, maximum, mean and standard deviation (``np.std`` with its
    default settings) are each multiplied by 1000 and printed with an ``m``
    suffix. Nothing is returned.
    """
    errors = get_distance_array(y_pred, y_test)
    scale = 1000  # the printed values are 1000x the raw distances
    report = (
        ("min:            %0.2f m", min(errors) * scale),
        ("max:            %0.2f m", max(errors) * scale),
        ("mean:           %0.2f m", np.mean(errors) * scale),
        ("std deviation:  %0.2f m", np.std(errors) * scale),
    )
    # All four statistics are computed before anything is printed.
    for template, value in report:
        print(template % value)

In [43]:
def write_to_csv(y_pred, test_id, file_path):
    """Write predicted coordinates next to their identifiers as CSV.

    Parameters
    ----------
    y_pred : array-like with two columns
        Predictions; written as the ``lat_pred`` and ``lon_pred`` columns.
    test_id : pandas.Series or pandas.DataFrame
        Identifier column(s), attached to the predictions by row position.
    file_path : str, path object or writable text buffer
        Forwarded to ``DataFrame.to_csv`` as ``path_or_buf``.

    The file is written without the index column.
    """
    predictions = pd.DataFrame(y_pred, columns=['lat_pred', 'lon_pred'])
    # pd.concat(axis=1) aligns on index labels. Predictions built from an array
    # carry a fresh 0..n-1 index, so an id Series with any other index (e.g.
    # taken from a filtered or shuffled frame) would produce NaN-padded,
    # misaligned rows. Dropping its index makes the pairing positional.
    identifiers = test_id.reset_index(drop=True)
    combined = pd.concat([predictions, identifiers], axis=1)
    combined.to_csv(path_or_buf=file_path, index=False)

In [44]:
# Read the three CSV datasets (same files, same order) and preview the
# training frame.
db_all, db_train, db_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "database/LocTreino_Equipe_4.csv",
        "database/LocTreino.csv",
        "database/LocTeste.csv",
    )
)
db_train.head()

Unnamed: 0,lat,lon,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-8.05438,-34.95408,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-8.05337,-34.94889,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-8.05561,-34.95431,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
3,-8.05562,-34.95405,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026
4,-8.05007,-34.95474,-82.999429,-98.255357,-87.818571,-101.535,-97.59125,-90.656905,-80.617222,-99.934667,-96.982778,3,5,3,8797


In [45]:
# Feature matrices: every column except the two target columns ("lat", "lon")
# and the "pontoId" identifier. The source frames are left untouched.
X_train = db_train.drop(columns=["lat", "lon", "pontoId"])
X_test = db_test.drop(columns=["lat", "lon", "pontoId"])
X_train.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3
0,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2
1,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4
2,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2
3,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2
4,-82.999429,-98.255357,-87.818571,-101.535,-97.59125,-90.656905,-80.617222,-99.934667,-96.982778,3,5,3


In [46]:
# "lat" target column of the training and test frames.
lat_train, lat_test = db_train["lat"], db_test["lat"]
lat_train.head()

0   -8.05438
1   -8.05337
2   -8.05561
3   -8.05562
4   -8.05007
Name: lat, dtype: float64

In [47]:
# "lon" target column of the training and test frames.
# The test column must come from db_test, the same frame lat_test and pontoId
# are read from. The previous `X_test1` is not defined anywhere in this
# notebook, so it only worked with leftover kernel state and could pair
# latitudes with longitudes from a different frame.
lon_train = db_train["lon"]
lon_test = db_test["lon"]
lon_train.head()

0   -34.95408
1   -34.94889
2   -34.95431
3   -34.95405
4   -34.95474
Name: lon, dtype: float64

In [48]:
# Two-column (lat, lon) target frames for fitting and evaluation.
y_train = pd.concat((lat_train, lon_train), axis="columns")
y_test = pd.concat((lat_test, lon_test), axis="columns")

# Reference table: test targets side by side with their point ids.
test_id = db_test["pontoId"]
y_ref = pd.concat((y_test, test_id), axis="columns")
y_ref.head()

Unnamed: 0,lat,lon,pontoId
0,-8.05542,-34.94907,7204.0
1,-8.05565,-34.95977,6997.0
2,-8.04842,-34.95607,9258.0
3,-8.05127,-34.94758,8489.0
4,-8.0524,-34.94757,7929.0


In [49]:
from sklearn.neighbors import KNeighborsRegressor

# Baseline: 5-nearest-neighbours regression on the unscaled features.
# fit() returns the estimator itself, so it can be chained onto the constructor.
neigh = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
y_pred_cso = neigh.predict(X_test)

# NOTE: the last cell of the notebook writes to this same path again.
write_to_csv(y_pred_cso, test_id, "result/Resultados_Equipe4_Metodo_8.csv")

In [50]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Regression scores computed directly on the (lat, lon) values, in the order
# [MAE, MSE, RMSE, R^2]. The MSE is computed once and reused for the RMSE.
knn_mse = mean_squared_error(y_test, y_pred_cso)
results = [
    mean_absolute_error(y_test, y_pred_cso),
    knn_mse,
    np.sqrt(knn_mse),
    r2_score(y_test, y_pred_cso),
]
results

[0.0024096800000004788,
 1.5109820080005076e-05,
 0.0038871352021771863,
 0.053487340642696335]

In [51]:
# Plain ndarray copy of the test targets, reused by later cells.
y_test_np = y_test.to_numpy()
# Keyword arguments keep predictions and references in the slots the function
# signature names; the previous positional call passed them the other way round.
get_quality_metrics(y_pred=y_pred_cso, y_test=y_test_np)

min:            4.87 m
max:            1543.57 m
mean:           493.04 m
std deviation:  352.35 m


In [52]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes: 1..99.
parameters = {'n_neighbors': list(range(1, 100))}
neigh = KNeighborsRegressor()

# 99 candidates x 5 folds, each also scored on its training split; the recorded
# run of this cell was interrupted before finishing. n_jobs=-1 spreads the fits
# over all available cores without changing the search itself.
# return_train_score=True adds considerable scoring work and is not needed to
# pick best_params_; it is kept so cv_results_ has the same columns as before.
grid = GridSearchCV(
    neigh,
    parameters,
    cv = 5,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True,
    n_jobs = -1,
)
grid.fit(X_train, y_train)
grid.best_params_

KeyboardInterrupt: 

In [53]:
from sklearn import preprocessing

# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
scaler = preprocessing.RobustScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# 3-nearest-neighbours regression on the scaled features.
neigh = KNeighborsRegressor(n_neighbors=3)
neigh.fit(X_train_scaled, y_train)
y_pred_cso_scaled = neigh.predict(X_test_scaled)

# Keyword arguments keep predictions and references in the slots the function
# signature names; the previous positional call passed them the other way round.
get_quality_metrics(y_pred=y_pred_cso_scaled, y_test=y_test_np)

min:            15.54 m
max:            1551.35 m
mean:           486.23 m
std deviation:  358.75 m


In [54]:
# Export the predictions of the scaled-feature model together with the point ids.
write_to_csv(
    y_pred=y_pred_cso_scaled,
    test_id=test_id,
    file_path="result/Resultados_Equipe4_Metodo_8.csv",
)