In [16]:
import sklearn
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from PyRadioLoc.Utils.GeoUtils import GeoUtils
%run utils.ipynb

In [17]:
def get_distance_array(y_pred, y_test):
    dist = list()
    for y_sample, y_pred in  zip(y_test, y_pred):
        dist.append(GeoUtils.distanceInKm(y_sample[0], y_sample[1], y_pred[0], y_pred[1]))
    return dist

In [75]:
def get_quality_metrics(y_pred, y_test):
    dist = get_distance_array(y_pred, y_test)
    localization_error_min = min(dist)*1000
    localization_error_max = max(dist)*1000
    localization_error_mean = np.mean(dist)*1000
    localization_error_standard_deviation = np.std(dist)*1000
    print("min:            %0.2f m" % localization_error_min)
    print("max:            %0.2f m" % localization_error_max)
    print("mean:           %0.2f m" % localization_error_mean)
    print("std deviation:  %0.2f m" % localization_error_standard_deviation)

In [19]:
def write_to_csv(y_pred, test_id, file_path):
    y_pred_dataframe = pd.DataFrame(y_pred, columns=['lat', 'lon'])
    y_pred_dataframe = pd.concat([y_pred_dataframe, test_id], axis=1)
    y_pred_dataframe.to_csv(path_or_buf=file_path, index = False)

In [20]:
db_all = pd.read_csv("database/LocTreino_Equipe_4.csv")
db_train = pd.read_csv("database/LocTreino.csv")
db_test = pd.read_csv("database/LocTeste.csv")
db_train.head()

Unnamed: 0,lat,lon,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-8.05438,-34.95408,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-8.05337,-34.94889,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-8.0539,-34.95955,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1,7636
3,-8.05561,-34.95431,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
4,-8.05562,-34.95405,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026


In [52]:
X_train = db_train.drop("lat", axis = 1)
X_train = X_train.drop("lon", axis = 1)
X_train = X_train.drop("pontoId", axis = 1)
X_test = merge_on_pontoId(db_all, db_test)
X_test = X_test.drop("lat", axis  = 1)
X_test = X_test.drop("lon", axis = 1)
X_test = X_test.drop("pontoId", axis = 1)
cols = ['rssi_1_1_y', 'rssi_1_2_y', 'rssi_1_3_y', 'rssi_2_1_y', 'rssi_2_2_y', 'rssi_2_3_y', 'rssi_3_1_y', 'rssi_3_2_y', 'rssi_3_3_y', 'delay_1_y','delay_2_y','delay_3_y']
X_test = X_test.drop(cols, axis=1)
X_test.rename(columns={'rssi_1_1_x': 'rssi_1_1' , 'rssi_1_2_x': 'rssi_1_2', 'rssi_1_3_x':'rssi_1_3', 'rssi_2_1_x':'rssi_2_1', 'rssi_2_2_x':'rssi_2_2', 'rssi_2_3_x':'rssi_2_3', 'rssi_3_1_x':'rssi_3_1', 'rssi_3_2_x':'rssi_3_2', 'rssi_3_3_x':'rssi_3_3', 'delay_1_x':'delay_1','delay_2_x':'delay_2','delay_3_x':'delay_3'}, inplace=True)
X_train.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3
0,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2
1,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4
2,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1
3,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2
4,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2


In [53]:
X_test1 = merge_on_pontoId(db_all, db_test)
cols = ['rssi_1_1_y', 'rssi_1_2_y', 'rssi_1_3_y', 'rssi_2_1_y', 'rssi_2_2_y', 'rssi_2_3_y', 'rssi_3_1_y', 'rssi_3_2_y', 'rssi_3_3_y', 'delay_1_y','delay_2_y','delay_3_y']
X_test1 = X_test1.drop(cols, axis=1)
X_test1.rename(columns={'rssi_1_1_x': 'rssi_1_1' , 'rssi_1_2_x': 'rssi_1_2', 'rssi_1_3_x':'rssi_1_3', 'rssi_2_1_x':'rssi_2_1', 'rssi_2_2_x':'rssi_2_2', 'rssi_2_3_x':'rssi_2_3', 'rssi_3_1_x':'rssi_3_1', 'rssi_3_2_x':'rssi_3_2', 'rssi_3_3_x':'rssi_3_3', 'delay_1_x':'delay_1','delay_2_x':'delay_2','delay_3_x':'delay_3'}, inplace=True)
lat_train = db_train["lat"]
lat_test = X_test1["lat"]
lat_train.head()

0   -8.05438
1   -8.05337
2   -8.05390
3   -8.05561
4   -8.05562
Name: lat, dtype: float64

In [54]:
lon_train = db_train["lon"]
lon_test = X_test1["lon"]
lon_train.head()

0   -34.95408
1   -34.94889
2   -34.95955
3   -34.95431
4   -34.95405
Name: lon, dtype: float64

In [55]:
y_train = pd.concat([lat_train, lon_train], axis=1)
y_test = pd.concat([lat_test, lon_test], axis=1)
test_id = db_test["pontoId"]
y_ref = pd.concat([y_test, test_id], axis=1)
y_ref.head()

Unnamed: 0,lat,lon,pontoId
0,-8.05361,-34.94907,8998.0
1,-8.05111,-34.95977,9361.0
2,-8.04926,-34.95607,7693.0
3,-8.04744,-34.94758,6959.0
4,-8.05043,-34.94757,8456.0


In [69]:
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=5)
neigh.fit(X_train, y_train)
y_pred_cso = neigh.predict(X_test)
write_to_csv(y_pred_cso, test_id, "result/Resultados_Equipe4_Metodo_8.csv")

In [76]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
results = [mean_absolute_error(y_test, y_pred_cso), mean_squared_error(y_test, y_pred_cso), np.sqrt(mean_squared_error(y_test, y_pred_cso)), r2_score(y_test, y_pred_cso)]
results

[0.00035919333333316316,
 3.892324400003052e-07,
 0.0006238849573441446,
 0.9698212450779446]

In [77]:
y_test_np = y_test.to_numpy()
get_quality_metrics(y_test_np, y_pred_cso)   

min:            1.34 m
max:            382.89 m
mean:           63.92 m
std deviation:  73.52 m
