In [1]:
import sklearn
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Read the training and test splits from their CSV files, then preview the
# first training rows.
db_train, db_test = (
    pd.read_csv(csv_path)
    for csv_path in ("database/LocTreino.csv", "database/LocTeste.csv")
)
db_train.head()

Unnamed: 0,lat,lon,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-8.05438,-34.95408,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-8.05337,-34.94889,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-8.0539,-34.95955,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1,7636
3,-8.05561,-34.95431,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
4,-8.05562,-34.95405,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026


In [3]:
# Feature matrices: every column of each split except the two target
# coordinates.
# NOTE(review): the preview shows "Unnamed: 0" and "pontoId" still present
# among the features - confirm that both are meant to be model inputs.
X_train = db_train.drop(columns=["lat", "lon"])
X_test = db_test.drop(columns=["lat", "lon"])
X_train.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1,7636
3,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
4,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026


In [4]:
# Latitude target column of each split; preview the training values.
lat_train, lat_test = db_train["lat"], db_test["lat"]
lat_train.head()

0   -8.05438
1   -8.05337
2   -8.05390
3   -8.05561
4   -8.05562
Name: lat, dtype: float64

In [5]:
# Longitude target column of each split; preview the training values.
lon_train, lon_test = db_train["lon"], db_test["lon"]
lon_train.head()

0   -34.95408
1   -34.94889
2   -34.95955
3   -34.95431
4   -34.95405
Name: lon, dtype: float64

In [19]:
# Two-column (lat, lon) target frames, one per split, built by placing the
# latitude and longitude series side by side.
y_train = pd.concat(objs=[lat_train, lon_train], axis="columns")
y_test = pd.concat(objs=[lat_test, lon_test], axis="columns")
y_test.head()

Unnamed: 0,lat,lon
0,-8.05847,-34.94977
1,-8.05088,-34.95318
2,-8.05446,-34.95612
3,-8.05514,-34.94997
4,-8.05143,-34.95325


In [7]:
from sklearn.ensemble import RandomForestRegressor

# Baseline model: a seeded 1000-tree random forest trained on both target
# columns at once, then used to predict the test split.
reg_cso = RandomForestRegressor(
    n_estimators=1000,
    random_state=42,
).fit(X_train, y_train)
y_pred_cso = reg_cso.predict(X_test)

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test-set scores of the baseline forest, in order: MAE, MSE, RMSE, R^2.
# The MSE is computed once and reused for the RMSE.
mse_cso = mean_squared_error(y_test, y_pred_cso)
results = [
    mean_absolute_error(y_test, y_pred_cso),
    mse_cso,
    np.sqrt(mse_cso),
    r2_score(y_test, y_pred_cso),
]
results

[0.00016555593046389206,
 5.761139225562796e-08,
 0.00024002373269247347,
 0.9956944142882098]

In [None]:
from PyRadioLoc.Utils.GeoUtils import GeoUtils

# One GeoUtils.distanceInKm value per test sample, computed between the true
# (lat, lon) pair and the pair predicted by the baseline forest.
y_test_np = y_test.to_numpy()
dist = [
    GeoUtils.distanceInKm(true_pt[0], true_pt[1], pred_pt[0], pred_pt[1])
    for true_pt, pred_pt in zip(y_test_np, y_pred_cso)
]

dist

In [None]:
from sklearn.model_selection import GridSearchCV

# scikit-learn 1.0 renamed the regression criteria "mse"/"mae" to
# "squared_error"/"absolute_error", and later releases reject the old names,
# so use the spelling that matches the installed version.
if int(sklearn.__version__.split(".")[0]) >= 1:
    criteria = ["squared_error", "absolute_error"]
else:
    criteria = ["mse", "mae"]

# Search space: 50 forest sizes x 2 split criteria. With cv = 10 this is
# 1000 model fits, plus the final refit on the full training set.
parameters = {'n_estimators': list(range(100, 150)), 'criterion': criteria}

# Seeded like the baseline forest so that score differences between
# candidates come from the hyper-parameters and the search is repeatable.
random_forest = RandomForestRegressor(random_state = 42)

grid = GridSearchCV(
    random_forest,
    parameters,
    cv = 10,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_

In [None]:
from sklearn.neural_network import MLPRegressor

# Candidate values for each MLP hyper-parameter explored by the grid search.
activation = ["identity", "logistic", "tanh", "relu"]
solver = ["lbfgs", "sgd", "adam"]
learning_rate = ["constant", "invscaling", "adaptive"]

# GridSearchCV takes a mapping of parameter name -> list of candidates.
# This must be a dict literal: the previous bracketed form with "key : value"
# pairs (and the misspelled `learning rate`) was a SyntaxError.
# Note: this rebinds `parameters` (and `grid` below) from the random forest
# search cell.
parameters = {
    "activation": activation,
    "solver": solver,
    "learning_rate": learning_rate,
}

# Seeded so repeated runs of the search are comparable.
mlp_regressor = MLPRegressor(random_state = 42)
grid = GridSearchCV(
    mlp_regressor,
    parameters,
    cv = 10,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_