In [268]:
import folium
import pandas as pd
import numpy as np
from sklearn import linear_model

Read Data
==

In [326]:
df = pd.read_csv('resources/data/LocTreino_Equipe_2.csv')
df = df.dropna(axis='index')
lats = df['lat']
lons = df['lon']
positions = zip(lats, lons)

In [327]:
df_bts = pd.read_csv('resources/data/dados_BTSs.csv')
bts_lats = df_bts['lat']
bts_lons = df_bts['lon']
bts_positions = zip(bts_lats, bts_lons)

In [328]:
for column in df.columns[3:9]:
    print(column)

pathBTS1
pathBTS2
pathBTS3
pathBTS4
pathBTS5
pathBTS6


## Plot Original Points


In [329]:
center_lat = np.mean([lat for lat in list(df['lat'])])
center_lon = np.mean([lon for lon in list(df['lon'])])
original_pts_map = folium.Map(location=[center_lat, center_lon],
                              zoom_start=15)

In [330]:
for location in positions:
    marker = folium.CircleMarker(location=location,
                                 radius=1,
                                 color='blue',
                                 fill_opacity=0.1)
    marker.add_to(original_pts_map)
for location in bts_positions:
    marker = folium.Marker(location=location)
    marker.add_to(original_pts_map)
original_pts_map.save("resources/maps/original_pts_map.html")

In [331]:
original_pts_map

## Fit Linear Model

In [332]:
samples = df[df.columns[3:]].values
target_lat = df['lat'].values
target_lon = df['lon'].values

In [333]:
lat_reg = linear_model.LinearRegression(normalize=True)
lat_reg.fit(samples, target_lat)

lon_reg = linear_model.LinearRegression(normalize=True)
lon_reg.fit(samples, target_lon)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

### Coefficients

In [334]:
sorted(zip(df.columns[3:], lat_reg.coef_),
       key=lambda (_, x): abs(x),
       reverse=True)

[('taBTS4', 0.0035854290441074675),
 ('taBTS6', -0.0020430583840570248),
 ('taBTS1', 0.00091818464285629289),
 ('taBTS5', -0.00038185346472620563),
 ('taBTS3', 0.00022429472690001679),
 ('pathBTS4', -5.1713681061307371e-05),
 ('pathBTS2', 4.0779562748663624e-05),
 ('pathBTS6', -2.946942906378778e-05),
 ('taBTS2', -1.931311165281353e-05),
 ('pathBTS3', 1.901230614307072e-05),
 ('pathBTS1', 5.7193151028019661e-06),
 ('pathBTS5', -2.0112481386731297e-06)]

In [335]:
predicted_lat = lat_reg.predict(samples)
predicted_lon = lon_reg.predict(samples)

### Calculate Error

In [336]:
# for p_lat, p_long, (lat, long) in list(zip(predicted_lat, predicted_long, targets)):
#     print(100*(p_lat - lat)/lat, 100*(p_long - long)/long)
# plt.plot(predicted_lat, [x for x, _ in targets], marker='x', linestyle='None')
# plt.show()
from sklearn.metrics import mean_squared_error as mse
from math import sqrt

def rmse(y_pred, y_real):
    return sqrt(mse(y_pred, y_real))

print('Latitude Error:', rmse(predicted_lat, target_lat))
print('Longitude Error:', rmse(predicted_long, target_lon))

('Latitude Error:', 0.0013198155197817786)
('Longitude Error:', 0.0)


In [337]:
result_map = folium.Map(location=[center_lat, center_lon],
                        zoom_start=13,
                        tiles='CartoDB dark_matter')
n_points = 10000
predicted_positions = list(zip(predicted_lat, predicted_long))
for location in predicted_positions[:n_points]:
    marker = folium.CircleMarker(location=location, color='red', weight=1, radius=3)
    marker.add_to(result_map)
for location in positions[:n_points]:
    marker = folium.CircleMarker(location=location, color='blue', weight=1, radius=3)
    marker.add_to(result_map)
for location in bts_positions:
    marker = folium.Marker(location=location)
    marker.add_to(result_map)
for p1, p2 in list(zip(positions, predicted_positions))[:n_points]:
    folium.PolyLine([p1, p2], color="green", weight=1, opacity=0.5).add_to(result_map)
result_map.save("resources/maps/result_map.html")

## Fit KNN Model

In [338]:
from sklearn import neighbors

In [339]:
lat_reg = neighbors.KNeighborsRegressor(n_neighbors=1)
long_reg = neighbors.KNeighborsRegressor(n_neighbors=1)

In [340]:
lat_reg.fit(samples, target_lat)
long_reg.fit(samples, target_lon)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=1, p=2,
          weights='uniform')

In [341]:
predicted_lat = lat_reg.predict(samples)
predicted_long = long_reg.predict(samples)

In [342]:
# from folium.plugins import MarkerCluster
folium_map = folium.Map(location=[center_lat, center_lon],
                        zoom_start=13,
                        tiles='CartoDB dark_matter')
# cluster = MarkerCluster(list(zip(predicted_lat, predicted_long))).add_to(folium_map)
n_points = -1
predicted_positions = list(zip(predicted_lat, predicted_long))
for location in predicted_positions[:n_points]:
    marker = folium.CircleMarker(location=location,
                                 color='red',
                                 weight=1,
                                 radius=3)
    marker.add_to(folium_map)
for location in positions[:n_points]:
    marker = folium.CircleMarker(location=location,
                                 color='blue',
                                 weight=1,
                                 radius=3)
    marker.add_to(folium_map)
for location in bts_positions:
    marker = folium.Marker(location=location)
    marker.add_to(folium_map)
for p1, p2 in list(zip(positions, predicted_positions))[:n_points]:
    folium.PolyLine([p1, p2],
                    color="green",
                    weight=1,
                    opacity=0.5).add_to(folium_map)
folium_map.save("resources/maps/knn_result_map.html")

In [343]:
# for p_lat, p_long, (lat, long) in list(zip(predicted_lat, predicted_long, targets)):
#     print(100*(p_lat - lat)/lat, 100*(p_long - long)/long)
# plt.plot(predicted_lat, [x for x, _ in targets], marker='x', linestyle='None')
# plt.show()
from sklearn.metrics import mean_squared_error as mse
from math import sqrt

def rmse(y_pred, y_real):
    return sqrt(mse(y_pred, y_real))

print(rmse(predicted_lat, target_lat))
print(rmse(predicted_long, target_lon))

0.0
0.0


In [344]:
from sklearn.model_selection import cross_validate

In [345]:
cv_results = cross_validate(lat_reg,
                            samples,
                            target_lat,
                            cv=10,
                            return_train_score=False)

In [346]:
cv_results['test_score']

array([ 0.95623138,  0.94955856,  0.95109055,  0.96697754,  0.95777126,
        0.93641567,  0.95880014,  0.93203664,  0.96924516,  0.96055802])

Fingerprint
==

In [347]:
for column in list(enumerate(df.columns)):
    print(column)
X = df[df.columns[1:3]].values
Y = df[df.columns[3]].values

(0, 'pontoId')
(1, 'lat')
(2, 'lon')
(3, 'pathBTS1')
(4, 'pathBTS2')
(5, 'pathBTS3')
(6, 'pathBTS4')
(7, 'pathBTS5')
(8, 'pathBTS6')
(9, 'taBTS1')
(10, 'taBTS2')
(11, 'taBTS3')
(12, 'taBTS4')
(13, 'taBTS5')
(14, 'taBTS6')


In [348]:
rssi_reg = linear_model.LinearRegression(normalize=True)
rssi_reg.fit(X, Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [349]:
point = df[df.columns[1:3]].values[0]
rssi = rssi_reg.predict([point])
print(rssi, df[df.columns[3]].values[0])

(array([ 110.93807735]), 122.556666666667)
