# TP geo-localisation

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from geopy.distance import geodesic

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER

In [None]:
# Read the three CSV inputs in one pass: training messages, test messages,
# and the positions associated with the training set.
df_mess_train, df_mess_test, pos_train = (
    pd.read_csv(csv_path)
    for csv_path in (
        'mess_train_list.csv',
        'mess_test_list.csv',
        'pos_train_list.csv',
    )
)

In [None]:
# Show the number of distinct values found in each column of the training
# messages.
df_mess_train.nunique()

## Stations de base

In [None]:
# Identifiers of every base station that appears in at least one message of
# either data set. np.union1d returns the sorted, de-duplicated union, so the
# inputs do not need to be made unique beforehand.
listOfBs = np.union1d(df_mess_train['bsid'], df_mess_test['bsid'])

print(f"Number of stations: {len(listOfBs)}")

In [None]:
# Pool the train and test messages, then keep one coordinate row per base
# station (the first value met for each bsid).
df_mess_all = pd.concat([df_mess_train, df_mess_test])
_bs_coord_columns = ['bs_lat', 'bs_lng']
df_bsloc_all = df_mess_all.groupby('bsid')[_bs_coord_columns].first()

# Bounding box of the base-station coordinates.
_bs_lng = df_bsloc_all['bs_lng']
_bs_lat = df_bsloc_all['bs_lat']
lon_min, lon_max = _bs_lng.min(), _bs_lng.max()
lat_min, lat_max = _bs_lat.min(), _bs_lat.max()

print(f"lon (min / max) = {lon_min:.3f}, {lon_max:.3f}")
print(f"lat (min / max) = {lat_min:.3f}, {lat_max:.3f}")

df_bsloc_all.head()

In [None]:
# Map of every base station found in the train and test messages.
fig = plt.figure(figsize=(12,12))

ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

plt.grid()
# Extent: bounding box of the base stations, padded by one unit on each side
# and rounded to whole numbers.
ax.set_extent([round(lon_min - 1.), round(lon_max + 1.),
               round(lat_min - 1.), round(lat_max + 1.),])

# Background: ocean, land, country borders and coastlines.
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.LAND, facecolor=cfeature.COLORS['land'])
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.coastlines(resolution='50m')

# plot BS
ax.plot(df_bsloc_all['bs_lng'], df_bsloc_all['bs_lat'],
        '.', color='red', label='all')

ax.xaxis.set_major_formatter(LONGITUDE_FORMATTER)
ax.yaxis.set_major_formatter(LATITUDE_FORMATTER)

gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
# Use the current Gridliner attribute names: `xlabels_top` / `ylabels_right`
# are deprecated aliases, and on Cartopy releases that dropped them a plain
# assignment has no effect, leaving the top and right labels drawn.
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

ax.set_title("Base stations")
ax.set_xlabel("longitude")
ax.set_ylabel("latitude")

plt.legend()
plt.show()

### BS train vs BS test

In [None]:
# One coordinate row per base station, computed separately for each data set.
df_bsloc_train = df_mess_train.groupby('bsid')[['bs_lat', 'bs_lng']].first()
df_bsloc_test = df_mess_test.groupby('bsid')[['bs_lat', 'bs_lng']].first()

# A station counts as "the same" only if its id and both coordinates match.
_bs_keys = ['bsid', 'bs_lat', 'bs_lng']

# Stations present in both data sets.
df_bsloc_intersect = df_bsloc_train.merge(df_bsloc_test, how='inner',
                                          on=_bs_keys)

# Outer merge with an indicator column: rows tagged "left_only" exist only in
# the frame passed first.
df_bsloc_train_only = (
    pd.merge(df_bsloc_train, df_bsloc_test,
             on=_bs_keys, how="outer", indicator=True)
    .query('_merge=="left_only"')
    .drop(columns=['_merge'])
)

df_bsloc_test_only = (
    pd.merge(df_bsloc_test, df_bsloc_train,
             on=_bs_keys, how="outer", indicator=True)
    .query('_merge=="left_only"')
    .drop(columns=['_merge'])
)

print(f"all={df_bsloc_all.shape}")
print(f"intersection={df_bsloc_intersect.shape}")
print(f"train_only={df_bsloc_train_only.shape}")
# The "=" was missing here, unlike in the three lines above.
print(f"test_only={df_bsloc_test_only.shape}")

In [None]:
# Map of the base stations, coloured by the data set(s) they appear in.
fig = plt.figure(figsize=(12,12))

ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

plt.grid()
# Extent: bounding box of the base stations, padded by one unit on each side
# and rounded to whole numbers.
ax.set_extent([round(lon_min - 1.), round(lon_max + 1.),
               round(lat_min - 1.), round(lat_max + 1.),])

# Background: ocean, land, country borders and coastlines.
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.LAND, facecolor=cfeature.COLORS['land'])
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.coastlines(resolution='50m')

# plot BS
ax.plot(df_bsloc_intersect['bs_lng'], df_bsloc_intersect['bs_lat'],
        '.', color='red', label='both')
ax.plot(df_bsloc_train_only['bs_lng'], df_bsloc_train_only['bs_lat'],
        '.', color='green', label='train only')
ax.plot(df_bsloc_test_only['bs_lng'], df_bsloc_test_only['bs_lat'],
        '.', color='orange', label='test only')

ax.xaxis.set_major_formatter(LONGITUDE_FORMATTER)
ax.yaxis.set_major_formatter(LATITUDE_FORMATTER)

gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
# Use the current Gridliner attribute names: `xlabels_top` / `ylabels_right`
# are deprecated aliases, and on Cartopy releases that dropped them a plain
# assignment has no effect, leaving the top and right labels drawn.
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

ax.set_title("Base stations")
ax.set_xlabel("longitude")
ax.set_ylabel("latitude")

plt.legend()
plt.show()

In [None]:
# Show the first ten rows of the test-only base-station table.
df_bsloc_test_only.head(10)

## Positions des émetteurs (train only)

In [None]:
# Preview the first rows of the positions associated with the training set.
pos_train.head()

In [None]:
# (rows, columns) of the training positions table.
pos_train.shape

In [None]:
# Map of all base stations together with the device positions of the
# training set.
fig = plt.figure(figsize=(12,12))

ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

plt.grid()
# Extent: bounding box of the base stations, padded by one unit on each side
# and rounded to whole numbers.
ax.set_extent([round(lon_min - 1.), round(lon_max + 1.),
               round(lat_min - 1.), round(lat_max + 1.),])

# Background: ocean, land, country borders and coastlines.
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.LAND, facecolor=cfeature.COLORS['land'])
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.coastlines(resolution='50m')

# plot base stations, then device positions on top
ax.plot(df_bsloc_all['bs_lng'], df_bsloc_all['bs_lat'],
        '.', color='red', label='base station')
ax.plot(pos_train['lng'], pos_train['lat'],
        '.', color='green', label='device')

ax.xaxis.set_major_formatter(LONGITUDE_FORMATTER)
ax.yaxis.set_major_formatter(LATITUDE_FORMATTER)

gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
# Use the current Gridliner attribute names: `xlabels_top` / `ylabels_right`
# are deprecated aliases, and on Cartopy releases that dropped them a plain
# assignment has no effect, leaving the top and right labels drawn.
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

ax.set_title("Base stations and device positions")
ax.set_xlabel("longitude")
ax.set_ylabel("latitude")

plt.legend()
plt.show()

### anomalies

In [None]:
# Select the messages whose base-station latitude value exceeds 60
# (presumably degrees north -- confirm against the data description).
_is_outlier = df_mess_all['bs_lat'] > 60
df_except = df_mess_all[_is_outlier]

_n_stations = len(np.unique(df_except['bsid']))
_n_messages = df_except.shape[0]
print(f"Number of stations = {_n_stations}")
print(f"Number of messages = {_n_messages}")

### RSSI en fonction de la distance

In [None]:
def vincenty_vec(vec_coord):
    """Return the geodesic distance, in meters, for each row of coordinates.

    Despite its name, the function relies on ``geodesic`` rather than on the
    Vincenty formula.

    Args:
        vec_coord: 2-D array-like (ndarray, nested list, DataFrame, ...) with
            four values per row. The first two values of a row form one
            point and the last two the other; each pair is passed to
            ``geodesic`` unchanged (presumably (lat, lng) pairs -- confirm
            against callers).

    Returns:
        A 1-D float ``numpy.ndarray`` holding one distance per row. When the
        input is not a 2-D table with exactly four columns, an error message
        is printed and an array of zeros is returned instead.
    """
    # Accept any array-like so that `.shape` and row slicing always work.
    coords = np.asarray(vec_coord)
    if coords.ndim != 2 or coords.shape[1] != 4:
        print('ERROR: Bad number of columns (shall be = 4)')
        return np.zeros(coords.shape[:1])

    # Always hand back an ndarray so both code paths share one return type.
    return np.array(
        [geodesic(row[:2], row[2:]).meters for row in coords],
        dtype=float,
    )

In [None]:
# Place each training message side by side with its position row. A
# column-wise concat matches rows on their index labels.
df_distance = pd.concat([df_mess_train, pos_train], axis='columns')
df_distance.shape

In [None]:
# List the column labels of the combined message/position table.
df_distance.columns

In [None]:
# Geodesic distance, in meters, between the (bs_lat, bs_lng) point and the
# (lat, lng) point of every row. Zipping the four coordinate columns avoids
# building a Series for every row, which DataFrame.apply(axis=1) does.
df_distance['distance_m'] = [
    geodesic((bs_lat, bs_lng), (lat, lng)).meters
    for bs_lat, bs_lng, lat, lng in zip(
        df_distance['bs_lat'], df_distance['bs_lng'],
        df_distance['lat'], df_distance['lng'],
    )
]

In [None]:
# Order the rows by increasing distance.
df_distance = df_distance.sort_values(by=['distance_m'])

# Split the rows at 50 km (distance_m is in meters); a distance of exactly
# 50 km falls in the "near" group.
_near_limit_m = 50000
_distance_m = df_distance['distance_m']
df_distance_near = df_distance[_distance_m <= _near_limit_m]
df_distance_far = df_distance[_distance_m > _near_limit_m]

In [None]:
# Scatter plot of the RSSI against the distance, with the near and far
# groups drawn as separate series.
fig = plt.figure(figsize=(15, 10))

plt.grid()

plt.title('RSSI vs distance')
plt.xlabel('distance (m)')
plt.ylabel('rssi (dBm)')

plt.scatter(df_distance_near['distance_m'], df_distance_near['rssi'], s=2, label='near')
plt.scatter(df_distance_far['distance_m'], df_distance_far['rssi'], s=2, label='far')

# Without a legend the 'near' / 'far' labels above would never be displayed.
plt.legend()
plt.show()

#### zoom

In [None]:
# Same scatter plot as the full-range view, restricted to the first 50 km
# of the distance axis.
fig = plt.figure(figsize=(15, 10))
plt.grid()

plt.title('RSSI vs distance')
plt.xlabel('distance (m)')
plt.ylabel('rssi (dBm)')
plt.xlim(0.0, 50000.0)

# Draw the near group first, then the far group, as two separate series.
for _subset in (df_distance_near, df_distance_far):
    plt.scatter(_subset['distance_m'], _subset['rssi'], s=2)

plt.show()

In [None]:
# Distinct values taken by the 'nseq' column among the near messages.
df_distance_near['nseq'].unique()

In [None]:

# RSSI against distance for the near messages, limited to nseq == 1.5.
fig = plt.figure(figsize=(15, 10))
plt.grid()

plt.title('RSSI vs distance')
plt.xlabel('distance (m)')
plt.ylabel('rssi (dBm)')
plt.xlim(0.0, 50000.0)

for nseq_value in df_distance_near['nseq'].unique():
    # Only the nseq == 1.5 series is drawn; skip the others before filtering.
    if nseq_value != 1.5:
        continue
    rows = df_distance_near[df_distance_near['nseq'] == nseq_value]
    plt.scatter(rows['distance_m'], rows['rssi'], s=2, label=str(nseq_value))

plt.legend()
plt.show()

In [None]:
# Report how many rows have a (non-null) distance in each group.
_n_near = df_distance_near['distance_m'].count()
_n_far = df_distance_far['distance_m'].count()
print(f'Nombre de messages "courtes distances": {_n_near}')
print(f'Nombre de messages "longues distances": {_n_far}')

In [None]:
# Number of near rows per base station, largest count first.
_rows_per_bs = df_distance_near.groupby('bsid')[['messid']].count()
df_bs_count = _rows_per_bs.sort_values(by=['messid'], ascending=False)

df_bs_count.head()

In [None]:
# Number of near rows per message id, largest count first.
_rows_per_mess = df_distance_near.groupby('messid')[['bsid']].count()
df_mess_count = _rows_per_mess.sort_values(by=['bsid'], ascending=False)

df_mess_count.head()

In [None]:
# First 15 message ids of df_mess_count.
messids = list(df_mess_count.index[:15])

fig = plt.figure(figsize=(15, 10))

plt.grid()

plt.title('RSSI vs distance')
plt.xlabel('distance (m)')
# The plotted quantity is the inverse of the RSSI, so label the axis as such.
plt.ylabel('1 / rssi (1/dBm)')

plt.xlim(0.0, 50000.0)

for messid in messids:
    tmp = df_distance_near[df_distance_near['messid'] == messid]

    x1 = tmp['distance_m'].values
    y1 = 1.0 / tmp['rssi'].values
    # Degree-3 least-squares polynomial fit of 1/rssi against distance.
    a = np.polyfit(x1, y1, 3)
    y2 = np.polyval(a, x1)
    plt.scatter(x1, y1, s=2, label=str(messid))
    # The fitted curve is drawn through the same x positions as the points.
    plt.plot(x1, y2)

plt.legend()
plt.show()

In [None]:
# One coordinate row per base station, for the near group and for the far
# group respectively.
df_bsloc_near, df_bsloc_far = (
    subset.groupby('bsid')[['bs_lat', 'bs_lng']].first()
    for subset in (df_distance_near, df_distance_far)
)

In [None]:
# Map of the base stations that appear in the near group and in the far
# group.
fig = plt.figure(figsize=(12,12))

ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

plt.grid()
# Extent: bounding box of the base stations, padded by one unit on each side
# and rounded to whole numbers.
ax.set_extent([round(lon_min - 1.), round(lon_max + 1.),
               round(lat_min - 1.), round(lat_max + 1.),])

# Background: ocean, land, country borders and coastlines.
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.LAND, facecolor=cfeature.COLORS['land'])
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.coastlines(resolution='50m')

# plot BS
ax.plot(df_bsloc_near['bs_lng'], df_bsloc_near['bs_lat'],
        '.', color='red', label='near')
ax.plot(df_bsloc_far['bs_lng'], df_bsloc_far['bs_lat'],
        '.', color='green', label='far')

ax.xaxis.set_major_formatter(LONGITUDE_FORMATTER)
ax.yaxis.set_major_formatter(LATITUDE_FORMATTER)

gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True)
# Use the current Gridliner attribute names: `xlabels_top` / `ylabels_right`
# are deprecated aliases, and on Cartopy releases that dropped them a plain
# assignment has no effect, leaving the top and right labels drawn.
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

ax.set_title("Base stations")
ax.set_xlabel("longitude")
ax.set_ylabel("latitude")

plt.legend()
plt.show()