In [1]:
import pandas as pd
import numpy as np
from data_preprocess import data_cleaning, data_spliting, scaling  ### Dont forget to restart the Kernel and re-import if any changes has been made to the methods ###

ds = pd.read_csv("data/antwerp_ds_weather-data_2019.csv", index_col=0)

In [2]:
x_rssi, y = data_cleaning(ds, wathear_data=False)

x_weather, y = data_cleaning(ds, wathear_data=True)

Size before cleaning:  (130429, 113)
Size after cleaning:  (55375, 113)
Size before cleaning:  (130429, 113)
Size after cleaning:  (55375, 113)


In [3]:
# make 5 different train, test and validation sets 
# the result is going to be the average of these 

In [4]:
x_rssi_scaled = scaling(scaler='MinMax', x=x_rssi.drop(columns='RX Time', axis=1))  # remove the timestamp. will be added later 
x_w_scaled = scaling(scaler='MinMax', x=x_weather.drop(columns='RX Time', axis=1))  # remove the timestamp. will be added later 

x_rssi_scaled['timestamp'] = x_rssi['RX Time']
x_w_scaled['timestamp'] = x_weather['RX Time']

x_rssi_train, x_rssi_test, x_rssi_val, y_rssi_train, y_rssi_test, y_rssi_val = data_spliting(x_scaled=x_rssi_scaled, y=y, train_size=0.7, random_state=42)
x_w_train, x_w_test, x_w_val, y_w_train, y_w_test, y_w_val = data_spliting(x_scaled=x_w_scaled, y=y, train_size=0.7, random_state=42)

Training shape:      (38762, 73)
Test shape:          (8307, 73)
Validation shape:    (8306, 73)
Training shape:      (38762, 109)
Test shape:          (8307, 109)
Validation shape:    (8306, 109)


In [5]:
# saving files 
x_rssi_train.to_csv('files/x_rssi_train.csv')
x_rssi_test.to_csv('files/x_rssi_test.csv')
pd.DataFrame(y_rssi_train, columns=['lat', 'lon']).to_csv('files/y_rssi_train.csv')
pd.DataFrame(y_rssi_test, columns=['lat', 'lon']).to_csv('files/y_rssi_test.csv')

x_w_train.to_csv('files/x_w_train.csv')
x_w_test.to_csv('files/x_w_test.csv')
pd.DataFrame(y_w_train, columns=['lat', 'lon']).to_csv('files/y_w_train.csv')
pd.DataFrame(y_w_test, columns=['lat', 'lon']).to_csv('files/y_w_trainy_w_test.csv')

In [6]:
from sklearn.neighbors import KNeighborsRegressor

model_rssi = KNeighborsRegressor(algorithm='ball_tree')

model_rssi.fit(x_rssi_train.drop(columns='timestamp', axis=1), y_rssi_train)

y_rssi_pred = model_rssi.predict(x_rssi_test.drop(columns='timestamp'))

In [7]:
from performance_eval import *

error_stats(y_rssi_test, y_rssi_pred)

295.88544443241665

In [8]:
model_weather = KNeighborsRegressor(algorithm='ball_tree')

model_weather.fit(x_w_train.drop(columns='timestamp'), y_w_train)
y_w_pred = model_weather.predict(x_w_test.drop(columns='timestamp'))
error_stats(y_w_test, y_w_pred)

261.37364430634125

In [13]:
timestamp1 = pd.DataFrame(x_rssi_test['timestamp'])
timestamp2 = pd.DataFrame(x_w_test['timestamp'])


# df1 = pd.DataFrame(y_rssi_pred, columns=['pred_lat_rssi', 'pred_lon_rssi'])
# df2 = pd.DataFrame(y_w_pred, columns=['pred_lat_comb', 'pred_lon_comb'])




In [8]:
ds_raw = pd.read_json('data/lorawan_antwerp_2019_dataset.json')


In [28]:
for id, row in ds_raw.iterrows():
    # print(row['gateways'])
    for gateway in row['gateways']:
        if gateway['rx_time']['time']=='2019-01-22T10:23:14.645768995+01:00':
            print(row) 

adr                                                          1
hdop                                                      0.65
counter                                                  19226
longitude                                              4.41329
sf                                                           7
airtime                                               0.112896
dev_addr                                              07000EFD
gateways     [{'id': 'FF0107C9', 'rssi': -109, 'snr': 3.0, ...
latitude                                             51.230606
dev_eui                                       3432333860376818
payload      00f0000324ec4c42ab398d406766263f04f082204105f0...
channel                                                      8
Name: 31604, dtype: object


In [35]:
ts = timestamp1.sample(n=3)
ts['gw_info'] = 0

for i, t in ts.iterrows():
    for id, row in ds_raw.iterrows():
    # print(row['gateways'])
        for gateway in row['gateways']:
            if gateway['rx_time']['time']==t['timestamp']:
                ts['gw_info'][i] = id 
    

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  ts['gw_info'][i] = id
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ts['gw_info'][i] = id
You are setting val

In [None]:
pd.concat([timestamp1, pd.DataFrame(y_rssi_test, columns=['lat', 'lon']), df1], axis=1).to_csv('files/position_pred_RSSI.csv')
pd.concat([timestamp2, pd.DataFrame(y_w_test, columns=['lat', 'lon']), df2], axis=1).to_csv('files/position_pred_weather-comb.csv')


In [37]:
ds_raw['gateways'][106799]

[{'id': '080E00B9',
  'rssi': -97,
  'snr': 9.75,
  'rx_time': {'ts_type': None, 'time': '2019-02-05T19:20:44.799+01:00'},
  'esp': -97.437256},
 {'id': 'FF01072B',
  'rssi': -107,
  'snr': 8.0,
  'rx_time': {'ts_type': 'GPS_RADIO',
   'time': '2019-02-05T19:20:44.684187817+01:00'},
  'esp': -107.63892},
 {'id': 'FF01753E',
  'rssi': -113,
  'snr': -2.0,
  'rx_time': {'ts_type': 'GPS_RADIO',
   'time': '2019-02-05T19:20:44.684189469+01:00'},
  'esp': -117.12443}]