In [17]:
import os
import re
from sideseeing_tools import sideseeing
import pandas as pd
import folium
from folium.plugins import HeatMap

ds = sideseeing.SideSeeingDS(root_dir='/home/renzo/Documents/GitHub/temp-SideSeeing-Exporter/dataset/')

INFO. Loading data.
INFO. Done.


In [32]:
wifi_data_dict = {}
gps_data_dict = {}

for sample in ds.iterator:
    wifi_df = sample.wifi_networks[["Datetime UTC", "SSID", "level", "frequency"]]
    wifi_df["Datetime UTC"] = pd.to_datetime(wifi_df["Datetime UTC"]).astype('int64') // 10**6
    wifi_df.columns = ["unix_ms", "SSID", "level", "frequency"]
    wifi_df.reset_index(drop=True, inplace=True)
    wifi_data_dict[sample.name] = wifi_df

for sample in ds.iterator:
    gps_df = sample.geolocation_points[["Datetime UTC", "latitude", "longitude"]]
    gps_df["Datetime UTC"] = pd.to_datetime(gps_df["Datetime UTC"]).astype(int) // 10 ** 6
    gps_df.columns = ["unix_ms", "latitude", "longitude"]
    gps_df["latitude"] = gps_df["latitude"].astype(float)
    gps_df["longitude"] = gps_df["longitude"].astype(float)
    gps_df.reset_index(drop=True, inplace=True)
    gps_data_dict[sample.name] = gps_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wifi_df["Datetime UTC"] = pd.to_datetime(wifi_df["Datetime UTC"]).astype('int64') // 10**6
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wifi_df["Datetime UTC"] = pd.to_datetime(wifi_df["Datetime UTC"]).astype('int64') // 10**6
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wifi_df["Datetime UTC"]

In [33]:
wifi_data_dict[ds.instance.name]

Unnamed: 0,unix_ms,SSID,level,frequency
0,1758029626339,eduroam,-75,2412
1,1758029626339,FCTH_Vista,-91,5220
2,1758029626339,FCTH_Vista,-79,2412
3,1758029626339,eduroam,-93,5680
4,1758029626339,PTP-SAISP-5G-1,-84,5260
...,...,...,...,...
12956,1758030348250,eduroam,-91,2442
12957,1758030348250,BB Wireless,-87,2462
12958,1758030348250,Ag_USP_Inovacao,-88,2412
12959,1758030348250,MyWiFi2062,-88,2412


In [34]:
gps_data_dict[ds.instance.name]

Unnamed: 0,unix_ms,latitude,longitude
0,1758894616295,-23.555509,-46.678440
1,1758894624003,-23.555489,-46.678450
2,1758894631700,-23.555473,-46.678428
3,1758894635154,-23.555465,-46.678427
4,1758894636044,-23.555576,-46.678358
...,...,...,...
132,1758894764151,-23.555476,-46.678468
133,1758894765147,-23.555483,-46.678458
134,1758894766147,-23.555485,-46.678455
135,1758894767164,-23.555491,-46.678448


In [None]:
def join_dfs(wifi_df: pd.DataFrame, gps_df: pd.DataFrame):

	# Merge the two DataFrames on the 'unix_ms' column
	merged_df = pd.merge_asof(
		wifi_df.sort_values("unix_ms"),
		gps_df.sort_values("unix_ms"),
		on="unix_ms",
		direction="nearest",
		tolerance=1000  # tolerance in milliseconds
	)

	merged_df = merged_df[merged_df["latitude"].notna() & merged_df["longitude"].notna()]

	return merged_df

def average_dfs(dfs: list[pd.DataFrame]):
	"""
	Average the DataFrames in the list by 'unix_ms' and return a new DataFrame.
	"""

	if not dfs:
		return pd.DataFrame(columns=['unix_ms', 'level', 'latitude', 'longitude'])

	combined_df = pd.concat(dfs, ignore_index=True)

	averaged_df = combined_df.groupby('unix_ms', as_index=False)['level'].mean()
	first_occurrence = combined_df.drop_duplicates(subset='unix_ms')[['unix_ms', 'frequency', 'latitude', 'longitude']]
	result_df = pd.merge(averaged_df, first_occurrence, on='unix_ms')

	result_df.drop(columns=['frequency'], inplace=True)
	result_df.reset_index(drop=True, inplace=True)

	return result_df

def get_merged(path: str):

	wifi = pd.read_csv(os.path.join(path, "wifi.csv"), header=None, on_bad_lines='skip', skiprows=1)
	wifi = clean_wifi_df(wifi)

	gps = pd.read_csv(os.path.join(path, "gps.csv"), on_bad_lines='skip')
	gps = clean_gps_df(gps)

	joined = join_dfs(wifi, gps)

	wifi24 = []
	wifi5 = []

	for frequency in joined["frequency"].unique():

		value = frequency // 1000
		filtered = joined[joined["frequency"] == frequency]

		if value == 2:
			wifi24.append(filtered)
		elif value == 5:
			wifi5.append(filtered)

	return average_dfs(wifi24), average_dfs(wifi5)