In [1]:
#imports
import ast
import os
from typing import Callable

import folium
import pandas as pd
from dotenv import load_dotenv
from folium.plugins import MarkerCluster

# Pull settings from a .env file (python-dotenv) into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of the environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value as a string.

    Raises:
        EnvironmentError: If the variable is not set. The message names the
            missing variable so the .env file can be corrected directly.
    """
    value = os.getenv(name)
    if value is None:
        raise EnvironmentError(f"Required environment variable '{name}' is not set")
    return value


# Folders used later to build input and output paths. They are validated here
# so that a missing setting fails now instead of producing "None/..." paths.
raw_data_folder = _require_env("rawDataDirectory")
data_folder = _require_env("dataFolder")

# Latitude, longitude and radius handed to filter_by_distance as the
# reference triple. NOTE(review): the unit of the radius is not visible
# here - confirm against util.clean_file.
privateLatStr = _require_env("privateLat")
privateLonStr = _require_env("privateLon")
privateRadStr = _require_env("privateRad")

# float() raises ValueError if a value is present but not numeric.
privateLat = float(privateLatStr)
privateLon = float(privateLonStr)
privateRad = float(privateRadStr)

from util.clean_file import filter_by_distance

def filterFunction(lat: float, lon: float) -> bool:
    """Run ``filter_by_distance`` for a single coordinate pair.

    Args:
        lat: Latitude of the point to check.
        lon: Longitude of the point to check.

    Returns:
        Whatever ``filter_by_distance`` returns for the point ``(lat, lon)``
        and the reference triple ``(privateLat, privateLon, privateRad)``.
    """
    point = (lat, lon)
    reference = (privateLat, privateLon, privateRad)
    return filter_by_distance(point, reference)

In [2]:
# Column types for the location export. "time" is read as an integer and
# interpreted as epoch milliseconds below.
location_dtypes = {
    "finePermission": bool,
    "foreGround": bool,
    "priority": int,
    "user": str,
    "time": int,
    "realLocation": str,
    "obfuscatedLocation": str,
}

# Column types for the blob export; both columns are kept as raw strings.
blob_dtypes = {"user": str, "blobs": str}

# Both files are semicolon-separated with a header row.
locations = pd.read_csv(
    f"{raw_data_folder}/data_V4_31-01",
    delimiter=";",
    header=0,
    dtype=location_dtypes,
)
blobs = pd.read_csv(
    f"{raw_data_folder}/blobs_V4_31-01",
    delimiter=";",
    header=0,
    dtype=blob_dtypes,
)

# Replace the millisecond timestamps with pandas datetimes.
locations = locations.assign(
    time=lambda frame: pd.to_datetime(frame["time"], unit="ms")
)

In [3]:
# Data manipulation: parse the serialized locations, split them into columns,
# apply the distance filter and group the result per day and user.


def _parse_location(value):
    """Convert one location cell into a 3-item sequence.

    Args:
        value: Cell content of "realLocation" / "obfuscatedLocation". A string
            holding a Python literal on the first run, an already parsed
            tuple/list when the cell is re-run, or a missing value.

    Returns:
        The parsed literal for strings, the value itself if it is already a
        tuple or list, and ``(None, None, None)`` for anything else (missing).

    Raises:
        ValueError, SyntaxError: If a string is not a valid Python literal.
    """
    if isinstance(value, str):
        # literal_eval accepts only literals, so content of the CSV file can
        # never execute code the way eval() would allow.
        return ast.literal_eval(value)
    if isinstance(value, (tuple, list)):
        # Already parsed by an earlier run of this cell.
        return value
    return (None, None, None)


# Expand each location into <prefix>_latitude / _longitude / _timestamp.
for source_col, prefix in (("realLocation", "real"), ("obfuscatedLocation", "obf")):
    locations[source_col] = locations[source_col].apply(_parse_location)
    locations[
        [f"{prefix}_latitude", f"{prefix}_longitude", f"{prefix}_timestamp"]
    ] = pd.DataFrame(locations[source_col].tolist(), index=locations.index)

locations = locations.sort_values(by="time")
locations["date"] = locations["time"].dt.date
print(locations.shape)  # size before the distance filter

# Keep only the rows for which filterFunction returns a truthy value.
keep_mask = locations.apply(
    lambda row: filterFunction(row["real_latitude"], row["real_longitude"]), axis=1
)
# copy() so later column assignments (e.g. on re-run) do not target a slice.
locations = locations[keep_mask].copy()
print(locations.shape)  # size after the distance filter

# One group per (date, user); consumed by the map-rendering cell.
location_groups = locations.groupby(["date", "user"])
print(location_groups.size())
print(locations.head())

(324771, 14)
(109428, 14)
date        user            
2025-01-14  cfcca27c720dfceb     8898
2025-01-15  a1eba85199a732e4      296
            cfcca27c720dfceb    35657
2025-01-16  218f4413e393d7d3      576
            cfcca27c720dfceb      582
2025-01-17  218f4413e393d7d3    19418
            b76cb5235efc9519     1198
            cfcca27c720dfceb    24801
            df4294d41666d9a7     1304
            e41a59c48d43c93e     1264
2025-01-18  cfcca27c720dfceb     3146
2025-01-19  cfcca27c720dfceb    11140
2025-01-21  b76cb5235efc9519       33
            cfcca27c720dfceb       33
            df4294d41666d9a7        4
            e41a59c48d43c93e       33
2025-01-22  b76cb5235efc9519      104
            df4294d41666d9a7       82
            e41a59c48d43c93e       80
2025-01-23  b76cb5235efc9519       42
            df4294d41666d9a7       43
            e41a59c48d43c93e       28
2025-01-24  b76cb5235efc9519      133
            df4294d41666d9a7      107
            e41a59c48d43c93e     

In [4]:
def _parse_blobs(value):
    """Convert one "blobs" cell into an iterable of blobs.

    Args:
        value: A string holding a Python literal on the first run, an already
            parsed collection when the cell is re-run, or a missing value.

    Returns:
        The parsed literal for strings, the value itself if it is already a
        set/list/tuple, and an empty list for anything else (missing).

    Raises:
        ValueError, SyntaxError: If a string is not a valid Python literal.
    """
    if isinstance(value, str):
        # literal_eval accepts only literals, so content of the CSV file can
        # never execute code the way eval() would allow.
        return ast.literal_eval(value)
    if isinstance(value, (set, frozenset, list, tuple)):
        # Already parsed by an earlier run of this cell.
        return value
    return []


blobs["blobs"] = blobs["blobs"].apply(_parse_blobs)

# All blob collections of a user are merged into one set (instead of keeping
# only the largest collection per user), leaving exactly one row per user.
blobs = (
    blobs.groupby("user")["blobs"]
    .apply(lambda parts: set().union(*parts))
    .reset_index()
)

In [5]:
def _trail_points(frame, lat_col: str, lon_col: str) -> list[tuple[float, float]]:
    """Collect the (lat, lon) pairs of ``frame`` in row order.

    Args:
        frame: DataFrame containing the two coordinate columns.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.

    Returns:
        A list of ``(lat, lon)`` tuples; rows in which either coordinate is
        missing are skipped.
    """
    return list(
        frame[[lat_col, lon_col]].dropna().itertuples(index=False, name=None)
    )


# Make sure the target folder exists before any map is written.
output_dir = f"{data_folder}/v4"
os.makedirs(output_dir, exist_ok=True)

# Render one HTML map per (date, user) group.
for (date, user), group in location_groups:
    # Named trail_map so the builtin map() is not shadowed.
    trail_map = folium.Map(
        location=[group["real_latitude"].mean(), group["real_longitude"].mean()],
        zoom_start=13,
    )

    # Optional markers for masts (enable if needed; requires a `coordinates`
    # sequence, which is not defined in this notebook section):
    # marker_cluster = MarkerCluster().add_to(trail_map)
    # for coord in coordinates:
    #     folium.Marker([coord[1], coord[0]]).add_to(marker_cluster)

    # Two trails: obfuscated (blue) and real (red).
    obfuscated_trail = _trail_points(group, "obf_latitude", "obf_longitude")
    real_trail = _trail_points(group, "real_latitude", "real_longitude")
    if obfuscated_trail:
        folium.PolyLine(
            locations=obfuscated_trail,
            color="#0000FF",
            tooltip="Obfuscated location",
        ).add_to(trail_map)
    if real_trail:
        folium.PolyLine(
            locations=real_trail,
            color="#FF0000",
            tooltip="Real location",
        ).add_to(trail_map)

    # A user may have no row in `blobs`; draw no circles in that case instead
    # of failing on iloc[0].
    blob_rows = blobs[blobs["user"] == user]
    user_blobs = blob_rows.iloc[0]["blobs"] if not blob_rows.empty else ()
    for blob in user_blobs:
        # Each blob is indexed as (latitude, longitude, radius); only blobs
        # accepted by filterFunction are drawn.
        if not filterFunction(blob[0], blob[1]):
            continue
        folium.Circle(
            location=[blob[0], blob[1]],
            radius=blob[2],
            fill_opacity=0.2,
            fill_color="cornflowerblue",
        ).add_to(trail_map)

    trail_map.save(f"{output_dir}/{date}_{user}.html")