In [1]:
#imports
import ast
import os
from typing import Callable

import folium
import pandas as pd
from dotenv import load_dotenv
from folium.plugins import MarkerCluster

# Load settings from a local .env file (if present) into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is not set. Failing here names the
            missing setting instead of letting it surface later as a
            "None/..." path or an anonymous assertion failure.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(
            f"Environment variable {name!r} is not set "
            "(define it in the environment or in the .env file)"
        )
    return value


# Folder the raw CSV exports are read from / folder the generated maps go to.
raw_data_folder = _require_env("rawDataDirectory")
data_folder = _require_env("dataFolder")

# Private location (latitude, longitude) and radius used by the distance filter.
privateLatStr = _require_env("privateLat")
privateLonStr = _require_env("privateLon")
privateRadStr = _require_env("privateRad")

# float() raises ValueError when a setting is present but not numeric.
privateLat = float(privateLatStr)
privateLon = float(privateLonStr)
privateRad = float(privateRadStr)

# Imported after load_dotenv(), matching the original statement order.
from util.clean_file import filter_by_distance


def filterFunction(lat: float, lon: float) -> bool:
    """Apply ``filter_by_distance`` to a point against the private location.

    Args:
        lat: latitude of the point to test.
        lon: longitude of the point to test.

    Returns:
        Whatever ``filter_by_distance`` returns for ``(lat, lon)`` and
        ``(privateLat, privateLon, privateRad)``. Rows/blobs for which this is
        truthy are kept by the callers in this notebook.
        NOTE(review): the exact distance semantics (inside vs. outside the
        radius, units) live in util.clean_file - confirm there.
    """
    return filter_by_distance((lat, lon), (privateLat, privateLon, privateRad))

In [2]:
# Both exports are ';'-separated files whose first line holds the column names.
_csv_options = {"delimiter": ";", "header": 0}

# Column dtypes enforced while parsing the location export.
_location_dtypes = {
    "finePermission": bool,
    "foreGround": bool,
    "priority": int,
    "user": str,
    "time": int,
    "realLocation": str,
    "obfuscatedLocation": str,
}
# Column dtypes enforced while parsing the blob export.
_blob_dtypes = {"user": str, "blobs": str}

locations = pd.read_csv(
    f"{raw_data_folder}/data_V3", dtype=_location_dtypes, **_csv_options
)
blobs = pd.read_csv(f"{raw_data_folder}/blobs_V3", dtype=_blob_dtypes, **_csv_options)

# "time" is parsed as an integer and interpreted as milliseconds since the epoch.
locations["time"] = pd.to_datetime(locations["time"], unit="ms")

In [3]:
# Data manipulation: parse the location tuples, expand them into columns,
# drop the rows rejected by filterFunction and group by day and device.


def _parse_location(value):
    """Parse a stringified Python literal; return non-string values unchanged.

    SECURITY: this replaces a bare ``eval`` on CSV content, which would execute
    any expression found in the file. ``ast.literal_eval`` only accepts
    literals. NOTE(review): this assumes every field is a plain literal such
    as "(lat, lon, timestamp)"; anything else now raises instead of running.

    Non-strings pass through so the cell can be re-run after the columns have
    already been parsed.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


_real_columns = ["real_latitude", "real_longitude", "real_timestamp"]
_obf_columns = ["obf_latitude", "obf_longitude", "obf_timestamp"]

locations["realLocation"] = locations["realLocation"].apply(_parse_location)
locations["obfuscatedLocation"] = locations["obfuscatedLocation"].apply(_parse_location)

# Expand each 3-tuple into its own columns; naming the columns explicitly keeps
# the expansion well-defined even when the frame has no rows.
locations[_real_columns] = pd.DataFrame(
    locations["realLocation"].tolist(), index=locations.index, columns=_real_columns
)
locations[_obf_columns] = pd.DataFrame(
    locations["obfuscatedLocation"].tolist(), index=locations.index, columns=_obf_columns
)

locations = locations.sort_values(by="time")
locations["date"] = locations["time"].dt.date
print(locations.shape)

# Keep only the rows accepted by filterFunction. The mask is built from the two
# coordinate columns and aligned on the index, so it also works for zero rows.
_keep_mask = pd.Series(
    [
        bool(filterFunction(lat, lon))
        for lat, lon in zip(locations["real_latitude"], locations["real_longitude"])
    ],
    index=locations.index,
    dtype=bool,
)
locations = locations[_keep_mask]
print(locations.shape)

# One group per (calendar day, device).
location_groups = locations.groupby(["date", "user"])
print(location_groups.size())
print(locations.head())

(83232, 14)
(65274, 14)
date        user            
2025-01-07  218f4413e393d7d3      100
            af428d6b009f1a2c     2881
            cfcca27c720dfceb     9115
2025-01-08  a1eba85199a732e4    14515
            af428d6b009f1a2c    10664
            cfcca27c720dfceb       29
2025-01-09  af428d6b009f1a2c    27748
2025-01-10  af428d6b009f1a2c       24
2025-01-13  af428d6b009f1a2c      198
dtype: int64
   finePermission  foreGround  priority              user  \
0            True        True       100  218f4413e393d7d3   
1            True        True       100  218f4413e393d7d3   
2            True        True       100  218f4413e393d7d3   
3            True        True       100  218f4413e393d7d3   
4            True        True       100  218f4413e393d7d3   

                     time                            realLocation  \
0 2025-01-07 15:23:19.690  (51.0591978, 3.7078166, 1736263399203)   
1 2025-01-07 15:23:23.406  (51.0592893, 3.7077559, 1736263403360)   
2 2025-01-07 15:23

In [4]:
def _parse_blob_list(value):
    """Parse a stringified Python literal; return non-string values unchanged.

    SECURITY: this replaces a bare ``eval`` on CSV content, which would execute
    any expression found in the file. ``ast.literal_eval`` only accepts
    literals. NOTE(review): this assumes each field is a literal collection of
    tuples; anything else now raises instead of running.

    Non-strings pass through so the cell can be re-run after the column has
    already been parsed.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


blobs["blobs"] = blobs["blobs"].apply(_parse_blob_list)

# Merge every blob collection recorded for a user into one de-duplicated set,
# leaving exactly one row per user.
blobs = (
    blobs.groupby("user")["blobs"]
    .apply(lambda collections: set().union(*collections))
    .reset_index()
)
blobs.head()

Unnamed: 0,user,blobs
0,218f4413e393d7d3,"{(51.05921343597361, 3.7078111857904585, 100.0..."
1,a1eba85199a732e4,"{(51.079834196590724, 3.9978869496727585, 100.0)}"
2,af428d6b009f1a2c,"{(51.05549677281474, 3.8455503330265817, 100.0..."
3,cfcca27c720dfceb,"{(51.080362213436345, 3.996791937583092, 100.0..."


In [5]:
# Build one map per (day, device) group and save it as a standalone HTML file.
for (date, user), group in location_groups:
    # Named trail_map rather than `map` so the builtin is not shadowed for the
    # rest of the kernel session.
    trail_map = folium.Map(
        location=[group["real_latitude"].mean(), group["real_longitude"].mean()],
        zoom_start=13,
    )

    # Optional marker overlay for masts; uncomment when needed.
    # NOTE(review): `coordinates` is not defined in the visible cells.
    # marker_cluster=MarkerCluster().add_to(trail_map)
    # for coord in coordinates:
    #     folium.Marker([coord[1], coord[0]]).add_to(marker_cluster)

    # Two trails: the obfuscated track (blue) and the real track (red), both in
    # the row order of the group.
    real_trail = list(zip(group["real_latitude"], group["real_longitude"]))
    obfuscated_trail = list(zip(group["obf_latitude"], group["obf_longitude"]))
    folium.PolyLine(
        locations=obfuscated_trail,
        color="#0000FF",
        tooltip="Obfuscated location",
    ).add_to(trail_map)
    folium.PolyLine(
        locations=real_trail,
        color="#FF0000",
        tooltip="Real location",
    ).add_to(trail_map)

    # Blobs recorded for this user; a user without a blobs row simply gets a
    # map without circles instead of raising IndexError.
    user_blob_rows = blobs.loc[blobs["user"] == user, "blobs"]
    user_blobs = user_blob_rows.iloc[0] if not user_blob_rows.empty else ()

    # Each blob is indexed as (latitude, longitude, radius); only blobs accepted
    # by filterFunction are drawn.
    for blob in user_blobs:
        if not filterFunction(blob[0], blob[1]):
            continue
        folium.Circle(
            location=[blob[0], blob[1]],
            radius=blob[2],
            fill_opacity=0.2,
            fill_color="cornflowerblue",
        ).add_to(trail_map)

    trail_map.save(f"{data_folder}/v3/{date}_{user}.html")