In [1]:
# imports
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from dotenv import load_dotenv
import os
from typing import Callable

load_dotenv()
raw_data_folder = os.getenv("rawDataDirectory")
data_folder = os.getenv("dataFolder")
privateLatStr = os.getenv("privateLat")
privateLonStr = os.getenv("privateLon")
privateRadStr = os.getenv("privateRad")
assert (
    privateLatStr is not None
    and privateLonStr is not None
    and privateRadStr is not None
)

privateLat = float(privateLatStr)
privateLon = float(privateLonStr)
privateRad = float(privateRadStr)

from util.clean_file import filter_by_distance

filterFunction: Callable[[float, float], bool] = lambda lat, lon: filter_by_distance(
    (lat, lon), (privateLat, privateLon, privateRad)
)
# import dataVerwerkingFunction

In [2]:
init = pd.read_csv(  # type: ignore
    f"{raw_data_folder}/data_V1",
    delimiter=";",
    header=0,
    dtype={
        "finePermission": bool,
        "foreground": bool,
        "user": str,
        "latitude": float,
        "longitude": float,
        "time": int,
        "accuracy": float,
    },
)
init["time"] = pd.to_datetime(init["time"], unit="ms")  # type: ignore
print(init.shape)
data = init[
    init.apply(
        lambda row: filterFunction(row["latitude"], row["longitude"]), # type: ignore
        axis=1,
    )
]
print(data.shape)


# coordinaten van de masten(comment out if not needed)
# coordinates=dataVerwerkingFunction.get_masten('../data/zendmasten_vlaanderen.json')
# coordinates=coordinates['geometry.coordinates']

(7090, 7)
(3279, 7)


In [3]:
# data manipulation
data['date'] = data['time'].dt.date
groups=data.groupby(['user','date']) # type: ignore
print(groups.size())

user              date      
cfcca27c720dfceb  2024-10-04     293
                  2024-10-05    1031
                  2024-10-06     663
                  2024-10-07    1292
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['date'] = data['time'].dt.date


In [4]:
# maakt voor ieder device en per dag een groep aan
for (user,date), group in groups:
    map = folium.Map(location=[group['latitude'].mean(),group['longitude'].mean()],zoom_start=9)
    
    # voor masten(comment out if not needed)
    # marker_cluster=MarkerCluster().add_to(map)
    # for coord in coordinates:
    #     folium.Marker([coord[1], coord[0]]).add_to(marker_cluster)
    

    # 2 trails
    fineTrail=[]
    coarseTrail=[]       
    for index,row in group.iterrows():
        if(row['finePermission']):
            fineTrail.append((row['latitude'],row['longitude']))
        else:
            coarseTrail.append((row['latitude'],row['longitude']))
    folium.PolyLine(locations=fineTrail, color="#FF0000", tooltip="fine grained location").add_to(map)
    folium.PolyLine(locations=coarseTrail, color="#0000FF", tooltip="coarse grained location").add_to(map)
    map.save(f"{data_folder}/v1/{date}.html")