In [None]:
import geopandas as gpd
import pandas as pd
# Never truncate the column list when a DataFrame is displayed
pd.options.display.max_columns = None
import numpy as np

from shapely import geometry, ops
from shapely.geometry import MultiLineString, LineString, Point
import os
from shapely import wkb
import binascii
import pandas as pd
import geopandas as gpd

import time

#### Load data

In [None]:
%%time

# Coordinate reference system attached to the geometries (EPSG:4326 is WGS84)
target_crs = 'EPSG:4326'

# MotionTag storyline dump, a semicolon-separated CSV
raw = pd.read_csv(
    "../Data/dumps_motiontag/Storyline.EPFL-Panel.2023-04-24--2023-06-05.csv",
    sep=';',
)

# Lookup table linking MotionTag user ids to FORS user ids
match_userid = pd.read_csv(
    '../Data/dumps_fors/correspondance_user_id.csv',
)
print("Fichier table de correspondance user id chargé")

#### Convert EWKB hex strings to shapely objects

In [None]:
%%time
def parse_ewkb(hex_str):
    """Decode a hex-encoded (E)WKB string into a shapely geometry.

    Parameters
    ----------
    hex_str : str or None
        Hexadecimal text of the WKB/EWKB payload. Missing values
        (``None``, ``NaN``, ``pd.NA``) are accepted.

    Returns
    -------
    shapely geometry or None
        The object produced by ``wkb.loads``, or ``None`` when ``hex_str``
        is missing.

    Raises
    ------
    binascii.Error
        If ``hex_str`` is not valid hexadecimal text.
    """
    # A missing value (e.g. NaN, pandas' default for an empty CSV cell) would
    # make unhexlify raise a TypeError; treat it as "no geometry" instead.
    if pd.isna(hex_str):
        return None
    # Hex text -> raw bytes
    binary_data = binascii.unhexlify(hex_str)
    # Returned directly rather than through a local called `geometry`, which
    # would shadow the `geometry` module imported from shapely.
    return wkb.loads(binary_data)

# Decode the hex 'geometry' column into shapely objects and build the
# GeoDataFrame from a copy of `raw` (via assign). `raw` keeps its original hex
# strings, so this cell can be re-run without handing already-decoded
# geometries back to parse_ewkb.
gdf = gpd.GeoDataFrame(
    raw.assign(geometry=raw['geometry'].apply(parse_ewkb)),
    geometry='geometry',
    crs=target_crs,
)

#### Format some variables

In [None]:
%%time
# Parse both timestamp columns as datetimes and make the origin of the
# user id explicit in its column name.
gdf = (
    gdf
    .assign(
        started_at=pd.to_datetime(gdf['started_at']),
        finished_at=pd.to_datetime(gdf['finished_at']),
    )
    .rename(columns={'user_id': 'user_id_motiontag'})
)


In [None]:
# Attach the MotionTag/FORS correspondence table to every record.
# Left join: all storyline rows are kept, and ids missing from the table
# get NaN in the added columns.
gdf = gdf.merge(match_userid, how="left", on="user_id_motiontag")


In [None]:
# Records of type 'Stay' become the staypoints table; their id is the activity id
staypoints = (
    gdf.loc[gdf['type'].eq('Stay')]
    .rename(columns={'id': 'activity_id'})
    .reset_index(drop=True)
)
# Records of type 'Track' become the legs table; their id is the leg id
legs = (
    gdf.loc[gdf['type'].eq('Track')]
    .rename(columns={'id': 'leg_id'})
    .reset_index(drop=True)
)

In [None]:
# Expose the x / y coordinates of each stay geometry as plain columns
staypoints = staypoints.assign(
    lon=staypoints.geometry.x,
    lat=staypoints.geometry.y,
)

#### Multilinestrings to linestrings

In [None]:
# Count the legs per geometry type
legs.geom_type.value_counts()

In [None]:
%%time 
# Rewrite continuous MultiLineStrings as single LineStrings; any other
# geometry is passed through unchanged.
legs['geometry'] = legs['geometry'].apply(
    lambda geom: ops.linemerge(geom) if isinstance(geom, MultiLineString) else geom
)
# Whatever is still a MultiLineString after the merge is discontinuous: drop it.
# Note: exploding them instead (legs.explode(index_parts=True)) would leave the
# parts without a departure / arrival time.
# .copy() makes the filtered frame independent of the original, so adding a
# column below does not raise SettingWithCopyWarning.
legs = legs.loc[legs.geometry.geom_type != 'MultiLineString', :].copy()
# Number of coordinate points in each remaining leg geometry
legs['point_per_linestring'] = legs['geometry'].apply(lambda geom: len(geom.coords))
# Cell output: geometry types left after the clean-up
legs.geometry.geom_type.value_counts()

#### Remove nan columns

In [None]:
# Columns removed from each table (presumably empty for that record type,
# per the section title; confirm against the data).
staypoints = staypoints.drop(["length", "detected_mode", "mode"], axis="columns")
legs = legs.drop("purpose", axis="columns")

#### Save to pickles

In [None]:
%%time
# Destination file of each formatted table
staypoints_path = "../Data/dumps_motiontag/storyline_formated/staypoints.pkl"
legs_path = "../Data/dumps_motiontag/storyline_formated/legs.pkl"

# to_pickle does not create missing folders, so make sure the output
# directory exists before writing (no-op when it is already there).
for pickle_path in (staypoints_path, legs_path):
    os.makedirs(os.path.dirname(pickle_path), exist_ok=True)

staypoints.to_pickle(staypoints_path)
legs.to_pickle(legs_path)