In [16]:
import os
import glob
import pickle
from IPython.display import Markdown
from config import datapath

import pandas as pd
import numpy as np
import datetime as dt

from sklearn.cluster import DBSCAN

import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns 
# Global seaborn styling: "notebook" context with axis-label and tick-label sizes overridden to 14.
sns.set_context("notebook", rc={"axes.labelsize": 14, "xtick.labelsize": 14, "ytick.labelsize": 14})
# "whitegrid" style with the axes grid explicitly switched on.
sns.set_style("whitegrid", {'axes.grid': True})
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

# Stamp embedded in the export file names (looks like DDMMYYYY — confirm).
today = "22042024"


def _load_export(prefix):
    """Unpickle ``<datapath>/<prefix>_data_<today>.pkl`` and return the stored object.

    Parameters
    ----------
    prefix : str
        Leading part of the file name, e.g. ``"gps"`` for ``gps_data_<today>.pkl``.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising, so this
    must only be pointed at export files from a trusted source.
    """
    export_path = os.path.join(datapath, f"{prefix}_data_{today}.pkl")
    # The context manager guarantees the handle is closed even if unpickling fails.
    with open(export_path, "rb") as file:
        return pickle.load(file)


# One export per data stream; all share the same date stamp.
df_active = _load_export("ema")
df_gps = _load_export("gps")
df_passive = _load_export("passive")
df_monitoring = _load_export("monitoring")

In [17]:
# Inner-join the GPS rows with df_monitoring on "customer"; rows whose
# customer value is missing from either frame are dropped by the inner join.
df_gps_merged = df_gps.merge(
    right=df_monitoring,
    how="inner",
    on="customer",
)

In [18]:
# Keep only rows whose status equals "Abgeschlossen" (German for "completed").
is_completed = df_gps_merged["status"] == "Abgeschlossen"
df_gps_merged = df_gps_merged[is_completed]

In [25]:
# Reshape the long GPS table (one row per customer / timestamp / type) into a
# wide one with one row per (customer, startTimestamp).
# NOTE(review): this pivots `df_gps`, not the status-filtered `df_gps_merged`
# built in the earlier cells — confirm which frame is intended here.
gps_wide = df_gps.pivot_table(
    values=["doubleValue", "startTimestamp_hour", "startTimestamp_day"],
    index=["customer", "startTimestamp"],
    columns="type",
    # 'first' because each type is expected to occur at most once per
    # customer and timestamp; any additional entries are silently ignored.
    aggfunc="first",
)

# The pivot yields two-level column labels (value name, type); collapse each
# pair into a single "<value>_<type>" string.
gps_wide.columns = ["_".join(parts).strip() for parts in gps_wide.columns]

# Day/hour are duplicated per type; the Longitude copies are discarded and the
# Latitude copies are kept under shorter names.
redundant_columns = [
    "startTimestamp_day_Longitude",
    "startTimestamp_hour_Longitude",
]
short_names = {
    "doubleValue_Latitude": "Latitude",
    "doubleValue_Longitude": "Longitude",
    "startTimestamp_day_Latitude": "Day",
    "startTimestamp_hour_Latitude": "Hour",
}

df_int = (
    gps_wide
    .drop(columns=redundant_columns)
    .rename(columns=short_names)
    .assign(
        # Name of the weekday; the .dt accessor requires 'Day' to be datetime-like.
        weekday=lambda frame: frame["Day"].dt.day_name(),
        # Number of distinct hours with a record per customer and day, broadcast
        # back onto every row of that group ("customer" is an index level,
        # "Day" a column).
        n_hours=lambda frame: frame.groupby(["customer", "Day"])["Hour"].transform("nunique"),
    )
)

In [24]:
# Show the wide GPS frame (pandas truncates the rendering of long frames).
# NOTE(review): the saved output lists only Latitude/Longitude/Day/Hour and this
# cell's execution count (24) is lower than that of the cell building df_int (25),
# so the output appears to predate the weekday/n_hours columns — re-run to refresh.
df_int

Unnamed: 0_level_0,Unnamed: 1_level_0,Latitude,Longitude,Day,Hour
customer,startTimestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
05kz,2023-10-18 17:10:15,58.255888,-29.427252,2023-10-18,17.0
05kz,2023-10-18 17:24:27,58.255888,-29.427252,2023-10-18,17.0
05kz,2023-10-19 08:21:56,58.256028,-29.427232,2023-10-19,8.0
05kz,2023-10-19 17:04:23,58.256518,-29.429422,2023-10-19,17.0
05kz,2023-10-19 17:04:25,58.256548,-29.429742,2023-10-19,17.0
...,...,...,...,...,...
zgxc,2024-04-18 14:53:41,-51.528738,-91.260991,2024-04-18,14.0
zgxc,2024-04-18 14:54:01,-51.528838,-91.261151,2024-04-18,14.0
zgxc,2024-04-18 14:54:11,-51.528798,-91.261081,2024-04-18,14.0
zgxc,2024-04-18 14:54:16,-51.528768,-91.261031,2024-04-18,14.0
