# Derive each driver's driving style and average energy consumption from the historical trips JSON file

In [1]:
import json
import pandas as pd

# Load the historical trips. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default encoding.
with open("historical_trips_full.json", encoding="utf-8") as f:
    hist = json.load(f)


In [2]:
# Flatten the nested trips -> telemetry structure into one record per
# telemetry sample, tagging every sample with its driver and trip ids.
rows = []

for trip in hist["trips"]:
    ids = {"driver_id": trip["driver_id"], "trip_id": trip["trip_id"]}
    rows.extend(
        {
            **ids,
            "speed": sample["speed_kmh"],
            "acc": sample["acceleration_mps2"],
            # Store the braking flag as 0/1 so its mean is a ratio.
            "braking": int(bool(sample["braking"])),
            "whkm": sample["energy_consumption_Wh_per_km"],
            "road_type": sample["road_type"],
        }
        for sample in trip["telemetry"]
    )

df = pd.DataFrame(rows)
df.head()


Unnamed: 0,driver_id,trip_id,speed,acc,braking,whkm,road_type
0,D001,hist_trip_001,40.395895,0.0,0,,city
1,D001,hist_trip_001,45.432533,1.105007,0,212.100146,city
2,D001,hist_trip_001,39.779163,-1.243133,1,171.353002,city
3,D001,hist_trip_001,41.834934,0.446878,0,198.937559,city
4,D001,hist_trip_001,36.592719,-1.153758,1,172.693635,city


In [3]:
# Per-driver summary statistics over all telemetry samples.
# Named aggregation binds every output column to its source column and
# statistic, so the labels cannot drift out of sync with the aggregation
# spec (as a positional rename of the MultiIndex columns could).
features = df.groupby("driver_id").agg(
    speed_mean=("speed", "mean"),
    speed_std=("speed", "std"),
    acc_mean=("acc", "mean"),
    acc_std=("acc", "std"),
    braking_ratio=("braking", "mean"),  # share of samples with braking == 1
    whkm_mean=("whkm", "mean"),
    whkm_std=("whkm", "std"),
).reset_index()

features


Unnamed: 0,driver_id,speed_mean,speed_std,acc_mean,acc_std,braking_ratio,whkm_mean,whkm_std
0,D001,40.569818,5.672002,0.056928,2.125311,0.325,197.211573,35.395449
1,D002,111.048343,9.254229,-0.013009,2.304715,0.369444,214.454988,35.805512
2,D003,126.169883,6.520863,0.012867,1.916642,0.356667,230.477516,34.578797
3,D004,120.296529,9.413895,-0.00959,1.925964,0.36,236.425057,31.890315
4,D005,116.826503,6.511118,0.008538,2.21915,0.376667,233.434211,35.213106
5,D006,124.598441,8.480198,0.016101,2.082048,0.383333,223.532295,41.984823
6,D007,62.881333,31.057417,-0.013333,0.221989,0.146667,176.456944,17.138687


In [4]:
from sklearn.preprocessing import StandardScaler

# Standardise the per-driver statistics before clustering; driver_id is an
# identifier, not a feature, so it is left out of the matrix.
scaler = StandardScaler()
numeric_features = features.drop(columns=["driver_id"])
X = scaler.fit_transform(numeric_features)


In [5]:
from sklearn.cluster import KMeans

# Partition the drivers into three clusters with a fixed random seed.
# NOTE(review): KMeans cluster ids carry no inherent meaning, yet map_cluster
# hard-codes what 0/1/2 stand for -- re-check that mapping whenever the
# data, the seed or the scikit-learn version changes.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X)
features["cluster"] = kmeans.labels_
features


Unnamed: 0,driver_id,speed_mean,speed_std,acc_mean,acc_std,braking_ratio,whkm_mean,whkm_std,cluster
0,D001,40.569818,5.672002,0.056928,2.125311,0.325,197.211573,35.395449,2
1,D002,111.048343,9.254229,-0.013009,2.304715,0.369444,214.454988,35.805512,0
2,D003,126.169883,6.520863,0.012867,1.916642,0.356667,230.477516,34.578797,0
3,D004,120.296529,9.413895,-0.00959,1.925964,0.36,236.425057,31.890315,0
4,D005,116.826503,6.511118,0.008538,2.21915,0.376667,233.434211,35.213106,0
5,D006,124.598441,8.480198,0.016101,2.082048,0.383333,223.532295,41.984823,0
6,D007,62.881333,31.057417,-0.013333,0.221989,0.146667,176.456944,17.138687,1


In [6]:
def map_cluster(c):
    """Translate a cluster id into a driving-style label.

    Args:
        c: Cluster id assigned to a driver (0, 1 or 2).

    Returns:
        "aggressive", "normal" or "eco" for ids 0, 1 and 2 respectively;
        "unknown" for any other value, so callers never receive an
        implicit ``None``.
    """
    if c == 0:
        return "aggressive"
    if c == 1:
        return "normal"
    if c == 2:
        return "eco"
    # Same fallback label used for drivers that are missing from style_map.
    return "unknown"

# Label every driver with the style associated with its cluster id.
features["driving_style"] = features["cluster"].map(map_cluster)
features


Unnamed: 0,driver_id,speed_mean,speed_std,acc_mean,acc_std,braking_ratio,whkm_mean,whkm_std,cluster,driving_style
0,D001,40.569818,5.672002,0.056928,2.125311,0.325,197.211573,35.395449,2,eco
1,D002,111.048343,9.254229,-0.013009,2.304715,0.369444,214.454988,35.805512,0,aggressive
2,D003,126.169883,6.520863,0.012867,1.916642,0.356667,230.477516,34.578797,0,aggressive
3,D004,120.296529,9.413895,-0.00959,1.925964,0.36,236.425057,31.890315,0,aggressive
4,D005,116.826503,6.511118,0.008538,2.21915,0.376667,233.434211,35.213106,0,aggressive
5,D006,124.598441,8.480198,0.016101,2.082048,0.383333,223.532295,41.984823,0,aggressive
6,D007,62.881333,31.057417,-0.013333,0.221989,0.146667,176.456944,17.138687,1,normal


In [7]:
# Load the driver profiles. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default encoding.
with open("drivers_dataset.json", encoding="utf-8") as f:
    drivers = json.load(f)


In [8]:
# Lookup table: driver_id -> driving-style label.
style_map = features.set_index("driver_id")["driving_style"].to_dict()
style_map


{'D001': 'eco',
 'D002': 'aggressive',
 'D003': 'aggressive',
 'D004': 'aggressive',
 'D005': 'aggressive',
 'D006': 'aggressive',
 'D007': 'normal'}

In [9]:
# Tag each profile with its derived style; drivers absent from style_map
# fall back to "unknown".
for d in drivers["user_profiles"]:
    d.update(driving_style=style_map.get(d["driver_id"], "unknown"))

# The enriched profiles are written to disk in a later cell, once
# baseline_whkm has been added as well.


In [10]:
# Energy baseline per driver: mean consumption (Wh/km) over all of the
# driver's telemetry samples.
baseline_whkm_map = df["whkm"].groupby(df["driver_id"]).mean().to_dict()

baseline_whkm_map


{'D001': 197.21157325413844,
 'D002': 214.45498751860978,
 'D003': 230.47751596000583,
 'D004': 236.42505710901975,
 'D005': 233.43421133738025,
 'D006': 223.53229503657792,
 'D007': 176.45694444444445}

In [11]:
# Attach each driver's energy baseline to their profile. Drivers with no
# entry in baseline_whkm_map receive the sentinel -1.0. driving_style was
# already set in an earlier cell, so it is not touched here.
for profile in drivers["user_profiles"]:
    baseline = baseline_whkm_map.get(profile["driver_id"], -1)
    profile["baseline_whkm"] = float(baseline)


In [12]:
# Persist the profiles, which now carry driving_style and baseline_whkm.
with open("drivers_with_style.json", "w") as f:
    f.write(json.dumps(drivers, indent=2))


# Calendar habits

In [13]:
'''import json
import datetime as dt
from collections import defaultdict, Counter

# Carica user profiles (quello originale o drivers_with_style se l'hai già arricchito)
with open("drivers_with_style.json") as f:
    drivers = json.load(f)

# Carica il calendario fake
with open("calendar_events_fake.json") as f:
    cal = json.load(f)

events = cal["events"]

weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

def parse_dt(s):
    return dt.datetime.fromisoformat(s)
'''

In [14]:
'''pattern_counts = Counter()
pattern_example = {}

for ev in events:
    start = parse_dt(ev["start_time"])
    end = parse_dt(ev["end_time"])
    weekday = weekday_names[start.weekday()]  # Mon, Tue, ...

    key = (
        ev["driver_id"],
        weekday,
        start.hour,
        end.hour,
        ev["category"],
        ev["location_name"],
    )
    pattern_counts[key] += 1
    pattern_example[key] = ev  # salvo un esempio per le coord
'''

In [15]:
'''habits_by_driver = defaultdict(list)

for key, count in pattern_counts.items():
    driver_id, weekday, sh, eh, category, loc_name = key
    if count < 2:
        continue  # non è davvero un'abitudine

    ev = pattern_example[key]
    habits_by_driver[driver_id].append({
        "day_of_week": weekday,
        "time_range": f"{sh:02d}:00-{eh:02d}:00",
        "category": category,
        "location_name": loc_name,
        "location": ev["location"],
        "occurrences": count
    })'''


In [16]:
'''# driver_id -> lista di abitudini
for d in drivers["user_profiles"]:
    did = d["driver_id"]
    d["calendar_habits"] = {
        "recurring_events": habits_by_driver.get(did, [])
    }

# Salvi un nuovo file arricchito
with open("drivers_with_style_and_habits.json", "w") as f:
    json.dump(drivers, f, indent=2)'''


In [13]:
import json
import datetime as dt
from collections import defaultdict, Counter

# === 1. Load the driver profiles (already enriched with driving_style if
#        the earlier cells were run) ===
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open("drivers_with_style.json", encoding="utf-8") as f:
    drivers = json.load(f)

# === 2. Load the calendar ===
# Swap in whichever calendar file you want to analyse.
with open("calendar_events_fake.json", encoding="utf-8") as f:
    cal = json.load(f)

events = cal["events"]

# English abbreviations indexed by datetime.weekday() (0 = Monday).
weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

def parse_dt(s: str) -> dt.datetime:
    """Parse an ISO 8601 timestamp string into a ``datetime``.

    A trailing "Z" (UTC designator) is normalised to "+00:00" first, because
    ``datetime.fromisoformat`` only accepts it from Python 3.11 onwards.

    Args:
        s: Timestamp such as "2025-12-01T18:30:00".

    Returns:
        The parsed ``datetime``; timezone-aware only if *s* carries an offset.

    Raises:
        ValueError: If *s* is not a valid ISO 8601 timestamp.
    """
    if s.endswith("Z"):
        s = s[:-1] + "+00:00"
    return dt.datetime.fromisoformat(s)

# === 3. Count recurring patterns ===
# Events are bucketed by driver, weekday, start hour, end hour, category and
# location name; minutes are ignored.
pattern_counts = Counter()
pattern_example = {}

for ev in events:
    begins, finishes = parse_dt(ev["start_time"]), parse_dt(ev["end_time"])
    key = (
        ev["driver_id"],
        weekday_names[begins.weekday()],  # Mon, Tue, ...
        begins.hour,
        finishes.hour,
        ev["category"],
        ev["location_name"],
    )
    # Keep the latest event of each bucket so its location can be read later.
    pattern_example[key] = ev
    pattern_counts[key] += 1

# === 4. Keep only the patterns seen at least twice, grouped by driver ===
habits_by_driver = defaultdict(list)

for key, count in pattern_counts.items():
    if count >= 2:  # a single occurrence is not a recurring habit
        driver_id, weekday, sh, eh, category, loc_name = key
        habit = {
            "day_of_week": weekday,
            "time_range": f"{sh:02d}:00-{eh:02d}:00",
            "category": category,
            "location_name": loc_name,
            # Coordinates come from the stored example event of this bucket.
            "location": pattern_example[key]["location"],
            "occurrences": count,
        }
        habits_by_driver[driver_id].append(habit)

# === 5. Embed each driver's recurring events in their profile ===
# Drivers without any recurring pattern get an empty list.
for profile in drivers["user_profiles"]:
    recurring = habits_by_driver.get(profile["driver_id"], [])
    profile["calendar_habits"] = {"recurring_events": recurring}

# === 6. Write the enriched profiles to a new file ===
with open("drivers_with_style_and_habits.json", "w") as f:
    f.write(json.dumps(drivers, indent=2))


In [14]:
import datetime as dt
import json

# Load the profiles enriched with calendar_habits. JSON text is UTF-8, so
# decode it explicitly instead of relying on the platform's default encoding.
with open("drivers_with_style_and_habits.json", encoding="utf-8") as f:
    drivers = json.load(f)

# Index the profiles by driver_id for direct lookup.
driver_map = {d["driver_id"]: d for d in drivers["user_profiles"]}

def predict_habit(driver_id, current_datetime):
    """Return the recurring calendar event a driver is likely attending.

    The driver's ``calendar_habits["recurring_events"]`` are scanned in
    order, and the first one whose weekday and hour range contain
    ``current_datetime`` is returned.

    Args:
        driver_id: Key into the module-level ``driver_map``.
        current_datetime: ``datetime`` to match; only its weekday and hour
            are used.

    Returns:
        The matching habit dict, or ``None`` when the driver is unknown, has
        no recurring events, or no event matches.
    """
    profile = driver_map.get(driver_id)
    if profile is None:
        return None

    habits = profile.get("calendar_habits", {}).get("recurring_events", [])
    if not habits:
        return None

    # Habits store fixed English abbreviations ("Mon", "Tue", ...).
    # strftime("%a") follows the active locale and could yield another
    # language, so derive the name from weekday() (0 = Monday) instead.
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    weekday = day_names[current_datetime.weekday()]
    current_hour = current_datetime.hour

    for ev in habits:
        if ev["day_of_week"] != weekday:
            continue

        # time_range has the form "HH:MM-HH:MM"; only the hours are compared
        # and the end hour is exclusive.
        start_part, end_part = ev["time_range"].split("-")
        start_h = int(start_part.split(":")[0])
        end_h = int(end_part.split(":")[0])

        if start_h <= current_hour < end_h:
            return ev  # the complete habit record

    return None


In [15]:
# Example query: 1 December 2025 (a Monday) at 18:30.
now = dt.datetime(year=2025, month=12, day=1, hour=18, minute=30)
predict_habit("D007", now)


{'day_of_week': 'Mon',
 'time_range': '18:00-20:00',
 'category': 'leisure',
 'location_name': 'Torino Shopping Gallery',
 'location': {'lat': 45.0709, 'lon': 7.6785},
 'occurrences': 2}