In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

In [None]:
# Adjust these relative paths to wherever the simulation and plan files live.
# Both files are read as JSON Lines (one JSON record per line).
eventos, planes = (
    pd.read_json(ruta, lines=True)
    for ruta in ("../../simulation.jsonlines", "../../plans.jsonlines")
)

In [45]:
# Distinct event types present in the simulation log.
eventos["eventType"].unique()

array(['Truck initialized', 'Truck received packets', 'Truck departed',
       'Truck arrived', 'Truck started delivering',
       'Truck ended delivering', 'Truck departed to depot',
       'Truck ended route'], dtype=object)

In [44]:
# Show the columns of the events frame.
# NOTE(review): the saved output lists prev_event, prev_time and delta, which are only
# added further down when tiempos_sim is computed, and the execution counts here are
# non-sequential. This cell was last run out of order; on a fresh Restart & Run All it
# will presumably show only the columns present in the raw file.
eventos.columns

Index(['eventDescription', 'eventTime', 'eventType', 'simulationId', 'truckId',
       'prev_event', 'prev_time', 'delta'],
      dtype='object')

In [None]:
# Flatten the nested plans into one long frame: one row per entry of each truck's
# "route", tagged with the simulation and truck it belongs to.
#
# Each simulationId is processed once, using the first plan row that carries it
# (same rows the per-id lookup `planes[planes.simulationId == simId].trucks.values[0]`
# would pick), but in a single pass instead of re-filtering `planes` for every id.
planes_unicos = planes.drop_duplicates(subset="simulationId")
rutas_por_camion = [
    pd.DataFrame(truck["route"]).assign(simulationId=sim_id, truckId=truck["truck_id"])
    for sim_id, trucks in zip(planes_unicos["simulationId"], planes_unicos["trucks"])
    for truck in trucks
]
# The per-truck index (position within the route) is kept as-is, so index values repeat
# across trucks.
camiones = pd.concat(rutas_por_camion)

In [None]:
# Planned durations per (simulation, truck), collected as a list in route order.
# Durations are scaled by 1000 (presumably seconds -> milliseconds so they are
# comparable with the eventTime deltas of the simulation; TODO confirm units).
tiempos_plan = (
    camiones
    .sort_values(["simulationId", "truckId"])
    .assign(duration=lambda df: df["duration"] * 1000)
    .groupby(["simulationId", "truckId"])["duration"]
    .agg(list)
    .reset_index()
    .rename(columns={"duration": "tiempo_plan"})
)
tiempos_plan

In [None]:
# Order the log chronologically within each truck of each simulation, then attach to every
# event the type and time of the event that preceded it for the same truck/simulation.
eventos = eventos.sort_values(["simulationId", "truckId", "eventTime"])
por_camion = eventos.groupby(["truckId", "simulationId"])
eventos = eventos.assign(
    prev_event=por_camion["eventType"].shift(1),
    prev_time=por_camion["eventTime"].shift(1),
    # Elapsed time since the previous event; NaN for the first event of each group.
    delta=lambda df: df["eventTime"] - df["prev_time"],
)

# Simulated durations: the delta recorded on each arrival / end-of-route event
# (presumably the time since the matching departure), listed per (simulation, truck)
# in chronological order.
es_fin_de_tramo = eventos["eventType"].isin(["Truck arrived", "Truck ended route"])
tiempos_sim = (
    eventos[es_fin_de_tramo]
    .sort_values(["simulationId", "truckId", "eventTime"])
    .groupby(["simulationId", "truckId"])["delta"]
    .agg(list)
    .reset_index()
    .rename(columns={"delta": "tiempo_sim"})
)
tiempos_sim

In [46]:
# Pair simulated and planned duration lists for every (simulation, truck) present in both
# tables (inner join on the two keys).
# Note: dropna only discards rows with a missing cell; it does not look inside the lists,
# and it does not check that tiempo_sim and tiempo_plan have the same length.
retrasos = (
    pd.merge(tiempos_sim, tiempos_plan, on=["simulationId", "truckId"])
    .dropna()
    .reset_index(drop=True)
)
retrasos

Unnamed: 0,simulationId,truckId,tiempo_sim,tiempo_plan
0,00ab3765-2a12-425d-a12a-07577a572829,0013DYS,"[502180.0, 730650.0, 259140.0, 548940.0, 27281...","[502000, 730000, 259000, 548000, 272000, 1660000]"
1,00ab3765-2a12-425d-a12a-07577a572829,3321FBL,"[154980.0, 405820.0, 379110.0, 758550.0, 85260...","[154000, 405000, 379000, 758000, 85000, 422000..."
2,00ab3765-2a12-425d-a12a-07577a572829,3953RLD,"[486100.0, 1678300.0, 722340.0, 1684850.0]","[486000, 1678000, 722000, 1684000]"
3,00ab3765-2a12-425d-a12a-07577a572829,5534TPH,"[534930.0, 366310.0, 147260.0, 552830.0, 51980...","[534000, 366000, 147000, 552000, 51000, 777000..."
4,00ab3765-2a12-425d-a12a-07577a572829,6270NFM,"[1308010.0, 1377610.0, 1901050.0]","[1307000, 1377000, 1900000]"
...,...,...,...,...
619,fd4bc7cb-f234-4653-a87d-2514d6df6eca,3953RLD,"[154990.0, 4097980.0, 3830120.0, 511620.0]","[154000, 4097000, 3829000, 511000]"
620,fd4bc7cb-f234-4653-a87d-2514d6df6eca,5030LXK,"[715090.0, 322930.0, 223260.0, 877610.0, 89063...","[714000, 322000, 223000, 877000, 890000, 10100..."
621,fd4bc7cb-f234-4653-a87d-2514d6df6eca,5534TPH,"[1327170.0, 77850.0, 418140.0, 215750.0, 23805...","[1327000, 77000, 418000, 215000, 237000, 58100..."
622,fd4bc7cb-f234-4653-a87d-2514d6df6eca,6270NFM,"[772130.0, 169930.0, 124920.0, 191040.0, 19905...","[772000, 169000, 124000, 190000, 198000, 28800..."


In [None]:
# Build one (planned, simulated) pair per position of each truck's lists. Pairing is
# positional, so if the two lists of a truck differ in length the extra entries of the
# longer one are ignored.
pares = [
    par
    for plan, sim in zip(retrasos["tiempo_plan"], retrasos["tiempo_sim"])
    for par in zip(plan, sim)
]
arr = np.array(pares)

# Column vectors for scikit-learn: x = planned duration, y = simulated duration,
# both divided by 1000 (undoing the x1000 scaling applied to the plan durations).
x = arr[:, 0].reshape(-1, 1) / 1000
y = arr[:, 1].reshape(-1, 1) / 1000


In [None]:
# Fit the travel-time model: simulated duration (y) as a linear function of the
# planned duration (x). `fit` returns the fitted estimator itself.
travelModel = LinearRegression().fit(x, y)

# Persist the fitted model next to the notebook.
with open('travelModel.pkl', 'wb') as archivo_modelo:
    pickle.dump(travelModel, archivo_modelo)


In [None]:
# Delivery-time model: predicts the delta recorded on "Truck ended delivering" events
# (time elapsed since the truck's previous event) from the truck id.
# Requires the `delta` column that is added to `eventos` when tiempos_sim is computed.
#
# .loc + .copy() gives an independent frame, so the truckId assignment below does not
# write into a slice of `eventos` (which raised SettingWithCopyWarning). Rows without a
# delta are dropped because LinearRegression cannot be fitted on NaN targets.
tiemposEntrega = (
    eventos.loc[eventos.eventType == "Truck ended delivering", ["truckId", "delta"]]
    .dropna(subset=["delta"])
    .copy()
)

# Label encoding: each truck id becomes an integer code.
# NOTE(review): the regression treats that code as a numeric quantity, so it fits a single
# slope across ids whose ordering is arbitrary. One-hot encoding would be the usual choice,
# but it would change the input expected by consumers of deliveryModel.pkl / le.pkl, so the
# encoding is left unchanged here.
le = LabelEncoder()
tiemposEntrega["truckId"] = le.fit_transform(tiemposEntrega["truckId"])

# Train the model.
# NOTE(review): unlike the travel model inputs, delta is not divided by 1000 here, so
# predictions are in raw eventTime units; confirm that consumers expect this.
deliveryModel = LinearRegression()
deliveryModel.fit(
    tiemposEntrega["truckId"].values.reshape(-1, 1),
    tiemposEntrega["delta"].values.reshape(-1, 1),
)

# Save the model and the label encoder (both are needed to predict for a truck id).
with open('deliveryModel.pkl', 'wb') as f:
    pickle.dump(deliveryModel, f)

with open('le.pkl', 'wb') as f:
    pickle.dump(le, f)
