### Step-by-step transformation pipeline for validation + vehicle events + calendar join

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Optional
import sys, os
sys.path.append(os.path.abspath(".."))

In [3]:
validations_path: Optional[str] = "data/apex-validations/apex-validations.csv"
events_path: Optional[str] = "data/vehicle-events/vehicle-events.csv" 
calendar_path: Optional[str] = "data/calendario.xlsx"

### Helper functions

In [4]:
VALID_STATUSES = {0, 4, 5, 6}

def read_any(path: str) -> pd.DataFrame:
    p = Path(path)
    if p.suffix.lower() == ".xlsx":
        return pd.read_excel(p)
    elif p.suffix.lower() in {".csv", ".txt"}:
        return pd.read_csv(p)
    elif p.suffix.lower() in {".json"}:
        return pd.read_json(p, lines=True)
    else:
        raise ValueError(f"Unsupported file type: {p.suffix}")

def to_datetime_from_ms(series: pd.Series) -> pd.Series:
    # Handles ints/floats/strings like "1.75129E+12"
    # Ensure numeric milliseconds, then convert to UTC datetime
    ser = pd.to_numeric(series, errors="coerce")
    return pd.to_datetime(ser, unit="ms", utc=True)

# Convert booleans and ints where appropriate
def to_int(series):
    return pd.to_numeric(series, errors="coerce").astype("Int64")

def to_float(series):
    return pd.to_numeric(series, errors="coerce")


### 1. Load Data

In [19]:
if validations_path:
    df_valid = read_any(validations_path)

if events_path:
    df_event = read_any(events_path)

if calendar_path:
    df_cal = read_any(calendar_path)

  return pd.read_csv(p)
  return pd.read_csv(p)


### 2. Clean + Timestamp conversion

In [20]:
for col in ("created_at","received_at"):
    if col in df_valid.columns:
        df_valid[col] = to_datetime_from_ms(df_valid[col])
        df_event[col] = to_datetime_from_ms(df_event[col])

### 3. Normalize specific fields

In [21]:
# Filter: passenger validations & valid statuses
if "is_passenger" in df_valid.columns:
    df_valid = df_valid[df_valid["is_passenger"].astype(bool)]
if "validation_status" in df_valid.columns:
    df_valid = df_valid[df_valid["validation_status"].isin(VALID_STATUSES)]

In [23]:
df_valid["vehicle_id"] = to_int(df_valid["vehicle_id"])
df_valid["stop_id"] = to_int(df_valid["stop_id"])

In [22]:
df_event["vehicle_id"] = to_int(df_event["vehicle_id"])
df_event["stop_id"] = to_int(df_event["stop_id"])
df_event["latitude"] = to_float(df_event["latitude"])
df_event["longitude"] = to_float(df_event["longitude"])

### 4. Calendar join

In [24]:
df_valid["service_date"] = df_valid["created_at"].dt.tz_convert("UTC").dt.date
df_valid["date_int"] = (df_valid["service_date"].astype(str).str.replace("-","", regex=False)).astype(int)

In [27]:
df_event["service_date"] = df_event["created_at"].dt.tz_convert("UTC").dt.date
df_event["date_int"] = (df_event["service_date"].astype(str).str.replace("-","", regex=False)).astype(int)

In [25]:
df_cal = df_cal.rename(columns={"date":"date_int"})
# Ensure date_int is integer
df_cal["date_int"] = pd.to_numeric(df_cal["date_int"], errors="coerce").astype("Int64")

In [26]:
df_valid = df_valid.merge(df_cal, on="date_int", how="left")

### 5. Aggregations

#### A) Passengers per stop_id per day (with day_type/dia_tipo)

In [28]:
per_stop_day = (
    df_valid
    .dropna(subset=["stop_id"])
    .groupby(["stop_id","service_date","day_type","dia_tipo"], dropna=False)
    .size()
    .reset_index(name="num_passengers")
    .sort_values(["service_date","stop_id"])
)

In [29]:
per_stop_day

Unnamed: 0,stop_id,service_date,day_type,dia_tipo,num_passengers
7,10009,2025-10-13,1,DU,561
15,10010,2025-10-13,1,DU,398
22,10011,2025-10-13,1,DU,1
23,10012,2025-10-13,1,DU,5
29,10018,2025-10-13,1,DU,1
...,...,...,...,...,...
61439,180709,2025-10-20,1,DU,4
61508,180723,2025-10-20,1,DU,4
61523,180727,2025-10-20,1,DU,1
61531,180730,2025-10-20,1,DU,6


#### B) Total passengers per stop_id (all days)

In [30]:
per_stop_total = (
    per_stop_day
    .groupby("stop_id", as_index=False)["num_passengers"]
    .sum()
    .rename(columns={"num_passengers":"num_passengers_total"})
    .sort_values("num_passengers_total", ascending=False)
)

In [31]:
per_stop_total

Unnamed: 0,stop_id,num_passengers_total
934,30119,23143
1963,60002,16761
1962,60001,16751
1995,60071,15771
5193,111025,15173
...,...,...
3203,71493,1
6207,130247,1
3216,71515,1
6204,130244,1


### 6. Aggregating datasets

In [36]:
df_valid.loc[df_valid["trip_id"]=="1709_0_1_0500_0529_0_1"]

Unnamed: 0,_id,agency_id,card_serial_number,category,created_at,device_id,event_type,is_passenger,line_id,mac_ase_counter_value,...,received_at,stop_id,trip_id,units_qty,validation_status,vehicle_id,service_date,date_int,day_type,dia_tipo
0,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,API3OR,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,2025-10-13 04:17:42.318000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
6,68EC794A-0004-40AE-8580-C9E8F5021771,41,N4255Y,subscription,2025-10-13 04:00:10+00:00,sibelius_35400000424,1,True,1709,137073.0,...,2025-10-13 04:17:42.323000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
8,68EC794C-0006-40AE-BF70-C9E8F5021772,41,56C05T,subscription,2025-10-13 04:00:12+00:00,sibelius_35400000424,1,True,1709,137074.0,...,2025-10-13 04:01:38.122000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
104,68EC79CD-0006-40AE-BB10-C9E90A0170AF,41,168LGA,subscription,2025-10-13 04:02:20+00:00,berlioz_38002762,1,True,1709,94383.0,...,2025-10-13 04:11:31.251000+00:00,172467,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
105,68EC79CD-000C-40AE-A790-C9E8F5021775,41,L2QQ12,subscription,2025-10-13 04:02:21+00:00,sibelius_35400000424,1,True,1709,137077.0,...,2025-10-13 04:02:39.287000+00:00,172467,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3095685,68F1C212-0088-40AE-A140-C9E90A0180EF,41,EAO6W6,subscription,2025-10-17 04:12:01+00:00,berlioz_38002762,1,True,1709,98543.0,...,2025-10-17 04:13:23.838000+00:00,30805,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095721,68F1C238-008C-40AE-B0A0-C9E90A0180F1,41,4J1IVP,subscription,2025-10-17 04:12:39+00:00,berlioz_38002762,1,True,1709,98545.0,...,2025-10-17 04:13:23.851000+00:00,30807,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095736,68F1C244-008E-40AE-9940-C9E90A0180F2,41,5ZPBZU,subscription,2025-10-17 04:12:51+00:00,berlioz_38002762,1,True,1709,98546.0,...,2025-10-17 04:13:23.865000+00:00,30807,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095779,68F1C260-0092-40AE-B930-C9E90A0180F4,41,HY3VOJ,subscription,2025-10-17 04:13:19+00:00,berlioz_38002762,1,True,1709,98548.0,...,2025-10-17 04:14:54.025000+00:00,30809,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU


In [32]:
df_event.loc[df_event["trip_id"]=="1709_0_1_0500_0529_0_1"]

Unnamed: 0,_id,agency_id,created_at,driver_id,extra_trip_id,latitude,longitude,odometer,pattern_id,received_at,stop_id,trigger_door,trip_id,vehicle_id,service_date,date_int
0,68ec79427bdfa272f28ac018,41,2025-10-13 04:00:00+00:00,T8YWZQ,,38.813087,-9.226364,253700576,1709_0_1,2025-10-13 04:00:01.780000+00:00,110027,OPENED,1709_0_1_0500_0529_0_1,1203,2025-10-13,20251013
147,68ec7958cb5a103887fa69b7,41,2025-10-13 04:00:23+00:00,T8YWZQ,,38.813023,-9.226362,253700576,1709_0_1,2025-10-13 04:00:24.514000+00:00,110027,CLOSED,1709_0_1_0500_0529_0_1,1203,2025-10-13,20251013
233,68ec796acb5a103887fa69bb,41,2025-10-13 04:00:41+00:00,T8YWZQ,,38.812759,-9.226890,253700656,1709_0_1,2025-10-13 04:00:42.321000+00:00,110024,NO_CHANGE,1709_0_1_0500_0529_0_1,1203,2025-10-13,20251013
234,68ec796a7bdfa272f28ac023,41,2025-10-13 04:00:41+00:00,T8YWZQ,,38.812759,-9.226890,253700656,1709_0_1,2025-10-13 04:00:42.432000+00:00,110024,NO_CHANGE,1709_0_1_0500_0529_0_1,1203,2025-10-13,20251013
256,68ec7970df3e447bc90d8023,41,2025-10-13 04:00:47+00:00,T8YWZQ,,38.812466,-9.227009,253700688,1709_0_1,2025-10-13 04:00:48.720000+00:00,110024,NO_CHANGE,1709_0_1_0500_0529_0_1,1203,2025-10-13,20251013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20810690,68f1c4aa1d39da2af2f12e70,41,2025-10-17 04:23:05+00:00,T8YWZQ,,38.753414,-9.190312,254718800,1709_0_1,2025-10-17 04:23:06.705000+00:00,60068,NO_CHANGE,1709_0_1_0500_0529_0_1,1203,2025-10-17,20251017
20810755,68f1c4af8a635a1d7c692045,41,2025-10-17 04:23:10+00:00,T8YWZQ,,38.753319,-9.190146,254719216,1709_0_1,2025-10-17 04:23:11.712000+00:00,60068,OPENED,1709_0_1_0500_0529_0_1,1203,2025-10-17,20251017
20810768,68f1c4b250d52e23e10343e8,41,2025-10-17 04:23:12+00:00,T8YWZQ,,38.753258,-9.190082,254719216,1709_0_1,2025-10-17 04:23:14.369000+00:00,,NO_CHANGE,1709_0_1_0500_0529_0_1,1203,2025-10-17,20251017
27791375,68f5b3758a635a1d7c73d083,41,2025-10-20 03:58:44+00:00,T8YWZQ,,38.813213,-9.226504,203132976,1709_0_1,2025-10-20 03:58:45.505000+00:00,110027,NO_CHANGE,1709_0_1_0500_0529_0_1,1321,2025-10-20,20251020


In [56]:
agg = (
    df_event.groupby(["stop_id", "trip_id", "pattern_id", "vehicle_id"], as_index=False)
      .agg(
          open_flag=("trigger_door", lambda x: int((x == "OPENED").any())),
          nochange_flag=("trigger_door", lambda x: int((x == "NO_CHANGE").any())),
          latitude=("latitude", "first"),
          longitude=("longitude", "first"),
          created_at=("created_at", "first"),
          received_at=("received_at", "last"),
          date_int=("date_int", "first"),
          driver_id=("vehicle_id", "first")
      )
)

# optional: sort or re-order
agg = agg.sort_values(["vehicle_id", "trip_id", "stop_id"]).reset_index(drop=True)


In [54]:
df_valid[['trip_id']].nunique()

trip_id    55264
dtype: int64

In [58]:
agg[['trip_id']].nunique()

trip_id    56048
dtype: int64

In [59]:
agg

Unnamed: 0,stop_id,trip_id,pattern_id,vehicle_id,open_flag,nochange_flag,latitude,longitude,created_at,received_at,date_int,driver_id
0,20028,3005_0_3_0700_0729_0_ESC_DU,3005_0_3,14,0,1,38.681068,-9.157301,2025-10-13 06:09:09+00:00,2025-10-16 06:10:35.924000+00:00,20251013,14
1,20029,3005_0_3_0700_0729_0_ESC_DU,3005_0_3,14,0,1,38.682278,-9.155580,2025-10-13 06:23:03+00:00,2025-10-17 06:29:36.023000+00:00,20251013,14
2,20030,3005_0_3_0700_0729_0_ESC_DU,3005_0_3,14,0,1,38.683121,-9.153488,2025-10-13 06:22:16+00:00,2025-10-17 06:28:36.956000+00:00,20251013,14
3,20031,3005_0_3_0700_0729_0_ESC_DU,3005_0_3,14,0,1,38.682236,-9.153973,2025-10-13 06:23:59+00:00,2025-10-17 06:30:51.332000+00:00,20251013,14
4,20033,3005_0_3_0700_0729_0_ESC_DU,3005_0_3,14,0,1,38.682663,-9.151899,2025-10-13 06:24:51+00:00,2025-10-17 06:31:21.539000+00:00,20251013,14
...,...,...,...,...,...,...,...,...,...,...,...,...
2832499,160805,4730_0_2|1300|2230,4730_0_2,13991,0,1,38.537315,-9.024763,2025-10-13 21:55:44+00:00,2025-10-13 21:56:20.444000+00:00,20251013,13991
2832500,160807,4730_0_2|1300|2230,4730_0_2,13991,1,1,38.540997,-9.026883,2025-10-13 21:56:30+00:00,2025-10-13 21:57:02.921000+00:00,20251013,13991
2832501,160897,4730_0_2|1300|2230,4730_0_2,13991,0,1,38.524052,-8.997693,2025-10-13 21:48:05+00:00,2025-10-13 21:49:06.463000+00:00,20251013,13991
2832502,160938,4730_0_2|1300|2230,4730_0_2,13991,1,1,38.544304,-9.028796,2025-10-13 21:57:01+00:00,2025-10-13 21:58:40.392000+00:00,20251013,13991


In [115]:
df_valid.sort_values(["created_at", "stop_id","vehicle_id", "trip_id"]).reset_index(drop=True)

Unnamed: 0,_id,agency_id,card_serial_number,category,created_at,device_id,event_type,is_passenger,line_id,mac_ase_counter_value,...,units_qty,validation_status,vehicle_id,service_date,date_int,day_type,dia_tipo,created_minute,time_diff,cluster_id
0,68EC7944-0022-40AE-8110-CA1E3F000000,42,8KDIEE,subscription,2025-10-13 04:00:03+00:00,berlioz_38001070,1,True,2769,0.0,...,,0,2512,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00,0.0,0
1,68EC7944-0022-40AE-8110-CA1E3F000000,42,3JBWK8,subscription,2025-10-13 04:00:03+00:00,berlioz_38001070,1,True,2769,0.0,...,,0,2512,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00,0.0,0
2,68EC7944-0022-40AE-8110-CA1E3F000000,42,Q2JJOI,subscription,2025-10-13 04:00:03+00:00,berlioz_38001070,1,True,2769,0.0,...,,0,2512,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00,0.0,0
3,68EC7944-0022-40AE-8110-CA1E3F000000,42,V114CX,subscription,2025-10-13 04:00:03+00:00,berlioz_38001070,1,True,2769,0.0,...,,0,2512,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00,0.0,0
4,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,API3OR,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4417058,68F5B3B3-002A-40AE-87C0-CA2FBE01B60D,42,XTNW0T,subscription,2025-10-20 03:59:47+00:00,sibelius_35400000126,1,True,2769,112141.0,...,,0,2512,2025-10-20,20251020,1,DU,2025-10-20 03:59:00+00:00,2.0,0
4417059,68F5B3B6-005C-40AE-8D70-C8B791000000,41,Z3VOA8,subscription,2025-10-20 03:59:50+00:00,berlioz_38000913,1,True,1709,0.0,...,,0,1115,2025-10-20,20251020,1,DU,2025-10-20 03:59:00+00:00,0.0,0
4417060,68F5B3B6-006A-40AE-ADB0-C9E90401EB81,41,E2HWA7,subscription,2025-10-20 03:59:50+00:00,sibelius_35400000528,1,True,1218,125825.0,...,,0,1200,2025-10-20,20251020,1,DU,2025-10-20 03:59:00+00:00,5.0,0
4417061,68F5B3BC-006C-40AE-9280-C9E90401EB82,41,QFPGYV,subscription,2025-10-20 03:59:56+00:00,sibelius_35400000528,1,True,1218,125826.0,...,,0,1200,2025-10-20,20251020,1,DU,2025-10-20 03:59:00+00:00,6.0,0


In [142]:
agg_validations = (
    df_valid.groupby(["stop_id", "trip_id", "vehicle_id", "dia_tipo", "line_id", "pattern_id", "service_date"], as_index=False)
      .agg(
          num_validations=("card_serial_number", "count"),        # total validations
          num_unique_cards=("card_serial_number", "nunique"),     # unique passengers/cards
          created_at=("created_at", "first"), 
          day_type=("day_type", "first"),
          day_tipo=("dia_tipo", "first"),
          service_date=("service_date", "first"),
          date_int=("date_int", "first")
      )
      .sort_values(["vehicle_id", "stop_id"])
)



In [66]:
df_valid.loc[df_valid["trip_id"]=="1709_0_1_0500_0529_0_1"]

Unnamed: 0,_id,agency_id,card_serial_number,category,created_at,device_id,event_type,is_passenger,line_id,mac_ase_counter_value,...,received_at,stop_id,trip_id,units_qty,validation_status,vehicle_id,service_date,date_int,day_type,dia_tipo
0,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,API3OR,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,2025-10-13 04:17:42.318000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
6,68EC794A-0004-40AE-8580-C9E8F5021771,41,N4255Y,subscription,2025-10-13 04:00:10+00:00,sibelius_35400000424,1,True,1709,137073.0,...,2025-10-13 04:17:42.323000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
8,68EC794C-0006-40AE-BF70-C9E8F5021772,41,56C05T,subscription,2025-10-13 04:00:12+00:00,sibelius_35400000424,1,True,1709,137074.0,...,2025-10-13 04:01:38.122000+00:00,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
104,68EC79CD-0006-40AE-BB10-C9E90A0170AF,41,168LGA,subscription,2025-10-13 04:02:20+00:00,berlioz_38002762,1,True,1709,94383.0,...,2025-10-13 04:11:31.251000+00:00,172467,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
105,68EC79CD-000C-40AE-A790-C9E8F5021775,41,L2QQ12,subscription,2025-10-13 04:02:21+00:00,sibelius_35400000424,1,True,1709,137077.0,...,2025-10-13 04:02:39.287000+00:00,172467,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3095685,68F1C212-0088-40AE-A140-C9E90A0180EF,41,EAO6W6,subscription,2025-10-17 04:12:01+00:00,berlioz_38002762,1,True,1709,98543.0,...,2025-10-17 04:13:23.838000+00:00,30805,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095721,68F1C238-008C-40AE-B0A0-C9E90A0180F1,41,4J1IVP,subscription,2025-10-17 04:12:39+00:00,berlioz_38002762,1,True,1709,98545.0,...,2025-10-17 04:13:23.851000+00:00,30807,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095736,68F1C244-008E-40AE-9940-C9E90A0180F2,41,5ZPBZU,subscription,2025-10-17 04:12:51+00:00,berlioz_38002762,1,True,1709,98546.0,...,2025-10-17 04:13:23.865000+00:00,30807,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU
3095779,68F1C260-0092-40AE-B930-C9E90A0180F4,41,HY3VOJ,subscription,2025-10-17 04:13:19+00:00,berlioz_38002762,1,True,1709,98548.0,...,2025-10-17 04:14:54.025000+00:00,30809,1709_0_1_0500_0529_0_1,,0,1203,2025-10-17,20251017,1,DU


In [103]:
df_valid.loc[(df_valid["trip_id"]=="1709_0_1_0500_0529_0_1") & (df_valid['stop_id'] == 110027)]

Unnamed: 0,_id,agency_id,card_serial_number,category,created_at,device_id,event_type,is_passenger,line_id,mac_ase_counter_value,...,stop_id,trip_id,units_qty,validation_status,vehicle_id,service_date,date_int,day_type,dia_tipo,created_minute
0,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,API3OR,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
6,68EC794A-0004-40AE-8580-C9E8F5021771,41,N4255Y,subscription,2025-10-13 04:00:10+00:00,sibelius_35400000424,1,True,1709,137073.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
8,68EC794C-0006-40AE-BF70-C9E8F5021772,41,56C05T,subscription,2025-10-13 04:00:12+00:00,sibelius_35400000424,1,True,1709,137074.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
49355,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,MM1RJR,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
49361,68EC794A-0004-40AE-8580-C9E8F5021771,41,GQVVSL,subscription,2025-10-13 04:00:10+00:00,sibelius_35400000424,1,True,1709,137073.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
49363,68EC794C-0006-40AE-BF70-C9E8F5021772,41,LMV55I,subscription,2025-10-13 04:00:12+00:00,sibelius_35400000424,1,True,1709,137074.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
59216,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,NLP9F6,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
59222,68EC794A-0004-40AE-8580-C9E8F5021771,41,A32XFA,subscription,2025-10-13 04:00:10+00:00,sibelius_35400000424,1,True,1709,137073.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
59224,68EC794C-0006-40AE-BF70-C9E8F5021772,41,XHX2P6,subscription,2025-10-13 04:00:12+00:00,sibelius_35400000424,1,True,1709,137074.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00
78942,68EC7944-0002-40AE-B4C0-C9E8F5021770,41,9TL4CL,subscription,2025-10-13 04:00:03+00:00,sibelius_35400000424,1,True,1709,137072.0,...,110027,1709_0_1_0500_0529_0_1,,0,1203,2025-10-13,20251013,1,DU,2025-10-13 04:00:00+00:00


In [143]:
agg_validations.loc[(agg_validations["trip_id"]=="1709_0_1_0500_0529_0_1") & (agg_validations['stop_id'] == 110027)]

Unnamed: 0,stop_id,trip_id,vehicle_id,dia_tipo,line_id,pattern_id,num_validations,num_unique_cards,created_at,day_type,day_tipo,service_date,date_int
573739,110027,1709_0_1_0500_0529_0_1,1203,DU,1709,1709_0_1,12,12,2025-10-13 04:00:03+00:00,1,DU,2025-10-13,20251013
573740,110027,1709_0_1_0500_0529_0_1,1203,DU,1709,1709_0_1,2,2,2025-10-14 03:59:17+00:00,1,DU,2025-10-14,20251014
573741,110027,1709_0_1_0500_0529_0_1,1203,DU,1709,1709_0_1,1,1,2025-10-15 03:59:21+00:00,1,DU,2025-10-15,20251015
573742,110027,1709_0_1_0500_0529_0_1,1203,DU,1709,1709_0_1,2,2,2025-10-16 04:00:18+00:00,1,DU,2025-10-16,20251016
573743,110027,1709_0_1_0500_0529_0_1,1203,DU,1709,1709_0_1,3,3,2025-10-17 03:59:31+00:00,1,DU,2025-10-17,20251017


In [98]:
agg_validations

Unnamed: 0,stop_id,trip_id,vehicle_id,dia_tipo,line_id,num_validations,num_unique_cards,created_at,day_type,day_tipo,service_date
396042,20957,3508_0_1_0700_0729_0_ESC_DU,0,DU,3508,1,1,2025-10-14 06:18:30+00:00,1,DU,2025-10-14
2766116,140051,3508_0_2_0600_0629_0_ESC_DU,0,DU,3508,1,1,2025-10-14 05:23:29+00:00,1,DU,2025-10-14
2806126,140105,3108_0_2_0930_0959_1_ESC_DU,0,DU,3108,1,1,2025-10-13 08:59:08+00:00,1,DU,2025-10-13
2812204,140108,3508_0_2_1100_1129_0_ESC_DU,0,DU,3508,1,1,2025-10-14 11:05:19+00:00,1,DU,2025-10-14
2813269,140109,3508_0_1_0700_0729_0_ESC_DU,0,DU,3508,1,1,2025-10-14 06:43:58+00:00,1,DU,2025-10-14
...,...,...,...,...,...,...,...,...,...,...,...
3367228,162008,4562_0_1|1600|0845,13991,DU,4562,1,1,2025-10-16 07:43:34+00:00,1,DU,2025-10-16
3367229,162008,4562_0_1|1600|0845,13991,DU,4562,1,1,2025-10-16 07:43:38+00:00,1,DU,2025-10-16
3367230,162008,4562_0_1|1600|0845,13991,DU,4562,1,1,2025-10-16 07:43:42+00:00,1,DU,2025-10-16
3367231,162008,4562_0_1|1600|0845,13991,DU,4562,1,1,2025-10-16 07:43:48+00:00,1,DU,2025-10-16


In [78]:
agg_validations

Unnamed: 0,stop_id,trip_id,vehicle_id,dia_tipo,line_id,device_id,num_validations,num_unique_cards,first_validation,day_type,day_tipo,service_date
94251,20957,3508_0_1_0700_0729_0_ESC_DU,0,DU,3508,monet_23963309,1,1,2025-10-14 06:18:30+00:00,1,DU,2025-10-14
779477,140051,3508_0_2_0600_0629_0_ESC_DU,0,DU,3508,monet_23963309,1,1,2025-10-14 05:23:29+00:00,1,DU,2025-10-14
787636,140105,3108_0_2_0930_0959_1_ESC_DU,0,DU,3108,monet_23963309,1,1,2025-10-13 08:59:08+00:00,1,DU,2025-10-13
788931,140108,3508_0_2_1100_1129_0_ESC_DU,0,DU,3508,monet_23963309,1,1,2025-10-14 11:05:19+00:00,1,DU,2025-10-14
789208,140109,3508_0_1_0700_0729_0_ESC_DU,0,DU,3508,monet_23963309,1,1,2025-10-14 06:43:58+00:00,1,DU,2025-10-14
...,...,...,...,...,...,...,...,...,...,...,...,...
915647,162008,4549_0_1|1500|0750,13991,DU,4549,11758,1,1,2025-10-15 07:13:55+00:00,1,DU,2025-10-15
915656,162008,4549_0_2|1500|0730,13991,DU,4549,11758,22,22,2025-10-15 06:29:05+00:00,1,DU,2025-10-15
915709,162008,4562_0_1|1400|0915,13991,DU,4562,11758,12,12,2025-10-14 08:21:22+00:00,1,DU,2025-10-14
915714,162008,4562_0_1|1400|1145,13991,DU,4562,11758,20,20,2025-10-14 10:38:54+00:00,1,DU,2025-10-14
