# Tests de Feature Engineering

In [3]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np

## DETECTAR RUTA Y PUNTO DE INICIO ESTABLE

In [6]:
import pandas as pd, numpy as np, math

DIST_THRESH_M   = 200.0   # distancia máxima a la ruta para considerar "pegado"
MIN_POINTS      = 8       # puntos mínimos en la racha inicial
MIN_PROGRESS_M  = 600.0   # avance mínimo (m) a lo largo de la ruta en la racha
MAX_BACKTRACK_M = 80.0    # retroceso máximo permitido entre puntos consecutivos (m)
FRAC_WITHIN     = 0.8     # % de puntos dentro del umbral en la ventana

def prepare_route_geoms(df_stations):
    geoms = {}  # (LINEA, DIR) -> dict(rx, ry, route_cum, length_m, lat0, lon0, is_circular)
    for (linea, d), g in df_stations.sort_values("ORDEN").groupby(["LINEA","DIR"], sort=False):
        
        # usa columnas lat/lon
        latv = g[[c for c in g.columns if c.upper()=="LAT"][0]].astype(float).to_numpy()
        lonv = g[[c for c in g.columns if c.upper()=="LON"][0]].astype(float).to_numpy()

        if len(latv) < 2: 
            continue

        lat0, lon0 = float(latv.mean()), float(lonv.mean())
        rx, ry = ll_to_xy_m(latv, lonv, lat0, lon0)         # ya tienes ll_to_xy_m
        route_cum = cumulative_distances(rx, ry)            # ya tienes cumulative_distances
        length_m = float(route_cum[-1])
        
        # ruta cirucular?
        is_circ = str(d).upper()=="CIRCULAR"

        geoms[(linea, d)] = dict(rx=rx, ry=ry, route_cum=route_cum, length_m=length_m,
                                 lat0=lat0, lon0=lon0, is_circular=is_circ)
        
    return geoms

def meters_per_degree(lat_deg):
    lat = math.radians(lat_deg)
    mlat = 111132.92 - 559.82*math.cos(2*lat) + 1.175*math.cos(4*lat) - 0.0023*math.cos(6*lat)
    mlon = 111412.84*math.cos(lat) - 93.5*math.cos(3*lat) + 0.118*math.cos(5*lat)
    return mlat, mlon

def ll_to_xy_m(lat, lon, lat0, lon0):
    mlat, mlon = meters_per_degree(lat0)
    return (lon - lon0)*mlon, (lat - lat0)*mlat

def cumulative_distances(x, y):
    dx = np.diff(x); dy = np.diff(y)
    return np.concatenate([[0.0], np.cumsum(np.sqrt(dx*dx + dy*dy))])

def project_point_to_polyline(px, py, rx, ry, route_cum):
    if len(rx) < 2: return float('inf'), float('nan')
    dx = np.diff(rx); dy = np.diff(ry)
    seg2 = dx*dx + dy*dy
    ax = rx[:-1]; ay = ry[:-1]
    pax = px - ax;  pay = py - ay
    t = np.divide(pax*dx + pay*dy, seg2, out=np.zeros_like(seg2), where=seg2>0)
    t = np.clip(t, 0, 1)
    projx = ax + t*dx; projy = ay + t*dy
    d2 = (px - projx)**2 + (py - projy)**2
    i = int(np.argmin(d2))
    s = float(route_cum[i] + t[i]*(np.sqrt(seg2[i]) if seg2[i]>0 else 0))
    return float(np.sqrt(d2[i])), s

def find_first_adherent_index(dist_m, s_m,
                              distance_thresh_m=200.0,
                              min_points=8,
                              min_progress_m=600.0,
                              max_backtrack_m=80.0,
                              window_points=None,
                              frac_within=0.8,
                              route_length=None,
                              is_circular=False):
    
    if window_points is None: window_points = min_points
    n = len(dist_m)
    
    def diffs_forward(s):
        ds = np.diff(s)
        if is_circular and (route_length is not None) and (route_length > 0):
            wrap_mask = ds < -0.5*route_length
            ds = np.where(wrap_mask, ds + route_length, ds)
        return ds
    
    def good(i0, i1):
        d = dist_m[i0:i1]; s = s_m[i0:i1]
        inside = np.isfinite(d) & (d <= distance_thresh_m)
        
        if inside.size == 0 or np.mean(inside) < frac_within: return False
        
        s_ok = np.isfinite(s)
        if not np.any(s_ok): return False
        
        s2 = s[s_ok]
        ds = diffs_forward(s2)
        back = np.maximum(0.0, -ds)
        
        if np.any(back > max_backtrack_m): return False
        prog = float(np.nanmax(s2) - np.nanmin(s2))
        return prog >= min_progress_m
    
    for start in range(0, n - window_points + 1):
        if good(start, start + window_points):
            j = start + window_points
            while j < n and good(start, j): j += 1
            return start, j - 1
    return None, None

def adhesion_metrics_for_candidate(lat, lon, route,
                                   DIST_THRESH_M=200.0,
                                   MIN_POINTS=8,
                                   MIN_PROGRESS_M=600.0,
                                   MAX_BACKTRACK_M=80.0,
                                   FRAC_WITHIN=0.8,
                                   ts=None,
                                   idx=None):
    dist_m, s_m = snap_track_to_route(lat, lon, route)
    t0, t1 = find_first_adherent_index(dist_m, s_m,
                                       distance_thresh_m=DIST_THRESH_M,
                                       min_points=MIN_POINTS,
                                       min_progress_m=MIN_PROGRESS_M,
                                       max_backtrack_m=MAX_BACKTRACK_M,
                                       frac_within=FRAC_WITHIN,
                                       route_length=route["length_m"],
                                       is_circular=route["is_circular"])
    if t0 is None:
        return None  # no hay adhesión sostenida
    
    run = slice(t0, t1+1)
    mean_dev = float(np.nanmean(dist_m[run]))
    frac_in  = float(np.mean(dist_m[run] <= DIST_THRESH_M))
    progress = float(np.nanmax(s_m[run]) - np.nanmin(s_m[run]))
    dur_pts  = int(t1 - t0 + 1)
    
    result = dict(t_start=t0, t_end=t1, mean_dev=mean_dev,
                frac_in=frac_in, progress=progress, dur_pts=dur_pts,
                route_len=route["length_m"])
    
    # timestamps e índices originales
    if ts is not None:
        ts = pd.to_datetime(ts, errors="coerce")
        result["t_start_ts"] = ts.iloc[t0]
        result["t_end_ts"]   = ts.iloc[t1]
    if idx is not None:
        idx = np.asarray(idx)
        result["idx_start"] = int(idx[t0])
        result["idx_end"]   = int(idx[t1])

    return result

# --- proyectar el trip a una ruta
def snap_track_to_route(lat, lon, route):
    px, py = ll_to_xy_m(lat, lon, route["lat0"], route["lon0"])
    pairs = [project_point_to_polyline(px[i], py[i], route["rx"], route["ry"], route["route_cum"])
             for i in range(len(px))]
    dist_m = np.fromiter((p[0] for p in pairs), dtype=float, count=len(pairs))
    s_m    = np.fromiter((p[1] for p in pairs), dtype=float, count=len(pairs))
    return dist_m, s_m


### ---- Funciones para shortlist de líneas ----

# Construir mapas de ruta (LINEA, DIR) -> lista de ESTACION en ORDEN
def build_route_dicts(route_df):
    route_map = {}
    station_pos = {}
    for (linea, d), g in route_df.sort_values("ORDEN").groupby(["LINEA","DIR"], sort=False):
        seq = g["ESTACION"].tolist()
        route_map[(linea, d)] = seq
        for i, est in enumerate(seq):
            station_pos[(linea, d, est)] = i
    return route_map, station_pos

# LIS O(n log n) con reconstrucción de índices
def lis_indices(seq):
    import bisect
    tails = []
    tails_idx = []
    prev = [-1]*len(seq)
    pos = []
    for i, x in enumerate(seq):
        j = bisect.bisect_left(tails, x)
        if j == len(tails):
            tails.append(x)
            tails_idx.append(i)
            pos.append(i)
        else:
            tails[j] = x
            tails_idx[j] = i
            pos[j] = i
        if j > 0:
            prev[i] = pos[j-1]
    if not tails_idx:
        return 0, []
    k = tails_idx[-1]
    out = []
    while k != -1:
        out.append(k)
        k = prev[k]
    out.reverse()
    return len(out), out


In [131]:
# ----------- EJECUCIÓN PRINCIPAL -----------

def parse_pos(s):
        a, b = [float(t.strip()) for t in str(s).split(",")]
        return pd.Series({"LAT": a, "LON": b})
    
# Parámetros y rutas
UNIT = "u213"
TRACK_CSV   = f"../clean_data/{UNIT}/{UNIT}_clean_trips.csv"
STATIONS_XLS= "../data/Estaciones_ordenadas_with_pos.xlsx"

# Carga estaciones (Excel): usa POSICIÓN "lat, lon" y ordena por ORDEN.
stations_ord = pd.read_excel(STATIONS_XLS)
stations_ord[["LAT", "LON"]] = stations_ord["POSICIÓN"].apply(parse_pos)
stations_ord = stations_ord.sort_values("ORDEN")

# Estaciones únicas de cada línea
unique_stations = stations_ord[["LINEA","DIR","ESTACION","LAT","LON"]].drop_duplicates()
unique_stations_by_line = {}
for linea, g in unique_stations.groupby("LINEA", sort=False):
    unique_stations_by_line[linea] = g[["ESTACION","LAT","LON"]].drop_duplicates().reset_index(drop=True)

# Carga trips
df = pd.read_csv(TRACK_CSV, dtype={"trip_id": str})
df["Fecha"] = pd.to_datetime(df["Fecha"], errors="coerce")
df = df.sort_values(["trip_id","Fecha"])

# Trip específico para pruebas
# df = df[df["trip_id"] == "407"]

# Secuencia por trip de estaciones cercanas
trip_seqs = (
    df.dropna(subset=["estacion_cercana"])
      .assign(estacion_cercana=lambda x: x["estacion_cercana"].astype(str))
      .groupby("trip_id")["estacion_cercana"]
      .apply(list)
      .to_dict()
)

# Por trip, hacer un shortlist de K rutas candidatas:
""" K = 3
trip_candidates = {}  # tid -> list[(key=(LINEA,DIR), length, coverage, last_prog)]
for tid, est_seq in trip_seqs.items():
    cands = []
    for key, seq in route_map.items():
        positions = [station_pos.get((key[0], key[1], s)) for s in est_seq]
        mapped = [p for p in positions if p is not None]
        if not mapped:
            continue
        length, lis_idx = lis_indices(mapped)
        coverage = len(set([positions[i] for i in range(len(positions)) if positions[i] is not None]))
        last_prog = mapped[lis_idx[-1]] if lis_idx else -1
        cands.append((key, length, coverage, last_prog))
    cands.sort(key=lambda x: (x[1], x[2], x[3]), reverse=True)
    trip_candidates[tid] = cands[:K] if cands else [] """

# Ranking por adhesión sostenida (geometría) ===
geoms = prepare_route_geoms(stations_ord) 

rows2 = []
for tid, g in df.groupby("trip_id", sort=False):
    g = g.sort_values("Fecha")
    unique_stations_visited = g["estacion_cercana"].dropna().astype(str).unique().tolist()
    lat = g["Latitud"].to_numpy(dtype=float)
    lon = g["Longitud"].to_numpy(dtype=float)
    ts  = g["Fecha"]                      # Series alineada con lat/lon
    idx = g.index                         # índices reales en df
    
    # Preselección de rutas a probar:
    cand_list = None

    if 'SAN RAFAEL' in unique_stations_visited or 'PARAÍSO' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 18 - B'.casefold()]
        
    elif 'ATLÁNTIDA' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 18 - A'.casefold()] + [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 18 - B'.casefold()]
        
    elif 'CEJUSA ANDÉN SUR' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 7'.casefold()]
        
    elif 'JOCOTENANGO' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 2'.casefold()]
        
    elif 'CENTRO ZONA 6' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 6'.casefold()]
        
    elif 'EXPOSICIÓN' in unique_stations_visited:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 13 - A'.casefold()] + [ (key, 0, 0, 0) for key in geoms.keys() if key[0].strip().casefold() == 'Linea 13 - B'.casefold()]
        
        
    # si no hay shortlist, prueba TODAS las rutas (puede ser más lento)
    if not cand_list:
        cand_list = [ (key, 0, 0, 0) for key in geoms.keys() ]

    scored = []
    for (linea, d), _, _, _ in cand_list:
        route = geoms.get((linea, d))
        if route is None: 
            continue
        
        min_points = MIN_POINTS
        min_progress = MIN_PROGRESS_M
        max_backtrack = MAX_BACKTRACK_M
        
        # AJUSTE PARA CASOS ESPECIALES (mucho ruido)
        # rutas muy cortas: relaja mínimo de puntos y distancia mínima
        if len(route["rx"]) < MIN_POINTS:
            min_points = len(route["rx"])
            min_progress = 300
            max_backtrack = 150
            
        
        met = adhesion_metrics_for_candidate(lat, lon, route,
                                             DIST_THRESH_M, min_points, min_progress,
                                             max_backtrack, FRAC_WITHIN, ts=ts, idx=idx)
        if met is None:
            continue

        # Score adhesión:
        #   - mucha duración y progreso (normalizado por largo ruta)
        #   - alta fracción dentro del umbral
        #   - poca desviación promedio
        #   - inicio temprano favorecido
        dur = met["dur_pts"]
        prog_norm = met["progress"]
        score = (2.5*prog_norm) + (1.5*met["frac_in"]) + (0.5*dur/len(lat)) - (met["mean_dev"]/DIST_THRESH_M) - (0.3*met["t_start"]/max(len(lat),1))

        scored.append({
            "trip_id": tid, "LINEA": linea, "DIR": d,
            "t_start": met["t_start"], "t_end": met["t_end"],
            "t_start_ts": met.get("t_start_ts"), "t_end_ts": met.get("t_end_ts"),
            "idx_start": met.get("idx_start"), "idx_end": met.get("idx_end"),
            "progress_m": met["progress"], "mean_dev_m": met["mean_dev"],
            "frac_in": met["frac_in"], "dur_pts": dur, "score": float(score)
        })

    if scored:
        scored.sort(key=lambda r: (r["LINEA"], r["DIR"]))
        best_by_line = {}
        for r in scored:
            key = r["LINEA"]
            if key not in best_by_line:
                best_by_line[key] = r
            else:
                # si es la misma línea, favorece el inicio más temprano
                cur = best_by_line[key]
                if (r["t_start"] < cur["t_start"]) and (r["score"] >= 0.5*cur["score"]):
                    best_by_line[key] = r
                    
        best = max(best_by_line.values(), key=lambda r: r["score"])
        rows2.append(best)
    else:
        rows2.append({"trip_id": tid, "LINEA": None, "DIR": None,
                      "t_start": None, "t_start_ts": None, "idx_start": None,
                      "t_end": None, "t_end_ts": None, "idx_end": None,
                      "progress_m": 0.0, "mean_dev_m": np.nan,
                      "frac_in": 0.0, "dur_pts": 0, "score": -1e9})

route_scores_df = pd.DataFrame(rows2)

# Ordenar resultados por trip_id numéricamente
route_scores_df["trip_id_num"] = pd.to_numeric(route_scores_df["trip_id"], errors="coerce")
route_scores_df = route_scores_df.sort_values("trip_id_num").drop(columns=["trip_id_num"])

In [132]:
route_scores_df

Unnamed: 0,trip_id,LINEA,DIR,t_start,t_end,t_start_ts,t_end_ts,idx_start,idx_end,progress_m,mean_dev_m,frac_in,dur_pts,score
0,1,Linea 6,IDA,424,435,2024-01-02 11:26:40,2024-01-02 11:34:42,424,435,2602.345788,71.184944,1.000000,12,6506.893882
63,2,Linea 13 - A,IDA,10,18,2024-01-03 04:46:39,2024-01-03 04:50:53,1067,1075,1370.267022,20.655694,1.000000,9,3427.068625
138,3,Linea 13 - A,IDA,4,31,2024-01-03 09:52:13,2024-01-03 10:11:50,1406,1433,10054.839615,13.471440,1.000000,28,25138.548931
206,4,Linea 13 - A,IDA,8,26,2024-01-04 04:40:43,2024-01-04 04:51:09,2152,2170,10063.946625,39.752059,0.947368,19,25161.108968
276,5,Linea 13 - A,IDA,11,23,2024-01-04 14:34:41,2024-01-04 14:44:28,2508,2520,6683.077862,39.732191,0.923077,13,16708.888629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,552,Linea 13 - A,IDA,11,24,2025-04-25 15:24:50,2025-04-25 15:37:16,292762,292775,6533.612533,41.596280,0.928571,14,16335.224115
311,553,Linea 6,IDA,49,59,2025-04-26 10:00:18,2025-04-26 10:08:21,293268,293278,2416.678042,83.572495,1.000000,11,6042.766546
312,554,Linea 13 - A,IDA,9,18,2025-04-28 04:43:42,2025-04-28 04:51:02,294088,294097,7380.906913,42.812711,0.900000,10,18453.406859
314,561,Linea 6,IDA,31,54,2025-04-29 16:37:58,2025-04-29 16:58:30,294742,294765,2576.819445,33.365169,1.000000,24,6443.389994


In [136]:
# Guardar resultados
OUT_CSV = f"../data_with_features/{UNIT}/{UNIT}_trip_routes.csv"

# Crear carpeta si no existe
import os
os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True)

route_scores_df.to_csv(OUT_CSV, index=False)

In [133]:
# Cantidad de viajes
print(df.groupby("trip_id").ngroups)
print(len(route_scores_df))

348
348


In [134]:
# VIAJES QUE MUY PROBABLEMENTE HICIERON DOS LÍNEAS

# Viajes con t_start > 300
multiline_trips =route_scores_df[route_scores_df["t_start"] > 300]
multiline_trips

Unnamed: 0,trip_id,LINEA,DIR,t_start,t_end,t_start_ts,t_end_ts,idx_start,idx_end,progress_m,mean_dev_m,frac_in,dur_pts,score
0,1,Linea 6,IDA,424,435,2024-01-02 11:26:40,2024-01-02 11:34:42,424,435,2602.345788,71.184944,1.0,12,6506.893882
288,52,Linea 6,IDA,624,637,2024-05-23 14:00:15,2024-05-23 14:10:07,35737,35750,655.049009,198.810321,0.785714,14,1637.623911
126,282,Linea 6,IDA,500,518,2024-10-07 13:30:55,2024-10-07 13:44:03,143165,143183,2447.743323,37.201393,1.0,19,6120.537335
183,365,Linea 6,IDA,426,436,2024-12-02 10:56:48,2024-12-02 11:06:48,187202,187212,691.080511,194.747653,0.727273,11,1727.698429
265,484,Linea 2,CIRCULAR,1024,1059,2025-03-05 19:13:30,2025-03-05 19:40:03,253956,253991,2493.49253,44.113293,1.0,36,6234.750919


In [None]:
UNIT = 'u058'

# Visualizar resultados
# Graficar la polilínea del inicio de cada viaje con folium

def plot_trip_with_adherence(g_trip, met, outfile="trip_map.html"):
    """
    g_trip: DataFrame solo del trip (ya filtrado), ordenado por Fecha
    met: dict con t_start_ts, t_end_ts, idx_start, idx_end
    """
    import folium

    # Localiza la posición (iloc) dentro de g_trip a partir de los índices reales
    pos0 = int(met["t_start"])
    pos1 = int(met["t_end"])

    # Rebanadas seguras
    g_pre  = g_trip.iloc[:pos0]
    g_run  = g_trip.iloc[pos0:pos1+1]
    g_post = g_trip.iloc[pos1+1:]

    # (Opcional) sanity checks útiles
    print("Check tiempos:",
          g_run["Fecha"].iloc[0], "vs", met["t_start_ts"],
          "|", g_run["Fecha"].iloc[-1], "vs", met["t_end_ts"])

    # Centro del mapa
    center = [g_run["Latitud"].iloc[0], g_run["Longitud"].iloc[0]]
    m = folium.Map(location=center, zoom_start=13)

    def add_poly(df, color, weight=3, opacity=0.9, name="segment"):
        if df.empty: return
        coords = df[["Latitud","Longitud"]].values.tolist()
        folium.PolyLine(coords, color=color, weight=weight, opacity=opacity, tooltip=name).add_to(m)

    # Pinta “antes”, “racha adherida”, “después”
    add_poly(g_pre,  "#e31717", 2, 0.6, "antes")
    add_poly(g_run,  "#23d30f", 4, 0.9, "adherido (t_start → t_end)")
    add_poly(g_post, "#650fd6", 2, 0.6, "después")

    # Marcas de inicio/fin de la racha
    folium.Marker(
        [g_run["Latitud"].iloc[0], g_run["Longitud"].iloc[0]],
        popup=f"t_start ({met['t_start_ts']})",
        icon=folium.Icon(color="green", icon="play")
    ).add_to(m)
    folium.Marker(
        [g_run["Latitud"].iloc[-1], g_run["Longitud"].iloc[-1]],
        popup=f"t_end ({met['t_end_ts']})",
        icon=folium.Icon(color="red", icon="stop")
    ).add_to(m)

    m.save(outfile)
    print("Mapa guardado en:", outfile)

TRIP_ID = 553
df = pd.read_csv(TRACK_CSV, dtype={"trip_id": str})
g_trip = df[df['trip_id'] == f'{TRIP_ID}'].sort_values("Fecha")
# route_scores_df = pd.read_csv(OUT_CSV, dtype={"trip_id": str})
trip_met = route_scores_df[route_scores_df["trip_id"] == str(TRIP_ID)].iloc[0].to_dict()

print(trip_met)

plot_trip_with_adherence(g_trip, trip_met, outfile=f"../data_with_features/{UNIT}/trip_{TRIP_ID}_map.html")


{'trip_id': '553', 'LINEA': 'Linea 6', 'DIR': 'IDA', 't_start': 49, 't_end': 59, 't_start_ts': Timestamp('2025-04-26 10:00:18'), 't_end_ts': Timestamp('2025-04-26 10:08:21'), 'idx_start': 293268, 'idx_end': 293278, 'progress_m': 2416.6780423202335, 'mean_dev_m': 83.5724947641372, 'frac_in': 1.0, 'dur_pts': 11, 'score': 6042.766545652344}
Check tiempos: 2025-04-26 10:00:18 vs 2025-04-26 10:00:18 | 2025-04-26 10:08:21 vs 2025-04-26 10:08:21
Mapa guardado en: ../data_with_features/u213/trip_553_map.html


In [119]:
unit_98_df = pd.read_csv(f"../clean_data/{UNIT}/{UNIT}_clean_trips.csv")
test_98_trip_407 = unit_98_df[unit_98_df['trip_id'] == 407]
test_98_trip_407.to_csv('./test_98_trip_407.csv')

In [85]:
# Mostrar segmentos del viaje en los que no se registra movimiento en X tiempo, es decir, que LATITUD y LONGITUD se mantienen constantes

no_movement_segments = []
time_threshold = pd.Timedelta(minutes=5)  # Umbral de tiempo para considerar "sin movimiento"

df_trip = df[df["trip_id"] == "5"].sort_values("Fecha")
df_trip["Latitud"] = df_trip["Latitud"].astype(float)
df_trip["Longitud"] = df_trip["Longitud"].astype(float)
df_trip["Fecha"] = pd.to_datetime(df_trip["Fecha"], errors="coerce")
df_trip["Time_Diff"] = df_trip["Fecha"].diff().fillna(pd.Timedelta(seconds=0))
df_trip["Lat_Diff"] = df_trip["Latitud"].diff().fillna(0.0)
df_trip["Lon_Diff"] = df_trip["Longitud"].diff().fillna(0.0)
df_trip["No_Movement"] = (df_trip["Lat_Diff"].abs() < 1e-6) & (df_trip["Lon_Diff"].abs() < 1e-6)
df_trip["No_Movement_Group"] = (df_trip["No_Movement"] != df_trip["No_Movement"].shift()).cumsum()

for group, g in df_trip.groupby("No_Movement_Group"):
    if g["No_Movement"].iloc[0] and g["Time_Diff"].sum() >= time_threshold:
        no_movement_segments.append({
            "start_time": g["Fecha"].iloc[0],
            "end_time": g["Fecha"].iloc[-1],
            "duration": g["Time_Diff"].sum()
        })

# Mostrar resultados
print("Segmentos sin movimiento detectados:")
for segment in no_movement_segments:
    print(f"Desde {segment['start_time']} hasta {segment['end_time']} - Duración: {segment['duration']}")
    
# Generar mapa en folium de estos segmentos
import folium
fm = folium.Map(location=[df_trip["Latitud"].mean(), df_trip["Longitud"].mean()], zoom_start=13)
for segment in no_movement_segments:
    start = segment['start_time']
    end = segment['end_time']
    seg_points = df_trip[(df_trip["Fecha"] >= start) & (df_trip["Fecha"] <= end)]
    for _, row in seg_points.iterrows():
        folium.CircleMarker(location=[row["Latitud"], row["Longitud"]],
                            radius=5, color='red', fill=True, fill_color='red',
                            popup=f"Sin movimiento: {row['Fecha']}").add_to(fm)
# Guardar el mapa en un archivo HTML
fm.save("no_movement_segments_map.html")


Segmentos sin movimiento detectados:
Desde 2024-01-08 05:14:51 hasta 2024-01-08 05:28:09 - Duración: 0 days 00:14:18
Desde 2024-01-08 05:30:09 hasta 2024-01-08 05:39:09 - Duración: 0 days 00:10:00
Desde 2024-01-08 05:41:09 hasta 2024-01-08 05:47:09 - Duración: 0 days 00:07:00
Desde 2024-01-08 06:25:43 hasta 2024-01-08 06:37:17 - Duración: 0 days 00:12:34
Desde 2024-01-08 06:57:33 hasta 2024-01-08 07:13:07 - Duración: 0 days 00:16:34
Desde 2024-01-08 07:53:57 hasta 2024-01-08 07:59:27 - Duración: 0 days 00:06:30
Desde 2024-01-08 08:16:50 hasta 2024-01-08 08:29:35 - Duración: 0 days 00:13:45
Desde 2024-01-08 10:27:34 hasta 2024-01-08 10:35:47 - Duración: 0 days 00:09:13
Desde 2024-01-08 10:55:05 hasta 2024-01-08 11:03:06 - Duración: 0 days 00:09:01
Desde 2024-01-08 11:23:49 hasta 2024-01-08 11:32:09 - Duración: 0 days 00:09:20
Desde 2024-01-08 11:50:20 hasta 2024-01-08 12:00:02 - Duración: 0 days 00:10:42
Desde 2024-01-08 12:56:56 hasta 2024-01-08 13:04:06 - Duración: 0 days 00:08:10
Des

## DETECTAR PRÓXIMA ESTACIÓN OPERATIVA

In [83]:
import pandas as pd, numpy as np
from pathlib import Path
import math

# Funciones reutilizadas del script anterior (preparar rutas, proyección, etc.)
def meters_per_degree(lat_deg):
    lat = math.radians(lat_deg)
    mlat = 111132.92 - 559.82*math.cos(2*lat) + 1.175*math.cos(4*lat) - 0.0023*math.cos(6*lat)
    mlon = 111412.84*math.cos(lat) - 93.5*math.cos(3*lat) + 0.118*math.cos(5*lat)
    return mlat, mlon

def ll_to_xy_m(lat, lon, lat0, lon0):
    mlat, mlon = meters_per_degree(lat0)
    return (lon - lon0)*mlon, (lat - lat0)*mlat

def cumulative_distances(x, y):
    dx = np.diff(x); dy = np.diff(y)
    return np.concatenate([[0.0], np.cumsum(np.sqrt(dx*dx + dy*dy))])

""" def project_point_to_polyline(px, py, route, prev_seg_idx=None, s_before_jump=0, large_jumps=0, k=8):
    
    
    Proyecta (px,py) sobre 'route' eligiendo dentro de una ventana de segmentos.
    Prioriza la siguiente estación según el orden de la línea 'route', pero si
    hay un salto grande en s, permite buscar más lejos.
    
    # umbral para considerar un salto en s (distancia longitudinal) como "grande"
    large_jump_threshold = 1500.0
    
    rx, ry, rc = route["rx"], route["ry"], route["route_cum"]
    dx, dy = np.diff(rx), np.diff(ry)
    seg2 = dx*dx + dy*dy
    seg_len = np.sqrt(seg2, where=seg2>0, out=np.zeros_like(seg2))
    ax, ay = rx[:-1], ry[:-1]
    
    # --- Priorizar la estación siguiente teórica en la línea ---
    
    if prev_seg_idx is not None:
        
        # Determinar si la unidad ya pasó a la siguiente estación
        pax = px - ax[prev_seg_idx];  pay = py - ay[prev_seg_idx]
        t = np.divide(pax*dx[prev_seg_idx] + pay*dy[prev_seg_idx], seg2[prev_seg_idx],
                      out=np.zeros_like(pax), where=seg2[prev_seg_idx]>0)
        t = np.clip(t, 0, 1)
        projx = ax[prev_seg_idx] + t*dx[prev_seg_idx]; projy = ay[prev_seg_idx] + t*dy[prev_seg_idx]
        d2 = (px - projx)**2 + (py - projy)**2
        s_before_jump = rc[prev_seg_idx] + t*(seg_len[prev_seg_idx] if seg_len[prev_seg_idx]>0 else 0)
        
        
        # Distancia longitudinal al último punto antes del salto detectado
        s_next = rc[prev_seg_idx + 1] if prev_seg_idx + 1 < len(rc) else rc[-1]
        ds = s_next - s_before_jump
        
        if abs(ds) > large_jump_threshold:
                        
            if s_before_jump == 0:
                # Primer salto grande detectado
                s_before_jump = abs(ds)
            
        else:
            large_jumps = 0
            s_before_jump = 0

        if large_jumps <= 10:
            # Si no hay muchos saltos grandes recientes, devolver la siguiente estación
            if ds >= 0 and prev_seg_idx < len(seg_len):
                j = prev_seg_idx
                s = float(rc[j] + seg_len[j])
                d2 = (px - rx[j])**2 + (py - ry[j])**2
                return float(np.sqrt(d2)), s, j, s_before_jump
            
    # --- Si hay demasiados saltos grandes recientes, la unidad pudo haberse desviado, ampliar la búsqueda de la siguiente estación ---
    
    # Tomar el último salto detectado como un verdadero cambio de trayectoria
    s_before_jump = 0
    
    k = 8

    # Candidatos
    if prev_seg_idx is not None:
        i0 = max(0, prev_seg_idx - k)
        i1 = min(len(dx)-1, prev_seg_idx + k)
        cand = np.arange(i0, i1+1)
    else:
        cand = np.arange(len(dx))
    if cand.size == 0:
        return float('inf'), float('nan'), None

    # Proyección para todos los candidatos
    pax = px - ax[cand];  pay = py - ay[cand]
    t = np.divide(pax*dx[cand] + pay*dy[cand], seg2[cand],
                  out=np.zeros_like(seg2[cand]), where=seg2[cand]>0)
    t = np.clip(t, 0, 1)
    projx = ax[cand] + t*dx[cand]; projy = ay[cand] + t*dy[cand]
    d2 = (px - projx)**2 + (py - projy)**2
    s_cand = rc[cand] + t*seg_len[cand]

    j_loc = int(np.argmin(d2))
    j = int(cand[j_loc])
    s = float(s_cand[j_loc])
    return float(np.sqrt(d2[j_loc])), s, j, s_before_jump # VERSIÓN SIN SALTOS GRANDES
 """

def project_point_to_polyline(px, py, route, prev_seg_idx=None, prev_s=None, k=8,
                              max_step_fwd=200.0, back_tolerance=20.0,
                              lam_back=1e-2, lam_fwd=1e-3, lam_idx=1e-4):
    rx, ry, rc = route["rx"], route["ry"], route["route_cum"]
    dx, dy = np.diff(rx), np.diff(ry)
    ax, ay = rx[:-1], ry[:-1]
    seg2 = dx*dx + dy*dy
    seg_len = np.sqrt(seg2, where=seg2>0, out=np.zeros_like(seg2))

    # Ventana local
    if prev_seg_idx is not None:
        i0 = max(0, prev_seg_idx - k)
        i1 = min(len(dx)-1, prev_seg_idx + k)
        cand = np.arange(i0, i1+1)
    else:
        cand = np.arange(len(dx))
    if cand.size == 0:
        return float('inf'), float('nan'), None, False

    pax = px - ax[cand];  pay = py - ay[cand]
    t = np.divide(pax*dx[cand] + pay*dy[cand], seg2[cand],
                  out=np.zeros_like(seg2[cand]), where=seg2[cand]>0)
    t = np.clip(t, 0, 1)
    projx = ax[cand] + t*dx[cand]
    projy = ay[cand] + t*dy[cand]
    d2 = (px - projx)**2 + (py - projy)**2
    s_cand = rc[cand] + t*seg_len[cand]

    # Costo total
    cost = d2.copy()
    if prev_s is not None:
        back = np.maximum(0.0, prev_s - s_cand - back_tolerance)
        cost += lam_back * back*back
        fwd_excess = np.maximum(0.0, s_cand - (prev_s + max_step_fwd))
        cost += lam_fwd * fwd_excess*fwd_excess
    if prev_seg_idx is not None:
        cost += lam_idx * (cand - prev_seg_idx)**2
        
    # Determinar si hay que cambiar de dirección
    
    switch_direction = False
    
    # Primero, si ya se detectó el último índice, cambiar
    if prev_seg_idx == len(dx) - 1:
        switch_direction = True
        print("Cambio de dirección por índice menor")
        
    # Segundo, si la proyección más cercana está muy atrás, cambiar
    if prev_seg_idx is not None:
        j_loc_min = int(np.argmin(d2))
        j_min = int(cand[j_loc_min])
        if j_min < prev_seg_idx - 3:
            switch_direction = True
            print("Cambio de dirección por proyección atrás")

    j_loc = int(np.argmin(cost))
    j = int(cand[j_loc])
    s = float(s_cand[j_loc])
    d = float(np.sqrt(d2[j_loc]))
    return d, s, j, switch_direction


def prepare_route_geoms(stations_ord):
    geoms = {}
    for (linea, d), g in stations_ord.sort_values("ORDEN").groupby(["LINEA","DIR"], sort=False):
        latv = g["LAT"].astype(float).to_numpy()
        lonv = g["LON"].astype(float).to_numpy()
        if len(latv) < 2: 
            continue
        lat0, lon0 = float(latv.mean()), float(lonv.mean())
        rx, ry = ll_to_xy_m(latv, lonv, lat0, lon0)
        route_cum = cumulative_distances(rx, ry)
        geoms[(linea, d)] = dict(rx=rx, ry=ry, route_cum=route_cum, lat0=lat0, lon0=lon0,
                                 length_m=float(route_cum[-1]),
                                 is_circular=(str(d).upper()=="CIRCULAR"))
    return geoms

# Construir mapas de ruta (LINEA, DIR) -> lista de ESTACION en ORDEN
def build_route_dicts(route_df):
    route_map = {}
    station_pos = {}
    for (linea, d), g in route_df.sort_values("ORDEN").groupby(["LINEA","DIR"], sort=False):
        seq = g["ESTACION"].tolist()
        route_map[(linea, d)] = seq
        for i, est in enumerate(seq):
            station_pos[(linea, d, est)] = i
    return route_map, station_pos

def parse_pos(s):
        a, b = [float(t.strip()) for t in str(s).split(",")]
        return pd.Series({"LAT": a, "LON": b})


In [84]:
# ---- Helpers específicos para continuidad IDA/VUELTA ----
def opposite_dir(d):
    d = str(d).strip().upper()
    if d == "IDA": return "VUELTA"
    if d == "VUELTA": return "IDA"
    return d

""" def project_many_on_route(route, lat_arr, lon_arr, k=8,
                          max_step_fwd=200.0, max_step_back=80.0,
                          lam_forw=1e-4, lam_back=1e-4):
    px, py = ll_to_xy_m(lat_arr, lon_arr, route["lat0"], route["lon0"])
    s_list, d_list = [], []
    prev_seg, prev_s = None, None
    
    # Llevar conteo de los registros en los que el salto de s es muy grande
    large_jumps = 0
    s_before_jump = 0  # para comparar el salto de s
    
    for x, y in zip(px, py):

        d, s, prev_seg, s_before_jump = project_point_to_polyline(x, y, route, prev_seg_idx=prev_seg, s_before_jump=s_before_jump, large_jumps=large_jumps, k=k)
        s_list.append(s)
        d_list.append(d)
        prev_s = s
        
        if s_before_jump > 0:
            large_jumps += 1
            
        else:
            large_jumps = 0  # Reiniciar conteo si se descartó o aceptó el salto como cambio de trayectoria
            s_before_jump = 0
            
    return np.array(s_list, float), np.array(d_list, float) """ # VERSIÓN SIN SALTOS GRANDES
    
def project_many_on_route(route, lat_arr, lon_arr, k=2,
                          max_step_fwd=200.0, back_tolerance=20.0,
                          lam_back=1e-2, lam_fwd=1e-3, lam_idx=1e-4):
    px, py = ll_to_xy_m(lat_arr, lon_arr, route["lat0"], route["lon0"])
    s_list, d_list = [], []
    prev_seg, prev_s = None, None

    switch_direction = False
    cut_n = 0  # cuántos puntos logramos proyectar

    for idx, (x, y) in enumerate(zip(px, py)):
        d, s, prev_seg, sw = project_point_to_polyline(
            x, y, route, prev_seg_idx=prev_seg, prev_s=prev_s, k=k,
            max_step_fwd=max_step_fwd, back_tolerance=back_tolerance,
            lam_back=lam_back, lam_fwd=lam_fwd, lam_idx=lam_idx
        )

        # histéresis anti-retroceso
        if prev_s is not None and s < prev_s:
            s = max(prev_s - back_tolerance, s)

        s_list.append(s)
        d_list.append(d)
        prev_s = s
        cut_n = idx + 1

        if sw:
            switch_direction = True
            break  # detenemos aquí para que el caller procese este tramo y luego cambie de dir

    # done=True si consumimos TODO lat_arr/lon_arr (no hubo switch)
    done = (cut_n == len(lat_arr))
    return np.array(s_list, float), np.array(d_list, float), switch_direction, cut_n, done

def next_station_on_route(route, stations_df, s_arr):
    """
    Para cada s en s_arr (posición sobre la ruta), devuelve:
    - nombre de próxima estación
    - delta de distancia hasta esa estación
    """
    st = stations_df.sort_values("s_est")
    s_est = st["s_est"].to_numpy(float)
    names = st["ESTACION"].tolist()

    idxs = np.searchsorted(s_est, s_arr, side="right")
    prox_names = []
    deltas = []
    for s, idx in zip(s_arr, idxs):
        
        if idx >= len(s_est):
            # fin de ruta
            if route["is_circular"]:
                prox_names.append(names[0])
                deltas.append(route["length_m"] - s + s_est[0])
            else:
                prox_names.append(names[-1])
                deltas.append(max(0.0, s_est[-1] - s))  # clamp a 0
        else:
            prox_names.append(names[idx])
            deltas.append(s_est[idx] - s)

        
    return prox_names, np.array(deltas, float)

def end_of_dir_mask(s_arr, last_s_est, eps=20.0):
    """Marca puntos que ya están ~al final de la dirección actual."""
    return s_arr >= (last_s_est - eps)

# =========================
# Próxima estación teórica con continuidad
# =========================
def compute_next_for_trip(
    trip: pd.DataFrame,
    trip_stable_start_row: pd.Series,   # fila de trip_routes (tiene LINEA, DIR, idx_start, etc.)
    geoms: dict,
    stations_by_key: dict,
    win_confirm_pts: int = 8,           # ~ventana corta para confirmar VUELTA
    eps_end_m: float = 20.0,            # tolerancia para considerar "final de ruta"
    dist_margin: float = 20.0,
    min_progress_confirm: float = 200.0,
    dist_thresh: float = 200.0,
    frac_within: float = 0.75
) -> pd.DataFrame:
    """
    Calcula s_m, dist_m, proxima_est_teorica y dist_a_prox_m para un trip,
    usando la DIR inferida a partir de idx_start; permite cambiar a la dir opuesta
    cuando el bus llega al final y la ventana confirma el cambio.

    - NO mezcla estaciones de IDA y VUELTA en un mismo 's_est' (cada DIR tiene su marco).
    - Antes de idx_start, rellena con la primera próxima estación calculada en idx_start.
    """
    out = trip.copy()

    # 1) Desde dónde es válido este trip:
    stable_start_index = int(trip_stable_start_row.get("idx_start", 0)) if trip_stable_start_row is not None else 0
    if stable_start_index < 0 or stable_start_index >= len(out):
        stable_start_index = 0

    work = out.iloc[stable_start_index:].copy().reset_index(drop=False)  # guarda índice original en 'index'

    # 2) Ruta activa inicial (según inferencia)
    linea = trip_stable_start_row.get("LINEA")
    dir0  = trip_stable_start_row.get("DIR")
    
    if pd.isna(linea) or pd.isna(dir0):
        # Sin línea o dir inferidas, devuelve NaN/None
        out.loc[:, ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m"]] = [np.nan, np.nan, None, np.nan]
        return out
    
    # Para líneas con IDA/VUELTA
    dir1 = None
    route1 = None
    
    if dir0 in ["IDA", "VUELTA"]:
        dir1 = opposite_dir(dir0)

    # --- setup rutas/estaciones actuales ---
    key_dir0 = (linea, dir0)
    route_0 = geoms.get(key_dir0)
    if route_0 is None or key_dir0 not in stations_by_key:
        out.loc[:, ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m"]] = [np.nan, np.nan, None, np.nan]
        return out

    dir1 = opposite_dir(dir0) if dir0 in ["IDA","VUELTA"] else None
    route_1 = geoms.get((linea, dir1)) if dir1 else None

    def pick_route_and_st(d):
        r = geoms.get((linea, d))
        st = stations_by_key.get((linea, d), pd.DataFrame()).sort_values("s_est")
        return r, st

    current_dir = dir0
    current_route, st_cur = pick_route_and_st(current_dir)

    # arrays del tramo válido
    latv = work["Latitud"].to_numpy(float)
    lonv = work["Longitud"].to_numpy(float)
    n = len(latv)

    # prealoca columnas en work
    work["s_m"] = np.nan
    work["dist_m"] = np.nan
    work["proxima_est_teorica"] = None
    work["dist_a_prox_m"] = np.nan
    work["DIR"] = None

    p = 0  # puntero posicional
    safe_guard = 0
    max_iters = 2 * n + 10  # por seguridad

    while p < n and safe_guard < max_iters:
        safe_guard += 1

        s_arr, d_arr, switched, cut_n, done = project_many_on_route(
            current_route, latv[p:], lonv[p:],
            k=2, max_step_fwd=200.0, back_tolerance=20.0,
            lam_back=1e-2, lam_fwd=1e-3, lam_idx=1e-4
        )

        if cut_n == 0:
            # nada proyectado: evita loop
            break

        # Próximas estaciones en el tramo proyectado
        prox_names, deltas = next_station_on_route(current_route, st_cur, s_arr)

        # Escribir SOLO el tramo [p : p+cut_n)
        idx_slice = work.index[p:p+cut_n]
        work.loc[idx_slice, "s_m"] = s_arr
        work.loc[idx_slice, "dist_m"] = d_arr
        work.loc[idx_slice, "proxima_est_teorica"] = [str(x) if x is not None else None for x in prox_names]
        work.loc[idx_slice, "dist_a_prox_m"] = deltas
        work.loc[idx_slice, "DIR"] = current_dir

        p += cut_n

        if switched and dir1:
            # conmutar dir y sus estaciones
            current_dir = dir1 if current_dir == dir0 else dir0
            current_route, st_cur = pick_route_and_st(current_dir)
            # continúa el while con la nueva dirección
        elif done:
            break

    # Vuelca 'work' al 'out' respetando el índice original guardado en 'index'
    out.loc[work["index"], ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]] = \
        work[["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]].values

    # Relleno previo a stable_start_index
    if stable_start_index > 0 and len(work) > 0:
        first_est = work["proxima_est_teorica"].iloc[0]
        first_dst = work["dist_a_prox_m"].iloc[0]
        out.loc[out.index[:stable_start_index], "proxima_est_teorica"] = first_est
        out.loc[out.index[:stable_start_index], "dist_a_prox_m"] = first_dst
        out.loc[out.index[:stable_start_index], "DIR"] = work["DIR"].iloc[0]


    # 5) Copia resultados al DataFrame original
    #    (recuerda que 'work' tiene la columna 'index' con el índice original)
    out.loc[work["index"], ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]] = work[["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]].values

    # 6) Antes del índice estable, usa la primera próxima estación del punto estable
    if stable_start_index > 0 and len(work) > 0:
        first_est = work["proxima_est_teorica"].iloc[0]
        first_dst = work["dist_a_prox_m"].iloc[0]
        out.loc[out.index[:stable_start_index], "proxima_est_teorica"] = first_est
        out.loc[out.index[:stable_start_index], "dist_a_prox_m"] = first_dst

    return out

In [85]:
UNIT = "u098"
CLEAN_TRIPS_CSV = f"../clean_data/{UNIT}/{UNIT}_clean_trips.csv"
ROUTES_CSV      = f"../data_with_features/{UNIT}/{UNIT}_trip_routes.csv"

In [86]:
def station_s_on_route(route, lat, lon):
    x, y = ll_to_xy_m(lat, lon, route["lat0"], route["lon0"])
    rx, ry, rc = route["rx"], route["ry"], route["route_cum"]
    dx, dy = np.diff(rx), np.diff(ry)
    ax, ay = rx[:-1], ry[:-1]
    seg2 = dx*dx + dy*dy
    # t de proyección, recortado a [0,1]
    t = np.divide((x - ax)*dx + (y - ay)*dy, seg2,
                  out=np.zeros_like(seg2), where=seg2>0)
    t = np.clip(t, 0, 1)
    projx = ax + t*dx
    projy = ay + t*dy
    d2 = (x - projx)**2 + (y - projy)**2
    j = int(np.argmin(d2))                     # mejor segmento
    s = float(rc[j] + t[j]*np.sqrt(seg2[j]))   # s sobre la ruta
    return s

# Leer estaciones ordenadas

STATIONS_XLS= "../data/Estaciones_ordenadas_with_pos.xlsx"
stations_ord = pd.read_excel(STATIONS_XLS)
stations_ord[["LAT", "LON"]] = stations_ord["POSICIÓN"].apply(parse_pos)
stations_ord = stations_ord.sort_values("ORDEN")
geoms = prepare_route_geoms(stations_ord)
route_map, station_pos = build_route_dicts(stations_ord)

stations_by_key = {}

for key, seq in route_map.items():
    route = geoms.get(key)
    if route is None:
        continue
    rows = stations_ord[(stations_ord["LINEA"]==key[0]) & (stations_ord["DIR"]==key[1])]
    st_rows = []
    for est in seq:
        r = rows[rows["ESTACION"]==est]
        if r.empty: 
            continue
        lat, lon = float(r["LAT"].iloc[0]), float(r["LON"].iloc[0])
        s_est = station_s_on_route(route, lat, lon)
        st_rows.append({"ESTACION": est, "LAT": lat, "LON": lon, "s_est": s_est})
    stations_by_key[key] = pd.DataFrame(st_rows).sort_values("s_est").reset_index(drop=True)


# Probar con un solo trip

trip_id = "1"
df = pd.read_csv(CLEAN_TRIPS_CSV, dtype={"trip_id": str})
df["Fecha"] = pd.to_datetime(df["Fecha"], errors="coerce")
df = df.sort_values(["trip_id","Fecha"])
route_scores_df = pd.read_csv(ROUTES_CSV, dtype={"trip_id": str})
trip = df[df["trip_id"] == trip_id].copy()
trip_scores = route_scores_df[route_scores_df["trip_id"] == trip_id]

result = compute_next_for_trip(
    trip=trip,  # df del trip (ordenado por Fecha)
    trip_stable_start_row=trip_scores.iloc[0] if not trip_scores.empty else pd.Series(),
    geoms=geoms,
    stations_by_key=stations_by_key,
    win_confirm_pts=8,           # ajustable
    eps_end_m=20.0,              # tolerancia para “final de ruta”
    dist_margin=20.0,            # margen de cercanía a ruta opuesta
    min_progress_confirm=200.0,  # progreso mínimo en ventana para confirmar cambio
    dist_thresh=200.0,
    frac_within=0.75
)


trip.to_csv(f"./test_{UNIT}_trip_{trip_id}_input.csv", index=False)
result.to_csv(f"./test_{UNIT}_trip_{trip_id}_with_next_station.csv", index=False)

Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor
Cambio de dirección por índice menor


  out.loc[work["index"], ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]] = \
  out.loc[work["index"], ["s_m","dist_m","proxima_est_teorica","dist_a_prox_m","DIR"]] = \
