In [11]:
# ── BLOQUE 1: PREPROCESAMIENTO ESTÁTICO Y GUARDADO ─────────────────────────────────

import pickle
import pandas as pd
import math
import requests
import random
from datetime import datetime, timedelta
from filtros_df import filter_labors_by_date, filter_labors_by_city

# — Parameters —————————————————————————————————————————————————————————————
# NOTE(review): apart from SEED, none of these constants is read in the visible
# part of this notebook; presumably a later block consumes them — confirm.
ITERATIONS              = 5
ALPHA                   = 1
SEED                    = 10
DISTANCE_METHOD         = 'Manhattan'
alfred_speed            = 30   # km/h for the driver travelling to the first point
vehicle_transport_speed = 40   # km/h for vehicle transport
tiempo_previo           = 30   # minutes before schedule_date
tiempo_gracia           = 15   # presumably minutes, like tiempo_previo — TODO confirm
tiempo_alistar          = 15   # presumably minutes — TODO confirm
tiempo_other            = 30   # presumably minutes — TODO confirm
tiempo_finalizacion     = 15   # presumably minutes — TODO confirm

# Seed the stdlib RNG so later random draws are reproducible
random.seed(SEED)

# — Load precomputed distances ————————————————————————————————————————————————
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load 'distances.pkl' from a trusted source.
with open('distances.pkl', 'rb') as f:
    DIST_DICT = pickle.load(f)

# — Load base data ————————————————————————————————————————————————————————————
directorio_df = pd.read_csv("Datos/Procesados/directorio_df.csv")
df_dist       = pd.read_pickle("df_dist.pkl")

# — 0) Filtering and automatic mixing of days into base_day —————————————————————
start_date = "2023-01-02"
end_date   = "2023-01-03"

# Filter by city (Bogotá=149) and by date range
df_bogota = filter_labors_by_city(df_dist, 149)
df_dia    = filter_labors_by_date(df_bogota, start_date=start_date, end_date=end_date)
# Drop cancelled services and order the labors of each service by start time
df_dia    = df_dia.query("state_service!='CANCELED'") \
                 .sort_values(['service_id','labor_start_date']) \
                 .reset_index(drop=True)

# — 0.2) Remap every date column onto base_day ————————————————————————————————
base_day = pd.to_datetime(start_date).date()


def remap_to_base(dt):
    """Shift ``dt`` by whole days so it lands on ``base_day``.

    The time of day is kept. Missing values are returned unchanged. The
    module-level ``base_day`` is read when the function is called.
    """
    if pd.isna(dt):
        return dt
    day_shift = base_day - dt.date()
    return dt + timedelta(days=day_shift.days)


for c in ('schedule_date', 'labor_start_date', 'labor_end_date'):
    shifted = df_dia[c].apply(remap_to_base)
    df_dia[c] = shifted

# — 0.3) Build services_map_df: one (start, end) map point pair per labor ————————
map_src = df_dia[[
    'service_id','labor_id','labor_category',
    'start_address_point','end_address_point','address_point',
    'labor_start_date'
]].sort_values(['service_id','labor_start_date']).reset_index(drop=True)

rows = []
for svc, grp in map_src.groupby('service_id', sort=False):
    n = len(grp)
    for i in range(n):
        r = grp.iloc[i]
        if n == 1:
            # A lone labor keeps its own start and end points
            sp, ep = r['start_address_point'], r['end_address_point']
        elif r['labor_category']=='VEHICLE_TRANSPORTATION':
            # A transport starts at its own start point and ends at the end
            # point of the next labor of the service (its own if it is last)
            sp = r['start_address_point']
            ep = grp.iloc[i+1]['end_address_point'] if i<n-1 else r['end_address_point']
        else:
            # Any other labor starts and ends at its address_point
            sp = ep = r['address_point']
        rows.append({
            'service_id':      svc,
            'labor_id':        r['labor_id'],
            'map_start_point': sp,
            'map_end_point':   ep
        })
# Explicit columns keep the frame usable (and the astype below valid) even when
# the filters above left no labors and `rows` is empty.
services_map_df = pd.DataFrame(
    rows, columns=['service_id','labor_id','map_start_point','map_end_point']
)
# Match df_dia's labor_id dtype so the later merge joins on equal types
services_map_df['labor_id'] = services_map_df['labor_id'].astype(df_dia['labor_id'].dtype)

# — 1) Helper functions: parse_point, distance ————————————————————————————————
def parse_point(s):
    """Parse a ``POINT (lon lat)`` string into a ``(lat, lon)`` tuple of floats.

    Returns ``(None, None)`` when ``s`` is missing, is not a string, does not
    start with ``POINT`` or does not hold exactly two numeric coordinates.
    """
    if not isinstance(s, str):
        return None, None
    text = s.strip()
    if not text.startswith("POINT"):
        return None, None
    # Remove the tag only after trimming, so padded input parses as well
    parts = text[len("POINT"):].strip(' ()').split()
    if len(parts) != 2:
        return None, None
    try:
        lon, lat = float(parts[0]), float(parts[1])
    except ValueError:
        return None, None
    return lat, lon

OSRM_URL = "http://router.project-osrm.org/route/v1/driving/"
def distance(p1, p2, method='haversine', timeout=5):
    """Return the distance in kilometres between two ``POINT`` strings.

    Args:
        p1, p2: point strings accepted by ``parse_point``.
        method: ``'precalced'`` looks the ``(p1, p2)`` pair up in ``DIST_DICT``;
            ``'haversine'`` uses the great-circle formula; ``'osrm'`` queries
            the OSRM routing service and falls back to haversine when the
            request or the response parsing fails; any other value (for
            example ``'Manhattan'``) uses a Manhattan approximation.
        timeout: seconds allowed for the OSRM HTTP request.

    Returns:
        The distance as a float, or NaN when a point cannot be parsed or the
        pair is absent from ``DIST_DICT``.
    """
    if method=='precalced':
        return DIST_DICT.get((p1,p2), float('nan'))
    lat1, lon1 = parse_point(p1)
    lat2, lon2 = parse_point(p2)
    if None in (lat1,lon1,lat2,lon2):
        return float('nan')
    if method=='haversine':
        φ1,φ2 = map(math.radians,(lat1,lat2))
        dφ    = math.radians(lat2-lat1)
        dλ    = math.radians(lon2-lon1)
        a     = math.sin(dφ/2)**2 + math.cos(φ1)*math.cos(φ2)*math.sin(dλ/2)**2
        # Rounding can push `a` marginally above 1, which would make
        # sqrt(1-a) raise a domain error for near-antipodal points.
        a     = min(1.0, a)
        return 2*6371*math.atan2(math.sqrt(a), math.sqrt(1-a))
    if method=='osrm':
        coords = f"{lon1},{lat1};{lon2},{lat2}"
        try:
            r = requests.get(OSRM_URL+coords+"?overview=false", timeout=timeout)
            r.raise_for_status()
            return r.json()['routes'][0]['distance']/1000
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
            # Network failure or unexpected payload: best-effort fallback.
            # Interrupts such as KeyboardInterrupt are no longer swallowed.
            return distance(p1,p2,method='haversine')
    # Manhattan: north-south leg plus east-west leg, the latter scaled by the
    # cosine of the mean latitude
    KM_PER_DEG_LAT = 111.32
    mean_lat = math.radians((lat1+lat2)/2)
    dlat = abs(lat1-lat2)*KM_PER_DEG_LAT
    dlon = abs(lon1-lon2)*KM_PER_DEG_LAT*math.cos(mean_lat)
    return dlat + dlon

# — 2) process_group (handles services without any transport labor) ————————————
def process_group(grp):
    """Select and order the labors of a single service.

    Transport labors (``labor_category == 'VEHICLE_TRANSPORTATION'``) are
    interleaved with the non-transport labors nearest to the first row's
    start point: ``transport, other, transport, ..., transport``.

    Args:
        grp: rows of one service. The columns read are ``labor_category``,
            ``labor_name``, ``shop``, ``address_point``,
            ``start_address_point`` and ``labor_start_date``.

    Returns:
        The selected rows in visiting order, or an empty slice of ``grp``
        when the service is discarded: a single non-transport labor, no
        transport labor at all, or no non-transport labor that has both
        ``shop`` and ``address_point``.
    """
    def _missing_last(value):
        # NaN/NaT compare False against everything, so using them directly as
        # sort keys gives an undefined order; rank them after real values.
        return (True, 0) if pd.isna(value) else (False, value)

    # A single labor is kept only if it is a VEHICLE_TRANSPORTATION
    if len(grp)==1:
        return grp if grp.iloc[0]['labor_category']=='VEHICLE_TRANSPORTATION' else grp.iloc[0:0]

    A_idx = list(grp.index[grp['labor_category']=='VEHICLE_TRANSPORTATION'])
    # Without any VEHICLE_TRANSPORTATION the whole service is discarded
    if not A_idx:
        return grp.iloc[0:0]

    B_idx = [i for i in grp.index[grp['labor_category']!='VEHICLE_TRANSPORTATION']
             if pd.notna(grp.at[i,'shop']) and pd.notna(grp.at[i,'address_point'])]
    if not B_idx:
        return grp.iloc[0:0]

    # The service opens with 'Alfred Initial Transport' when present,
    # otherwise with the first transport row
    inits   = [i for i in A_idx if grp.at[i,'labor_name']=='Alfred Initial Transport']
    firstA  = inits[0] if inits else A_idx[0]
    A_rem   = [i for i in A_idx if i!=firstA]

    start_pt = grp['start_address_point'].iloc[0]
    # NOTE(review): distance() runs with its default method ('haversine');
    # DISTANCE_METHOD is not consulted here — confirm this is intended.
    dist_map = {i: distance(start_pt, grp.at[i,'address_point']) for i in B_idx}
    # One stop fits between two consecutive transports, hence len(A_idx)-1
    B_sorted = sorted(B_idx, key=lambda i: _missing_last(dist_map[i]))[:len(A_idx)-1]

    A_rem_sorted = sorted(A_rem, key=lambda i: _missing_last(grp.at[i,'labor_start_date']))
    needed_A     = [firstA] + A_rem_sorted[:len(B_sorted)]

    # Interleave transport / stop pairs and close with the last transport
    final = []
    for j, b in enumerate(B_sorted):
        final += [needed_A[j], b]
    final.append(needed_A[-1])

    return grp.loc[final]

# — 3) Build df_cleaned_template and merge it with the map points ————————————————
cleaned = []
for svc, grp in df_dia.groupby('service_id', sort=False):
    pg = process_group(grp)
    if not pg.empty:
        cleaned.append(pg)
# pd.concat raises ValueError on an empty list, so when no service survives
# process_group fall back to an empty frame that keeps df_dia's columns.
df_cleaned_template = (
    (pd.concat(cleaned, ignore_index=True) if cleaned else df_dia.iloc[0:0])
      .merge(services_map_df, on=['service_id','labor_id'], how='left')
)

# — 4) avg_times_map: mean duration in minutes per labor_name, from df_dist ————————
# Built as a single chain so no column is assigned onto a filtered slice,
# which is what triggers pandas' chained-assignment warning.
# NOTE(review): negative durations (end before start) are not excluded — confirm.
df_temp = (
    df_dist
      .dropna(subset=['labor_start_date','labor_end_date'])
      .assign(duration_td=lambda d: d['labor_end_date']-d['labor_start_date'])
      # Labors lasting more than one day are left out of the average
      .loc[lambda d: d['duration_td']<=pd.Timedelta(days=1)]
      .assign(duration_min=lambda d: d['duration_td'].dt.total_seconds()/60)
)
avg_times_map = df_temp.groupby('labor_name')['duration_min'].mean().to_dict()

# — 5) Persist the template consumed by BLOCK 2 ————————————————————————————————
df_cleaned_template.to_pickle('cleaned_static_template.pkl')
print("✅ BLOQUE 1 completado: plantilla estática guardada en 'cleaned_static_template.pkl'")

# Tag every labor with its calendar day. The column is added after the pickle
# is written, so it is not part of the saved file.
df_cleaned_template['day'] = df_cleaned_template['schedule_date'].dt.date

# Number of labors per day, busiest day first
daily_counts = df_cleaned_template.groupby('day').size()
daily_counts = daily_counts.reset_index(name='num_labors')
daily_counts = daily_counts.sort_values('num_labors', ascending=False)
daily_counts = daily_counts.reset_index(drop=True)

# Report
print("Días con más labors (después de procesar Bloque 1):")
print(daily_counts)

✅ BLOQUE 1 completado: plantilla estática guardada en 'cleaned_static_template.pkl'
Días con más labors (después de procesar Bloque 1):
          day  num_labors
0  2023-01-02          36


In [14]:
# ── BLOQUE 1: PREPROCESAMIENTO ESTÁTICO POR DÍA Y GUARDADO ────────────────────────────

import os
import pickle
import pandas as pd
import math
import requests
import random
from datetime import datetime, timedelta
from filtros_df import filter_labors_by_city

# — Parameters —————————————————————————————————————————————————————————————
# NOTE(review): apart from SEED, none of these constants is read in the visible
# part of this notebook; presumably a later block consumes them — confirm.
SEED                    = 10
DISTANCE_METHOD         = 'Manhattan'
alfred_speed            = 30   # km/h for the driver travelling to the first point
vehicle_transport_speed = 40   # km/h for vehicle transport
tiempo_previo           = 30   # minutes before schedule_date
tiempo_gracia           = 15   # presumably minutes, like tiempo_previo — TODO confirm
tiempo_alistar          = 15   # presumably minutes — TODO confirm
tiempo_other            = 30   # presumably minutes — TODO confirm
tiempo_finalizacion     = 15   # presumably minutes — TODO confirm

# Seed the stdlib RNG so later random draws are reproducible
random.seed(SEED)

# — Create the output folder ——————————————————————————————————————————————————
os.makedirs("processed_by_date", exist_ok=True)

# — Load base data ————————————————————————————————————————————————————————————
directorio_df = pd.read_csv("Datos/Procesados/directorio_df.csv")
df_raw        = pd.read_pickle("df_dist.pkl")

# — Load precomputed distances ————————————————————————————————————————————————
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load 'distances.pkl' from a trusted source.
with open('distances.pkl', 'rb') as f:
    DIST_DICT = pickle.load(f)

# — Funciones auxiliares —————————————————————————————————————————————————————————

def parse_point(s):
    """Parse a ``POINT (lon lat)`` string into a ``(lat, lon)`` tuple of floats.

    Returns ``(None, None)`` when ``s`` is missing, is not a string, does not
    start with ``POINT`` or does not hold exactly two numeric coordinates.
    """
    if not isinstance(s, str):
        return None, None
    text = s.strip()
    if not text.startswith("POINT"):
        return None, None
    # Remove the tag only after trimming, so padded input parses as well
    parts = text[len("POINT"):].strip(' ()').split()
    if len(parts) != 2:
        return None, None
    try:
        lon, lat = float(parts[0]), float(parts[1])
    except ValueError:
        return None, None
    return lat, lon

OSRM_URL = "http://router.project-osrm.org/route/v1/driving/"
def distance(p1, p2, method='haversine', timeout=5):
    """Return the distance in kilometres between two ``POINT`` strings.

    Args:
        p1, p2: point strings accepted by ``parse_point``.
        method: ``'precalced'`` looks the ``(p1, p2)`` pair up in ``DIST_DICT``;
            ``'haversine'`` uses the great-circle formula; ``'osrm'`` queries
            the OSRM routing service and falls back to haversine when the
            request or the response parsing fails; any other value (for
            example ``'Manhattan'``) uses a Manhattan approximation.
        timeout: seconds allowed for the OSRM HTTP request.

    Returns:
        The distance as a float, or NaN when a point cannot be parsed or the
        pair is absent from ``DIST_DICT``.
    """
    if method == 'precalced':
        return DIST_DICT.get((p1, p2), float('nan'))
    lat1, lon1 = parse_point(p1)
    lat2, lon2 = parse_point(p2)
    if None in (lat1, lon1, lat2, lon2):
        return float('nan')
    if method == 'haversine':
        φ1, φ2 = map(math.radians, (lat1, lat2))
        dφ = math.radians(lat2 - lat1)
        dλ = math.radians(lon2 - lon1)
        a = math.sin(dφ/2)**2 + math.cos(φ1)*math.cos(φ2)*math.sin(dλ/2)**2
        # Rounding can push `a` marginally above 1, which would make
        # sqrt(1 - a) raise a domain error for near-antipodal points.
        a = min(1.0, a)
        return 2 * 6371 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    if method == 'osrm':
        coords = f"{lon1},{lat1};{lon2},{lat2}"
        try:
            r = requests.get(OSRM_URL + coords + "?overview=false", timeout=timeout)
            r.raise_for_status()
            return r.json()['routes'][0]['distance'] / 1000
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
            # Network failure or unexpected payload: best-effort fallback.
            # Interrupts such as KeyboardInterrupt are no longer swallowed.
            return distance(p1, p2, method='haversine')
    # Manhattan: north-south leg plus east-west leg, the latter scaled by the
    # cosine of the mean latitude
    KM_PER_DEG_LAT = 111.32
    mean_lat = math.radians((lat1 + lat2) / 2)
    dlat = abs(lat1 - lat2) * KM_PER_DEG_LAT
    dlon = abs(lon1 - lon2) * KM_PER_DEG_LAT * math.cos(mean_lat)
    return dlat + dlon

def process_group(grp):
    """Select and order the labors of a single service.

    Transport labors (``labor_category == 'VEHICLE_TRANSPORTATION'``) are
    interleaved with the non-transport labors nearest to the first row's
    start point: ``transport, other, transport, ..., transport``.

    Args:
        grp: rows of one service. The columns read are ``labor_category``,
            ``labor_name``, ``shop``, ``address_point``,
            ``start_address_point`` and ``labor_start_date``.

    Returns:
        The selected rows in visiting order, or an empty slice of ``grp``
        when the service is discarded: a single non-transport labor, no
        transport labor at all, or no non-transport labor that has both
        ``shop`` and ``address_point``.
    """
    def _missing_last(value):
        # NaN/NaT compare False against everything, so using them directly as
        # sort keys gives an undefined order; rank them after real values.
        return (True, 0) if pd.isna(value) else (False, value)

    if len(grp) == 1:
        return grp if grp.iloc[0]['labor_category']=='VEHICLE_TRANSPORTATION' else grp.iloc[0:0]
    A_idx = list(grp.index[grp['labor_category']=='VEHICLE_TRANSPORTATION'])
    if not A_idx:
        return grp.iloc[0:0]
    B_idx = [i for i in grp.index[grp['labor_category']!='VEHICLE_TRANSPORTATION']
             if pd.notna(grp.at[i,'shop']) and pd.notna(grp.at[i,'address_point'])]
    if not B_idx:
        return grp.iloc[0:0]
    # The service opens with 'Alfred Initial Transport' when present,
    # otherwise with the first transport row
    inits  = [i for i in A_idx if grp.at[i,'labor_name']=='Alfred Initial Transport']
    firstA = inits[0] if inits else A_idx[0]
    A_rem  = [i for i in A_idx if i!=firstA]
    start_pt = grp['start_address_point'].iloc[0]
    # NOTE(review): distance() runs with its default method ('haversine');
    # DISTANCE_METHOD is not consulted here — confirm this is intended.
    dist_map = {i: distance(start_pt, grp.at[i,'address_point']) for i in B_idx}
    # One stop fits between two consecutive transports, hence len(A_idx)-1
    B_sorted = sorted(B_idx, key=lambda i: _missing_last(dist_map[i]))[:len(A_idx)-1]
    A_rem_sorted = sorted(A_rem, key=lambda i: _missing_last(grp.at[i,'labor_start_date']))
    needed_A     = [firstA] + A_rem_sorted[:len(B_sorted)]
    # Interleave transport / stop pairs and close with the last transport
    final = []
    for j, b in enumerate(B_sorted):
        final += [needed_A[j], b]
    final.append(needed_A[-1])
    return grp.loc[final]

# — avg_times_map: mean duration in minutes per labor_name, from df_raw ————————————
# Built as a single chain so no column is assigned onto a filtered slice,
# which is what triggers pandas' chained-assignment warning.
# NOTE(review): negative durations (end before start) are not excluded — confirm.
df_temp = (
    df_raw
      .dropna(subset=['labor_start_date','labor_end_date'])
      .assign(duration_td=lambda d: d['labor_end_date']-d['labor_start_date'])
      # Labors lasting more than one day are left out of the average
      .loc[lambda d: d['duration_td'] <= pd.Timedelta(days=1)]
      .assign(duration_min=lambda d: d['duration_td'].dt.total_seconds() / 60)
)
avg_times_map = df_temp.groupby('labor_name')['duration_min'].mean().to_dict()

# — Pre-filter by city (Bogotá=149) —————————————————————————————————————————————
df_bogota = filter_labors_by_city(df_raw, 149)

# Calendar day of every row, computed once so the loop below does not derive
# it again from schedule_date on each iteration.
schedule_days = df_bogota['schedule_date'].dt.date

# — Unique days according to schedule_date ————————————————————————————————————
unique_days = sorted(schedule_days.dropna().unique())


def remap_to_base(dt):
    """Shift ``dt`` by whole days so it lands on ``base_day``.

    The time of day is kept and missing values are returned unchanged. The
    module-level ``base_day`` is read at call time, so the loop only has to
    reassign it; the function no longer needs redefining per iteration.
    """
    if pd.isna(dt):
        return dt
    delta = (base_day - dt.date()).days
    return dt + timedelta(days=delta)


for day in unique_days:
    day_str = day.isoformat()
    print(f"Procesando día {day_str}…")

    # 0) rows whose schedule_date falls on this day
    df_dia = df_bogota[schedule_days == day].copy()
    if df_dia.empty:
        print(f"  → Sin datos para {day_str}, saltando.")
        continue

    # 0.2) remap the date columns onto base_day = day
    base_day = day
    for c in ['schedule_date','labor_start_date','labor_end_date']:
        df_dia[c] = df_dia[c].apply(remap_to_base)

    # 0.3) build services_map_df: one (start, end) map point pair per labor
    map_src = df_dia[[
        'service_id','labor_id','labor_category',
        'start_address_point','end_address_point','address_point',
        'labor_start_date'
    ]].sort_values(['service_id','labor_start_date']).reset_index(drop=True)

    rows = []
    for svc, grp in map_src.groupby('service_id', sort=False):
        n = len(grp)
        for i in range(n):
            r = grp.iloc[i]
            if n == 1:
                # A lone labor keeps its own start and end points
                sp, ep = r['start_address_point'], r['end_address_point']
            elif r['labor_category']=='VEHICLE_TRANSPORTATION':
                # A transport ends at the end point of the next labor of the
                # service (its own end point if it is the last one)
                sp = r['start_address_point']
                ep = grp.iloc[i+1]['end_address_point'] if i<n-1 else r['end_address_point']
            else:
                # Any other labor starts and ends at its address_point
                sp = ep = r['address_point']
            rows.append({
                'service_id':      svc,
                'labor_id':        r['labor_id'],
                'map_start_point': sp,
                'map_end_point':   ep
            })
    # Explicit columns keep the astype and the merge valid even if every row
    # of the day has a missing service_id and `rows` ends up empty.
    services_map_df = pd.DataFrame(
        rows, columns=['service_id','labor_id','map_start_point','map_end_point']
    )
    services_map_df['labor_id'] = services_map_df['labor_id'].astype(df_dia['labor_id'].dtype)

    # 3) build the cleaned template
    cleaned = []
    for svc, grp in df_dia.groupby('service_id', sort=False):
        pg = process_group(grp)
        if not pg.empty:
            cleaned.append(pg)

    # pd.concat raises on an empty list, so skip days with no valid service
    if not cleaned:
        print(f"  → No servicios válidos para {day_str}, saltando.")
        continue

    df_cleaned_template = (
        pd.concat(cleaned, ignore_index=True)
          .merge(services_map_df, on=['service_id','labor_id'], how='left')
    )

    # 4) save one template per day
    out_path = f"processed_by_date/cleaned_static_template_{day_str}.pkl"
    df_cleaned_template.to_pickle(out_path)
    print(f"  → guardado: {out_path}")

print("✅ BLOQUE 1 completado para todos los días.")


Procesando día 2023-01-02…
  → guardado: processed_by_date/cleaned_static_template_2023-01-02.pkl
Procesando día 2023-01-03…
  → guardado: processed_by_date/cleaned_static_template_2023-01-03.pkl
Procesando día 2023-01-04…
  → guardado: processed_by_date/cleaned_static_template_2023-01-04.pkl
Procesando día 2023-01-05…
  → guardado: processed_by_date/cleaned_static_template_2023-01-05.pkl
Procesando día 2023-01-06…
  → guardado: processed_by_date/cleaned_static_template_2023-01-06.pkl
Procesando día 2023-01-07…
  → guardado: processed_by_date/cleaned_static_template_2023-01-07.pkl
Procesando día 2023-01-10…
  → guardado: processed_by_date/cleaned_static_template_2023-01-10.pkl
Procesando día 2023-01-11…
  → guardado: processed_by_date/cleaned_static_template_2023-01-11.pkl
Procesando día 2023-01-12…
  → guardado: processed_by_date/cleaned_static_template_2023-01-12.pkl
Procesando día 2023-01-13…
  → guardado: processed_by_date/cleaned_static_template_2023-01-13.pkl
Procesando día 2023-

In [15]:
# Código para listar el top de días según número de labors procesadas

import glob
import os
import pandas as pd

# 1) Find every per-day template file
pattern = os.path.join("processed_by_date", "cleaned_static_template_*.pkl")
files = glob.glob(pattern)

# 2) For each file, extract the date from its name and count the rows
day_counts = []
for path in files:
    # The name looks like ".../cleaned_static_template_YYYY-MM-DD.pkl"
    filename = os.path.basename(path)
    day_str = filename.replace("cleaned_static_template_", "").replace(".pkl", "")
    df = pd.read_pickle(path)
    day_counts.append({
        "day": day_str,
        "num_labors": len(df)
    })

# 3) Build the counts table, largest first.
#    Explicit columns keep sort_values from raising KeyError when no file
#    matched; sorting by day as a second key makes the order of ties
#    deterministic regardless of the order glob returned the files in.
df_counts = (
    pd.DataFrame(day_counts, columns=["day", "num_labors"])
      .sort_values(["num_labors", "day"], ascending=[False, True])
      .reset_index(drop=True)
)

# 4) Show the top 10
top_n = 10
print(f"Top {top_n} días por número de labors procesadas:")
print(df_counts.head(top_n).to_string(index=False))


Top 10 días por número de labors procesadas:
       day  num_labors
2023-03-30         190
2023-01-17         189
2023-01-30         188
2023-01-31         164
2023-12-06         164
2023-01-16         161
2023-03-21         161
2023-03-31         161
2023-08-30         160
2023-05-30         158


In [17]:
# Display the full per-day counts table as the notebook cell output
df_counts

Unnamed: 0,day,num_labors
0,2023-03-30,190
1,2023-01-17,189
2,2023-01-30,188
3,2023-01-31,164
4,2023-12-06,164
...,...,...
726,2023-11-05,1
727,2025-06-09,1
728,2025-06-12,1
729,2025-06-16,1


In [2]:
import os
import pickle
import pandas as pd
import math
import requests
import random
from datetime import datetime, timedelta
from filtros_df import filter_labors_by_city

# Parameters.
# NOTE(review): apart from SEED, none of these constants is read in the visible
# part of this notebook; presumably a later block consumes them — confirm.
SEED = 10
DISTANCE_METHOD = 'Manhattan'
alfred_speed = 30
vehicle_transport_speed = 40
tiempo_previo = 30
tiempo_gracia = 15
tiempo_alistar = 15
tiempo_other = 30
tiempo_finalizacion = 15

# Seed the stdlib RNG so later random draws are reproducible
random.seed(SEED)

# Output folder for the per-day templates
os.makedirs("processed_by_date", exist_ok=True)

directorio_df = pd.read_csv("Datos/Procesados/directorio_df.csv")
df_raw = pd.read_csv("Datos/Procesados/df_dist.csv")
# Make sure the date columns are datetimes; unparsable values become NaT
date_cols = ['schedule_date', 'labor_start_date', 'labor_end_date']
for col in date_cols:
    df_raw[col] = pd.to_datetime(df_raw[col], errors='coerce')

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load 'distances.pkl' from a trusted source.
with open('distances.pkl', 'rb') as f:
    DIST_DICT = pickle.load(f)


def parse_point(s):
    """Parse a ``POINT (lon lat)`` string into a ``(lat, lon)`` tuple of floats.

    Returns ``(None, None)`` when ``s`` is missing, is not a string, does not
    start with ``POINT`` or does not hold exactly two numeric coordinates.
    """
    if not isinstance(s, str):
        return None, None
    text = s.strip()
    if not text.startswith("POINT"):
        return None, None
    # Remove the tag only after trimming, so padded input parses as well
    parts = text[len("POINT"):].strip(' ()').split()
    if len(parts) != 2:
        return None, None
    try:
        lon, lat = float(parts[0]), float(parts[1])
    except ValueError:
        return None, None
    return lat, lon

OSRM_URL = "http://router.project-osrm.org/route/v1/driving/"
def distance(p1, p2, method='haversine', timeout=5):
    """Return the distance in kilometres between two ``POINT`` strings.

    Args:
        p1, p2: point strings accepted by ``parse_point``.
        method: ``'precalced'`` looks the ``(p1, p2)`` pair up in ``DIST_DICT``;
            ``'haversine'`` uses the great-circle formula; ``'osrm'`` queries
            the OSRM routing service and falls back to haversine when the
            request or the response parsing fails; any other value (for
            example ``'Manhattan'``) uses a Manhattan approximation.
        timeout: seconds allowed for the OSRM HTTP request.

    Returns:
        The distance as a float, or NaN when a point cannot be parsed or the
        pair is absent from ``DIST_DICT``.
    """
    if method == 'precalced':
        return DIST_DICT.get((p1, p2), float('nan'))
    lat1, lon1 = parse_point(p1)
    lat2, lon2 = parse_point(p2)
    if None in (lat1, lon1, lat2, lon2):
        return float('nan')
    if method == 'haversine':
        φ1, φ2 = map(math.radians, (lat1, lat2))
        dφ = math.radians(lat2 - lat1)
        dλ = math.radians(lon2 - lon1)
        a = math.sin(dφ/2)**2 + math.cos(φ1)*math.cos(φ2)*math.sin(dλ/2)**2
        # Rounding can push `a` marginally above 1, which would make
        # sqrt(1 - a) raise a domain error for near-antipodal points.
        a = min(1.0, a)
        return 2 * 6371 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    if method == 'osrm':
        coords = f"{lon1},{lat1};{lon2},{lat2}"
        try:
            r = requests.get(OSRM_URL + coords + "?overview=false", timeout=timeout)
            r.raise_for_status()
            return r.json()['routes'][0]['distance'] / 1000
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
            # Network failure or unexpected payload: best-effort fallback.
            # Interrupts such as KeyboardInterrupt are no longer swallowed.
            return distance(p1, p2, method='haversine')
    # Manhattan: north-south leg plus east-west leg, the latter scaled by the
    # cosine of the mean latitude (111.32 km per degree of latitude)
    mean_lat = math.radians((lat1 + lat2) / 2)
    dlat = abs(lat1 - lat2) * 111.32
    dlon = abs(lon1 - lon2) * 111.32 * math.cos(mean_lat)
    return dlat + dlon


def process_group(grp):
    """Select and order the labors of a single service.

    Transport labors (``labor_category == 'VEHICLE_TRANSPORTATION'``) are
    interleaved with the non-transport labors nearest to the first row's
    start point: ``transport, other, transport, ..., transport``.

    Args:
        grp: rows of one service. The columns read are ``labor_category``,
            ``labor_name``, ``shop``, ``address_point``,
            ``start_address_point`` and ``labor_start_date``.

    Returns:
        The selected rows in visiting order, or an empty slice of ``grp``
        when the service is discarded: a single non-transport labor, no
        transport labor at all, or no non-transport labor that has both
        ``shop`` and ``address_point``.
    """
    def _missing_last(value):
        # NaN/NaT compare False against everything, so using them directly as
        # sort keys gives an undefined order; rank them after real values.
        return (True, 0) if pd.isna(value) else (False, value)

    if len(grp) == 1:
        return grp if grp.iloc[0]['labor_category'] == 'VEHICLE_TRANSPORTATION' else grp.iloc[0:0]
    A_idx = grp.index[grp['labor_category'] == 'VEHICLE_TRANSPORTATION'].tolist()
    if not A_idx:
        return grp.iloc[0:0]
    B_idx = [i for i in grp.index if grp.at[i,'labor_category'] != 'VEHICLE_TRANSPORTATION'
             and pd.notna(grp.at[i,'shop']) and pd.notna(grp.at[i,'address_point'])]
    if not B_idx:
        return grp.iloc[0:0]
    # The service opens with 'Alfred Initial Transport' when present,
    # otherwise with the first transport row
    inits = [i for i in A_idx if grp.at[i,'labor_name'] == 'Alfred Initial Transport']
    firstA = inits[0] if inits else A_idx[0]
    A_rem = [i for i in A_idx if i != firstA]
    start_pt = grp['start_address_point'].iloc[0]
    # NOTE(review): distance() runs with its default method ('haversine');
    # DISTANCE_METHOD is not consulted here — confirm this is intended.
    dist_map = {i: distance(start_pt, grp.at[i,'address_point']) for i in B_idx}
    # One stop fits between two consecutive transports, hence len(A_idx)-1
    B_sorted = sorted(B_idx, key=lambda i: _missing_last(dist_map[i]))[:len(A_idx)-1]
    A_rem_sorted = sorted(A_rem, key=lambda i: _missing_last(grp.at[i,'labor_start_date']))
    needed_A = [firstA] + A_rem_sorted[:len(B_sorted)]
    # Interleave transport / stop pairs and close with the last transport
    final = []
    for j, b in enumerate(B_sorted):
        final += [needed_A[j], b]
    final.append(needed_A[-1])
    return grp.loc[final]


# avg_times_map: mean duration in minutes per labor_name, from df_raw.
# Built as a single chain so no column is assigned onto a filtered slice,
# which is what triggers pandas' chained-assignment warning.
# NOTE(review): negative durations (end before start) are not excluded — confirm.
df_temp = (
    df_raw.dropna(subset=['labor_start_date', 'labor_end_date'])
          .assign(duration_td=lambda d: d['labor_end_date'] - d['labor_start_date'])
          # Labors lasting more than one day are left out of the average
          .loc[lambda d: d['duration_td'] <= pd.Timedelta(days=1)]
          .assign(duration_min=lambda d: d['duration_td'].dt.total_seconds() / 60)
)
avg_times_map = df_temp.groupby('labor_name')['duration_min'].mean().to_dict()

# Pre-filter by city (Bogotá=149)
df_bogota = filter_labors_by_city(df_raw, 149)

# Calendar day of every row, computed once so the loop below does not derive
# it again from schedule_date on each iteration.
schedule_days = df_bogota['schedule_date'].dt.date
unique_days = sorted(schedule_days.dropna().unique())


def remap_to_base(dt):
    """Shift ``dt`` by whole days so it lands on ``base_day``.

    The time of day is kept and missing values are returned unchanged. The
    module-level ``base_day`` is read at call time, so the loop only has to
    reassign it; the function no longer needs redefining per iteration.
    """
    if pd.isna(dt):
        return dt
    delta = (base_day - dt.date()).days
    return dt + timedelta(days=delta)


for day in unique_days:
    day_str = day.isoformat()
    print(f"Procesando día {day_str}…")
    # Rows whose schedule_date falls on this day
    df_dia = df_bogota[schedule_days == day].copy()
    if df_dia.empty:
        print(f"  → Sin datos para {day_str}, saltando.")
        continue
    # Remap the date columns onto base_day = day
    base_day = day
    for c in date_cols:
        df_dia[c] = df_dia[c].apply(remap_to_base)
    # Build services_map_df: one (start, end) map point pair per labor
    map_src = df_dia[['service_id', 'labor_id', 'labor_category',
                      'start_address_point', 'end_address_point', 'address_point',
                      'labor_start_date']]
    map_src = map_src.sort_values(['service_id', 'labor_start_date']).reset_index(drop=True)
    rows = []
    for svc, grp in map_src.groupby('service_id', sort=False):
        n = len(grp)
        for i in range(n):
            r = grp.iloc[i]
            if n == 1:
                # A lone labor keeps its own start and end points
                sp, ep = r['start_address_point'], r['end_address_point']
            elif r['labor_category'] == 'VEHICLE_TRANSPORTATION':
                # A transport ends at the end point of the next labor of the
                # service (its own end point if it is the last one)
                sp = r['start_address_point']
                ep = grp.iloc[i+1]['end_address_point'] if i < n-1 else r['end_address_point']
            else:
                # Any other labor starts and ends at its address_point
                sp = ep = r['address_point']
            rows.append({'service_id': svc, 'labor_id': r['labor_id'],
                         'map_start_point': sp, 'map_end_point': ep})
    # Explicit columns keep the astype and the merge valid even if every row
    # of the day has a missing service_id and `rows` ends up empty.
    services_map_df = pd.DataFrame(
        rows, columns=['service_id', 'labor_id', 'map_start_point', 'map_end_point']
    )
    services_map_df['labor_id'] = services_map_df['labor_id'].astype(df_dia['labor_id'].dtype)
    # Build the cleaned template
    cleaned = []
    for svc, grp in df_dia.groupby('service_id', sort=False):
        pg = process_group(grp)
        if not pg.empty:
            cleaned.append(pg)
    # pd.concat raises on an empty list, so skip days with no valid service
    if not cleaned:
        print(f"  → No servicios válidos para {day_str}, saltando.")
        continue
    df_cleaned_template = (
        pd.concat(cleaned, ignore_index=True)
          .merge(services_map_df, on=['service_id','labor_id'], how='left')
    )
    # Save each day's template both as pickle and as CSV
    out_path_pkl = f"processed_by_date/cleaned_static_template_{day_str}.pkl"
    out_path_csv = f"processed_by_date/cleaned_static_template_{day_str}.csv"
    df_cleaned_template.to_pickle(out_path_pkl)
    df_cleaned_template.to_csv(out_path_csv, index=False)
    print(f"  → guardado: {out_path_pkl} y {out_path_csv}")

print("✅ BLOQUE 1 completado para todos los días.")


Procesando día 2023-01-02…
  → guardado: processed_by_date/cleaned_static_template_2023-01-02.pkl y processed_by_date/cleaned_static_template_2023-01-02.csv
Procesando día 2023-01-03…
  → guardado: processed_by_date/cleaned_static_template_2023-01-03.pkl y processed_by_date/cleaned_static_template_2023-01-03.csv
Procesando día 2023-01-04…
  → guardado: processed_by_date/cleaned_static_template_2023-01-04.pkl y processed_by_date/cleaned_static_template_2023-01-04.csv
Procesando día 2023-01-05…
  → guardado: processed_by_date/cleaned_static_template_2023-01-05.pkl y processed_by_date/cleaned_static_template_2023-01-05.csv
Procesando día 2023-01-06…
  → guardado: processed_by_date/cleaned_static_template_2023-01-06.pkl y processed_by_date/cleaned_static_template_2023-01-06.csv
Procesando día 2023-01-07…
  → guardado: processed_by_date/cleaned_static_template_2023-01-07.pkl y processed_by_date/cleaned_static_template_2023-01-07.csv
Procesando día 2023-01-10…
  → guardado: processed_by_date