# Coffee Demand Forecasting — Notebook End-to-End (H ≤ 7 días)

**Objetivo:** Pronosticar **`transactions`** (demanda diaria) por **producto** a **1–7 días** organizado por secciones:

1. **Setup & Config**
2. **Carga de datos e inspección**
3. **Métricas**
4. **Splits de backtesting**
5. **Baselines (naive1, snaive7, ma7)**
6. **LightGBM Direct Multi‑Horizon**
7. **Prophet**
8. **Ejecución de backtesting y comparación**


## 1) Setup & Config

In [25]:
# Paquetes requeridos
# !pip install pandas numpy scikit-learn lightgbm prophet --quiet

from pathlib import Path
import pandas as pd
import numpy as np

# Configuración
DATA_PATH_CANDIDATES = [
r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\REPOSITORIO\coffee-sales-project\data\processed\coffee_ml_features.csv"
]
TARGET = "transactions"
HORIZON = 7
N_ORIGINS = 4

# Resolver ruta de datos
for _p in DATA_PATH_CANDIDATES:
    if Path(_p).exists():
        DATA_PATH = _p
        break
else:
    raise FileNotFoundError("No se encontró coffee_ml_features.csv en rutas conocidas.")

DATA_PATH

'D:\\Julian\\Estudio\\Maestria Inteligencia Artificial\\Materias\\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\\MICRO - PROYECTOS\\REPOSITORIO\\coffee-sales-project\\data\\processed\\coffee_ml_features.csv'

## 2) Carga de datos e inspección

In [26]:
df = pd.read_csv(DATA_PATH)
df["date"] = pd.to_datetime(df["date"])
df.head(10)

Unnamed: 0,date,revenue,transactions,avg_price,first_sale_hour,last_sale_hour,avg_sale_hour,year,month,day,...,total_daily_revenue,market_share_transactions,product_Americano,product_Americano with Milk,product_Cappuccino,product_Cocoa,product_Cortado,product_Espresso,product_Hot Chocolate,product_Latte
0,2024-03-01,28.9,1.0,28.9,15.0,15.0,15.0,2024,3,1,...,396.3,0.090909,True,False,False,False,False,False,False,False
1,2024-03-02,86.7,3.0,28.9,12.0,19.0,15.666667,2024,3,2,...,228.1,0.428571,True,False,False,False,False,False,False,False
2,2024-03-03,28.9,1.0,28.9,14.0,14.0,14.0,2024,3,3,...,349.1,0.1,True,False,False,False,False,False,False,False
3,2024-03-04,0.0,0.0,0.0,0.0,0.0,0.0,2024,3,4,...,135.2,0.0,True,False,False,False,False,False,False,False
4,2024-03-05,0.0,0.0,0.0,0.0,0.0,0.0,2024,3,5,...,338.5,0.0,True,False,False,False,False,False,False,False
5,2024-03-06,28.9,1.0,28.9,15.0,15.0,15.0,2024,3,6,...,170.2,0.2,True,False,False,False,False,False,False,False
6,2024-03-07,28.9,1.0,28.9,17.0,17.0,17.0,2024,3,7,...,220.1,0.166667,True,False,False,False,False,False,False,False
7,2024-03-08,86.7,3.0,28.9,15.0,16.0,15.666667,2024,3,8,...,265.5,0.375,True,False,False,False,False,False,False,False
8,2024-03-09,86.7,3.0,28.9,14.0,14.0,14.0,2024,3,9,...,479.4,0.214286,True,False,False,False,False,False,False,False
9,2024-03-10,30.0,1.0,30.0,9.0,9.0,9.0,2024,3,10,...,231.6,0.142857,True,False,False,False,False,False,False,False


In [27]:
schema = pd.DataFrame({
    "column": df.columns,
    "dtype": [str(t) for t in df.dtypes],
    "n_nulls": df.isna().sum().values,
    "n_unique": df.nunique().values,
})
schema

Unnamed: 0,column,dtype,n_nulls,n_unique
0,date,datetime64[ns],0,388
1,revenue,float64,0,113
2,transactions,float64,0,14
3,avg_price,float64,0,47
4,first_sale_hour,float64,0,18
5,last_sale_hour,float64,0,17
6,avg_sale_hour,float64,0,155
7,year,int64,0,2
8,month,int64,0,12
9,day,int64,0,31


In [28]:
df["date"].min(), df["date"].max(), df.shape

(Timestamp('2024-03-01 00:00:00'),
 Timestamp('2025-03-23 00:00:00'),
 (3104, 38))

## 3) Métricas  — MAE, RMSE, sMAPE y resumen

In [29]:
import numpy as np
import pandas as pd

def mae(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
    if mask.sum() == 0:
        return np.nan
    return np.mean(np.abs(y_true[mask] - y_pred[mask]))

def rmse(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
    if mask.sum() == 0:
        return np.nan
    return float(np.sqrt(np.mean((y_true[mask] - y_pred[mask]) ** 2)))

def mape(y_true, y_pred, epsilon: float = 1e-6, ignore_zeros: bool = True):
    """
    MAPE en %.

    Por defecto ignoramos los puntos donde |y_true| ~ 0 porque el MAPE clásico explota ahí.
    Si no se ignoran, usar ignore_zeros=False y reemplazar denominador por epsilon.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if ignore_zeros:
        mask = np.abs(y_true) > epsilon
        if mask.sum() == 0:
            return np.nan
        return float(np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100.0)
    else:
        denom = np.where(np.abs(y_true) < epsilon, epsilon, np.abs(y_true))
        return float(np.mean(np.abs((y_true - y_pred)) / denom) * 100.0)

def smape(y_true, y_pred, epsilon: float = 1e-6):
    """sMAPE en % (estable ante ceros)."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)).clip(min=epsilon)
    return float(np.mean(2.0 * np.abs(y_pred - y_true) / denom) * 100.0)



def summarize_metrics(df, target_col: str = "y"):
    """
    Devuelve:
      - by_h: DataFrame con métricas por horizonte (h)
      - overall: dict con métricas globales
    """
    out = {}
    for h, g in df.groupby("h"):
        out[h] = {
            "MAE": mae(g[target_col], g["yhat"]),
            "RMSE": rmse(g[target_col], g["yhat"]),
            "MAPE": mape(g[target_col], g["yhat"], ignore_zeros=True),
            "sMAPE": smape(g[target_col], g["yhat"]),
        }
    overall = {
        "MAE": mae(df[target_col], df["yhat"]),
        "RMSE": rmse(df[target_col], df["yhat"]),
        "MAPE": mape(df[target_col], df["yhat"], ignore_zeros=True),
        "sMAPE": smape(df[target_col], df["yhat"]),
    }
    return pd.DataFrame(out).T, overall


## 4) Splits de backtesting (rolling-origin)

In [30]:
import pandas as pd
from typing import List, Tuple

def rolling_origins(date_index: pd.Series, n_origins: int = 4, horizon: int = 7):
    unique_dates = pd.Series(pd.to_datetime(pd.unique(date_index))).sort_values()
    anchors = [unique_dates.iloc[-(i+1)*horizon] for i in range(n_origins)][::-1]
    splits: List[Tuple[pd.Timestamp, pd.Timestamp, pd.Timestamp]] = []
    for anchor in anchors:
        train_end = anchor - pd.Timedelta(days=1)
        test_start = anchor
        test_end = anchor + pd.Timedelta(days=horizon - 1)
        splits.append((train_end, test_start, test_end))
    return splits

splits_demo = rolling_origins(df["date"], n_origins=4, horizon=7)
splits_demo


[(Timestamp('2025-02-23 00:00:00'),
  Timestamp('2025-02-24 00:00:00'),
  Timestamp('2025-03-02 00:00:00')),
 (Timestamp('2025-03-02 00:00:00'),
  Timestamp('2025-03-03 00:00:00'),
  Timestamp('2025-03-09 00:00:00')),
 (Timestamp('2025-03-09 00:00:00'),
  Timestamp('2025-03-10 00:00:00'),
  Timestamp('2025-03-16 00:00:00')),
 (Timestamp('2025-03-16 00:00:00'),
  Timestamp('2025-03-17 00:00:00'),
  Timestamp('2025-03-23 00:00:00'))]

## 5) Baselines — naive1, seasonal-naive(7), moving-average(7)

_baseline_predict(train, test_dates, horizon, target, kind):

naive1: repite el último valor observado.

snaive7: repite el patrón de los últimos 7 días.

ma7: usa la media de los últimos 7 días.

Define baselines por  producto.

Hace un smoke test en el primer split: entrena con el bloque de train y genera predicciones para el rango de test.

In [31]:
BASELINE_NAMES = ["naive1", "snaive7", "ma7"]

def _by_product(df: pd.DataFrame):
    prod_cols = [c for c in df.columns if c.startswith("product_")]
    dfx = df.copy()
    if len(prod_cols) == 0:
        raise ValueError("No se encontraron columnas product_* (one-hot).")
    dfx["product"] = dfx[prod_cols].idxmax(axis=1).str.replace("product_", "", regex=False)
    return dfx

def _baseline_predict(train: pd.DataFrame, test_dates: pd.DatetimeIndex, horizon: int, target: str, kind: str):
    pieces = []
    for prod, g in train.groupby("product"):
        g = g.sort_values("date")
        if kind == "naive1":
            last = g[target].iloc[-1]
            preds = [last] * horizon
        elif kind == "snaive7":
            hist = g[target].iloc[-7:].tolist()
            if len(hist) < 7:
                hist = [g[target].iloc[-1]] * 7
            preds = hist
        elif kind == "ma7":
            window = g[target].iloc[-7:]
            meanv = float(window.mean()) if len(window) > 0 else float(g[target].iloc[-1])
            preds = [meanv] * horizon
        else:
            raise ValueError(kind)
        dfp = pd.DataFrame({
            "date": test_dates,
            "product": prod,
            "h": list(range(1, horizon + 1)),
            "yhat": preds[:horizon],
        })
        pieces.append(dfp)
    return pd.concat(pieces, ignore_index=True)

df_prod = _by_product(df)
(train_end, test_start, test_end) = splits_demo[0]
train = df_prod[df_prod["date"] <= train_end]
test = df_prod[(df_prod["date"] >= test_start) & (df_prod["date"] <= test_end)].copy()
preds = _baseline_predict(train, pd.date_range(test_start, test_end, freq="D"), 7, "transactions", "snaive7")
preds.head()


Unnamed: 0,date,product,h,yhat
0,2025-02-24,Americano,1,7.0
1,2025-02-25,Americano,2,5.0
2,2025-02-26,Americano,3,6.0
3,2025-02-27,Americano,4,8.0
4,2025-02-28,Americano,5,11.0


## 6) LightGBM — Direct Multi‑Horizon

Se quiere predecir 7 días de demanda por producto

Directa por horizonte: 7 modelos distintos, uno por cada día futuro

No hay propagación de errores recursivamente; cada modelo aprende directamente su horizonte.

In [32]:
!pip install lightgbm prophet --quiet


[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [33]:
from lightgbm import LGBMRegressor

RANDOM_STATE = 42

def _build_direct_labels(df: pd.DataFrame, target: str, H: int = 7):
    dfx = df.sort_values(["product", "date"]).copy()
    for h in range(1, H + 1):
        dfx[f"y_{h}"] = dfx.groupby("product")[target].shift(-h)
    return dfx

def _feature_columns(df: pd.DataFrame, target: str):
    drop_like = {target, "date", "product"}
    cols = [c for c in df.columns if c not in drop_like and not c.startswith("y_")]
    return [c for c in cols if pd.api.types.is_numeric_dtype(df[c])]


## 7) Prophet — estacionalidad semanal

Modela cada producto por separado con Prophet activando estacionalidad semanal (weekly_seasonality=True).

Entrena con histórico hasta train_end y hace horizon días a futuro.

Consolidación: une las predicciones por producto y por fecha del bloque de test, calcula h y luego evalúa con summarize_metrics.

In [34]:
from prophet import Prophet

## 8) Backtesting end‑to‑end y comparación de modelos

In [35]:
import os
os.makedirs("results", exist_ok=True)

df_prod = _by_product(df)
splits = rolling_origins(df_prod["date"], n_origins=4, horizon=7)

all_results = []
for (train_end, test_start, test_end) in splits:
    train = df_prod[df_prod["date"] <= train_end]
    test = df_prod[(df_prod["date"] >= test_start) & (df_prod["date"] <= test_end)].copy()
    for name in ["naive1", "snaive7", "ma7"]:
        preds = _baseline_predict(train, pd.date_range(test_start, test_end, freq="D"), 7, "transactions", name)
        merged = test.merge(preds, on=["date", "product"], how="left")
        merged["model"] = name
        all_results.append(merged[["date", "product", "transactions", "h", "yhat", "model"]])
baseline_results = pd.concat(all_results, ignore_index=True)
by_h_baseline, overall_baseline = summarize_metrics(baseline_results.rename(columns={"transactions": "y"}))
by_h_baseline, overall_baseline


(        MAE      RMSE       MAPE      sMAPE
 1  1.507440  2.175300  65.109435  74.592093
 2  1.138393  1.729410  61.355219  76.410945
 3  1.209821  1.719919  60.912698  82.363135
 4  1.209821  1.747386  62.577839  80.546706
 5  1.534226  2.322207  61.316672  77.595400
 6  0.944940  1.385435  74.751984  91.046764
 7  0.800595  1.127878  74.603175  92.678669,
 {'MAE': 1.1921768707482994,
  'RMSE': 1.7855952341267196,
  'MAPE': 64.93002257883211,
  'sMAPE': 82.17624474604864})

In [36]:
baseline_results.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\baselines_forecasts.csv", index=False)
by_h_baseline.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\baselines_metrics_by_h.csv")
pd.DataFrame([overall_baseline]).to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\baselines_metrics_overall.csv", index=False)
"Baselines guardados"

'Baselines guardados'

In [37]:
df_lgbm = _build_direct_labels(df_prod, "transactions", H=7)
feats = _feature_columns(df_lgbm, "transactions")

lgbm_all = []
for (train_end, test_start, test_end) in splits:
    train = df_lgbm[df_lgbm["date"] <= train_end].copy()
    test = df_lgbm[(df_lgbm["date"] >= test_start) & (df_lgbm["date"] <= test_end)].copy()
    preds_blocks = []
    for h in range(1, 8):
        y_col = f"y_{h}"
        tr = train.dropna(subset=[y_col])
        if tr.empty:
            continue
        X_tr = tr[feats]
        y_tr = tr[y_col]
        model = LGBMRegressor(
            n_estimators=500,
            learning_rate=0.05,
            num_leaves=31,
            subsample=0.9,
            colsample_bytree=0.9,
            random_state=42,
        )
        model.fit(X_tr, y_tr)
        test_block = test.copy()
        test_block["h"] = (test_block["date"] - test_start).dt.days + 1
        mask_h = test_block["h"] == h
        X_te = test_block.loc[mask_h, feats]
        yhat = model.predict(X_te)
        out = test_block.loc[mask_h, ["date", "product"]].copy()
        out["h"] = h
        out["yhat"] = yhat
        preds_blocks.append(out)
    if preds_blocks:
        fold_preds = pd.concat(preds_blocks, ignore_index=True)
        y_true = test[["date", "product", "transactions"]].copy()
        merged = y_true.merge(fold_preds, on=["date", "product"], how="left")
        merged["model"] = "lgbm_direct"
        lgbm_all.append(merged)
lgbm_results = pd.concat(lgbm_all, ignore_index=True)
by_h_lgbm, overall_lgbm = summarize_metrics(lgbm_results.rename(columns={"transactions": "y"}))
by_h_lgbm, overall_lgbm


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1223
[LightGBM] [Info] Number of data points in the train set: 2880, number of used features: 36
[LightGBM] [Info] Start training from score 1.125000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1223
[LightGBM] [Info] Number of data points in the train set: 2880, number of used features: 36
[LightGBM] [Info] Start training from score 1.126736
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000337 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1223
[LightGBM] [Info] Number of data points in the train set: 2880, number of used features: 36
[LightGBM] [Info] Start traini

(        MAE      RMSE       MAPE       sMAPE
 1  1.253773  1.700311  51.950336   96.822273
 2  1.057390  1.446797  46.922641   98.192148
 3  1.282123  1.631269  59.468925   95.938441
 4  0.983264  1.282467  60.354614   79.863056
 5  1.424265  1.844967  63.476480  100.245654
 6  1.105676  1.542267  80.302016  125.367974
 7  0.645787  0.755241  44.571646  112.276185,
 {'MAE': 1.1074682552468154,
  'RMSE': 1.494892455218169,
  'MAPE': 57.7473501659663,
  'sMAPE': 101.24367578327728})

In [38]:
lgbm_results.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\lgbm_direct_forecasts.csv", index=False)
by_h_lgbm.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\lgbm_direct_metrics_by_h.csv")
pd.DataFrame([overall_lgbm]).to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\lgbm_direct_metrics_overall.csv", index=False)
"LGBM direct guardado"

'LGBM direct guardado'

In [39]:
prophet_all = []
for (train_end, test_start, test_end) in splits:
    fold = []
    for prod, g in df_prod[df_prod["date"] <= train_end].groupby("product"):
        g = g.sort_values("date")
        m = Prophet(weekly_seasonality=True, daily_seasonality=False, yearly_seasonality=False)
        aux = g.rename(columns={"date": "ds", "transactions": "y"})[["ds", "y"]]
        m.fit(aux)
        future = m.make_future_dataframe(periods=7, freq="D", include_history=False)
        fcst = m.predict(future)[["ds", "yhat"]]
        fcst["product"] = prod
        fold.append(fcst.rename(columns={"ds": "date"}))
    fold = pd.concat(fold, ignore_index=True)
    mask = (fold["date"] >= test_start) & (fold["date"] <= test_end)
    fold = fold.loc[mask].copy()
    fold["h"] = (fold["date"] - test_start).dt.days + 1
    y_true = df_prod[(df_prod["date"] >= test_start) & (df_prod["date"] <= test_end)][["date", "product", "transactions"]].copy()
    merged = y_true.merge(fold, on=["date", "product"], how="left")
    merged["model"] = "prophet"
    prophet_all.append(merged)
prophet_results = pd.concat(prophet_all, ignore_index=True)
by_h_prophet, overall_prophet = summarize_metrics(prophet_results.rename(columns={"transactions": "y"}))
by_h_prophet, overall_prophet


22:31:02 - cmdstanpy - INFO - Chain [1] start processing
22:31:02 - cmdstanpy - INFO - Chain [1] done processing
22:31:02 - cmdstanpy - INFO - Chain [1] start processing
22:31:02 - cmdstanpy - INFO - Chain [1] done processing
22:31:02 - cmdstanpy - INFO - Chain [1] start processing
22:31:02 - cmdstanpy - INFO - Chain [1] done processing
22:31:02 - cmdstanpy - INFO - Chain [1] start processing
22:31:02 - cmdstanpy - INFO - Chain [1] done processing
22:31:03 - cmdstanpy - INFO - Chain [1] start processing
22:31:03 - cmdstanpy - INFO - Chain [1] done processing
22:31:03 - cmdstanpy - INFO - Chain [1] start processing
22:31:03 - cmdstanpy - INFO - Chain [1] done processing
22:31:03 - cmdstanpy - INFO - Chain [1] start processing
22:31:03 - cmdstanpy - INFO - Chain [1] done processing
22:31:03 - cmdstanpy - INFO - Chain [1] start processing
22:31:03 - cmdstanpy - INFO - Chain [1] done processing
22:31:03 - cmdstanpy - INFO - Chain [1] start processing
22:31:03 - cmdstanpy - INFO - Chain [1]

(        MAE      RMSE       MAPE       sMAPE
 1  1.273708  1.806059  47.686456   96.989572
 2  1.084673  1.558801  46.251176   97.188347
 3  1.182781  1.568151  44.258456   73.543084
 4  1.071159  1.472168  43.029766   79.961065
 5  1.514759  2.166736  48.447347  100.575392
 6  1.013972  1.233269  58.726216  125.185653
 7  0.713720  0.936127  49.469876  107.978054,
 {'MAE': 1.1221101513777312,
  'RMSE': 1.577073322666212,
  'MAPE': 47.542892597250344,
  'sMAPE': 97.34588098262145})

In [40]:
prophet_results.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\prophet_forecasts.csv", index=False)
by_h_prophet.to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\prophet_metrics_by_h.csv")
pd.DataFrame([overall_prophet]).to_csv(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results\prophet_metrics_overall.csv", index=False)
"Prophet guardado"

'Prophet guardado'

### Comparación modelos

In [41]:
from pathlib import Path

RESULTS_PATH = Path(r"D:\Julian\Estudio\Maestria Inteligencia Artificial\Materias\PROYECTO - DESARROLLO Y DESPLIEGUE DE SOLUCIONES\MICRO - PROYECTOS\results")  # <-- ajusta a tu ruta real
paths = list(RESULTS_PATH.glob("*_metrics_overall.csv"))
rows = []
for p in paths:
    dfm = pd.read_csv(p)
    model = p.name.replace("_metrics_overall.csv", "")
    row = dfm.iloc[0].to_dict()
    row["model"] = model
    rows.append(row)
rank = pd.DataFrame(rows).sort_values("sMAPE")
rank


Unnamed: 0,MAE,RMSE,MAPE,sMAPE,model
0,1.192177,1.785595,64.930023,82.176245,baselines
2,1.12211,1.577073,47.542893,97.345881,prophet
1,1.107468,1.494892,57.74735,101.243676,lgbm_direct
