In [1]:
import os
import duckdb
from pathlib import Path
from timing_utils import log, start_timer
import pandas as pd


class SalesHistoryReader:
    """
    Read-side helper that pulls sales history for one project out of a
    DuckDB database file.

    Attributes:
        project: Project code bound into the daily-history query.
        db_path: ``pathlib.Path`` of the DuckDB file that is opened.
    """

    def __init__(self, project="SOG", db_path=None):
        """
        Args:
            project: Project code stored on the instance.
            db_path: Database file location. When falsy, the
                ``BENCHMARK_DB_PATH`` environment variable is used, and if
                that is unset, ``sog.duckdb`` in the current working directory.
        """
        self.project = project

        if db_path:
            location = db_path
        else:
            location = os.getenv("BENCHMARK_DB_PATH", Path.cwd() / "sog.duckdb")
        self.db_path = Path(location)

    # ---- DB connection ----
    def get_connection(self):
        """Open and return a new DuckDB connection to ``self.db_path``."""
        database = str(self.db_path)
        return duckdb.connect(database)

    def _run_query(self, query, bind_values):
        """Execute ``query`` with ``bind_values`` on a fresh connection and
        return the result of ``fetchdf()``."""
        with self.get_connection() as conn:
            cursor = conn.execute(query, bind_values)
            return cursor.fetchdf()

    # ---- Queries ----
    def fetch_daily_history(self, item_id):
        """
        Return ``item_id``, ``sale_date`` and ``sales`` rows from
        ``sales_history`` for this reader's project and the given item,
        ordered by ``sale_date``.
        """
        query = """
        SELECT
            item_id,
            sale_date,
            sales
        FROM sales_history
        WHERE project = ? AND item_id = ?
        ORDER BY sale_date
        """
        return self._run_query(query, [self.project, item_id])

    def fetch_monthly_history(self, item_id, start_date=None):
        """
        Return ``item_id``, ``sale_date`` and ``sales`` rows from
        ``sales_history_monthly`` for the given item, ordered by ``sale_date``.

        Args:
            item_id: Item to select.
            start_date: Optional lower bound (inclusive) on ``sale_date``;
                any falsy value disables the bound.

        Note: unlike ``fetch_daily_history``, this query is not restricted to
        ``self.project``.
        """
        template = """
        SELECT
            item_id,
            sale_date,
            sales
        FROM sales_history_monthly
        WHERE item_id = ?
        {date_filter}
        ORDER BY sale_date
        """

        # Only the fixed clause text is interpolated into the SQL; the date
        # value itself always travels as a bound parameter.
        if start_date:
            clause = "AND sale_date >= ?"
            bind_values = [item_id, start_date]
        else:
            clause = ""
            bind_values = [item_id]

        return self._run_query(template.format(date_filter=clause), bind_values)




In [2]:
import json
import requests
import pandas as pd


class NostradamusForecaster:
    """
    Formats sales history, calls the Nostradamus API, and parses forecasts.

    The constructor arguments are stored unchanged on the instance and are
    copied into every request payload by ``build_payload``.
    """

    def __init__(
        self,
        api_url="https://api.nostradamus-api.com/api/v1/forecast/generate_async",
        forecast_periods=12,
        local_model="auto_arima",
        season_length=12,
        freq="MS",
        mode="local",
    ):
        """
        Args:
            api_url: Endpoint that ``call_api`` POSTs the payload to.
            forecast_periods: Sent as ``forecast_periods`` in the payload.
            local_model: Sent as ``local_model`` in the payload.
            season_length: Sent as ``season_length`` in the payload.
            freq: Sent as ``freq`` in the payload.
            mode: Sent as ``mode`` in the payload.
        """
        self.api_url = api_url
        self.forecast_periods = forecast_periods
        self.local_model = local_model
        self.season_length = season_length
        self.freq = freq
        self.mode = mode

    # -----------------------------
    # History -> sim_input_his
    # -----------------------------
    def format_sim_input(self, df_history: pd.DataFrame) -> list:
        """
        Convert a history frame into the list of records sent as
        ``sim_input_his``.

        Args:
            df_history: Frame with ``item_id``, ``sale_date`` and ``sales``
                columns. It is not modified.

        Returns:
            One dict per row, ordered by ``item_id`` then ``sale_date``, with
            keys ``item_id``, ``actual_sale`` (float) and ``day`` (ISO date
            string). An empty frame yields ``[]``.
        """
        if df_history.empty:
            return []

        # assign() works on a copy, so the caller's frame keeps its original
        # sale_date values.
        ordered = df_history.assign(
            sale_date=pd.to_datetime(df_history["sale_date"]).dt.date
        ).sort_values(["item_id", "sale_date"])

        # Walk the three columns in lockstep instead of iterrows(), which
        # materialises a full Series for every row.
        # NOTE(review): missing dates/sales are passed through as-is
        # (NaT / NaN) - confirm the history is clean before it gets here.
        return [
            {
                "item_id": item_id,
                "actual_sale": float(sales),
                "day": day.isoformat(),
            }
            for item_id, sales, day in zip(
                ordered["item_id"], ordered["sales"], ordered["sale_date"]
            )
        ]

    # -----------------------------
    # Build API payload
    # -----------------------------
    def build_payload(self, df_history: pd.DataFrame) -> dict:
        """Return the request body: formatted history plus the instance settings."""
        sim_input_his = self.format_sim_input(df_history)

        return {
            "sim_input_his": sim_input_his,
            "forecast_periods": self.forecast_periods,
            "mode": self.mode,
            "local_model": self.local_model,
            "season_length": self.season_length,
            "freq": self.freq,
        }

    def build_payload_json(self, df_history: pd.DataFrame) -> str:
        """Return ``build_payload`` serialised as indented, non-ASCII-escaped JSON."""
        return json.dumps(self.build_payload(df_history), ensure_ascii=False, indent=2)

    # -----------------------------
    # Call API
    # -----------------------------
    def call_api(self, df_history: pd.DataFrame, timeout=30) -> dict:
        """
        POST the payload for ``df_history`` to ``self.api_url``.

        Args:
            df_history: History frame passed to ``build_payload``.
            timeout: Passed to ``requests.post`` as ``timeout``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: via ``raise_for_status`` on an error status.
        """
        payload = self.build_payload(df_history)
        r = requests.post(self.api_url, json=payload, timeout=timeout)
        r.raise_for_status()
        return r.json()

    # -----------------------------
    # Parse API response
    # -----------------------------
    def parse_forecast_df(self, resp: dict) -> pd.DataFrame:
        """
        Returns a dataframe with forecast_date + forecast
        for the first forecast item in the response.

        An empty/absent ``forecasts`` entry yields an empty frame with the
        same two columns. Rows are sorted by ``forecast_date``.
        """
        if not resp or "forecasts" not in resp or not resp["forecasts"]:
            return pd.DataFrame(columns=["forecast_date", "forecast"])

        # Only the first entry is read; any further entries are ignored.
        f0 = resp["forecasts"][0]

        return (
            pd.DataFrame(
                {
                    "forecast_date": pd.to_datetime(f0["forecast_dates"]),
                    "forecast": f0["forecast"],
                }
            )
            .sort_values("forecast_date")
            .reset_index(drop=True)
        )

    # -----------------------------
    # Convenience one-shot method
    # -----------------------------
    def forecast(self, df_history: pd.DataFrame, timeout=30) -> pd.DataFrame:
        """Call the API for ``df_history`` and return the parsed forecast frame."""
        resp = self.call_api(df_history, timeout=timeout)
        return self.parse_forecast_df(resp)

In [None]:

# Item and project used for this forecast run.
ITEM = "20-000"
PROJECT = "SOG"

# Reader bound to PROJECT; the database path falls back to BENCHMARK_DB_PATH
# or ./sog.duckdb because no db_path is passed.
reader = SalesHistoryReader(project=PROJECT)

# Monthly history for ITEM from 2022-01-01 onward.
# NOTE(review): fetch_monthly_history filters on item_id (and the date) only,
# so PROJECT has no effect on this query - confirm that is intended.
df_hist = reader.fetch_monthly_history(ITEM, start_date="2022-01-01")


# Forecaster with its constructor defaults (12 periods, auto_arima,
# season_length=12, freq "MS", mode "local").
forecaster = NostradamusForecaster()
# Posts the history to the API and parses the first forecast in the response.
df_fcst = forecaster.forecast(df_hist)

print(df_fcst)

   item_id  sale_date    sales
0   20-000 2022-01-01  1302.00
1   20-000 2022-02-01   468.00
2   20-000 2022-03-01  1221.00
3   20-000 2022-04-01  1998.00
4   20-000 2022-05-01  2950.33
5   20-000 2022-06-01  3328.00
6   20-000 2022-07-01  3152.00
7   20-000 2022-08-01  3591.50
8   20-000 2022-09-01  3440.00
9   20-000 2022-10-01  2946.00
10  20-000 2022-11-01  3369.00
11  20-000 2022-12-01  1268.00
12  20-000 2023-01-01   682.00
13  20-000 2023-02-01  1732.00
14  20-000 2023-03-01  1554.00
15  20-000 2023-04-01  1865.00
16  20-000 2023-05-01  3763.00
17  20-000 2023-06-01  3499.00
18  20-000 2023-07-01  3426.00
19  20-000 2023-08-01  3323.00
20  20-000 2023-09-01  3889.00
21  20-000 2023-10-01  3579.00
22  20-000 2023-11-01  3474.00
23  20-000 2023-12-01  1094.00
24  20-000 2024-01-01   812.00
25  20-000 2024-02-01  1011.00
26  20-000 2024-03-01  1419.00
27  20-000 2024-04-01  2198.00
28  20-000 2024-05-01  3696.00
29  20-000 2024-06-01  3826.00
30  20-000 2024-07-01  3407.00
31  20-0