In [0]:
# %pip install mlflow

In [0]:
# %pip install --upgrade typing_extensions statsmodels

In [0]:
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
# Notebook parameters exposed as Databricks text widgets.
# Maps widget name -> default value; insertion order is the creation order.
_WIDGET_DEFAULTS = {
    "experiment_path": "/Shared/experiments/btc_arima_test",
    "model_name": "training_2025.mayank.btc_arima_model",
    "model_alias": "champion",
    "data_path": "/Volumes/training_2025/mayank/time_series_prac/bitcoin_price.csv",
    "model_output_path": "/Workspace/Users/mayank.bhadauria@datanimbus.com/data_lake_dev/files/src/main/ml/models",
}

# Create every widget first, then read the current values back.
for _widget_name, _widget_default in _WIDGET_DEFAULTS.items():
    dbutils.widgets.text(_widget_name, _widget_default)

# Unpacking order follows the key order of _WIDGET_DEFAULTS above.
experiment_path, model_name, model_alias, data_path, model_output_path = (
    dbutils.widgets.get(_widget_name) for _widget_name in _WIDGET_DEFAULTS
)

In [0]:
import os
import pickle
import pandas as pd
import mlflow
import mlflow.pyfunc
from statsmodels.tsa.arima.model import ARIMA
from mlflow.models import infer_signature
from mlflow.tracking import MlflowClient


class ARIMAModelWrapper(mlflow.pyfunc.PythonModel):
    """pyfunc adapter around a pickled, already-fitted ARIMA result.

    The fitted model is expected under the ``arima_model`` artifact key and is
    restored with ``pickle`` when the pyfunc context is loaded.
    """

    def load_context(self, context):
        """Unpickle the model stored under the ``arima_model`` artifact key."""
        pickle_path = context.artifacts["arima_model"]
        with open(pickle_path, "rb") as handle:
            self.model = pickle.load(handle)

    def predict(self, context, model_input):
        """Forecast one step ahead per row of ``model_input``.

        Only the number of rows is used; the contents of ``model_input`` are
        ignored. Returns whatever ``self.model.forecast`` returns.
        """
        horizon = len(model_input)
        return self.model.forecast(steps=horizon)


def main():
    """Train an ARIMA model on the ``Close`` column and register it with MLflow.

    Settings are read from environment variables, each with a default:

    * ``MLFLOW_EXPERIMENT_PATH`` - MLflow experiment to log the run under.
    * ``BTC_DATA_PATH`` - CSV file with ``Date`` and ``Close`` columns.
    * ``MLFLOW_MODEL_NAME`` - registered model name (catalog.schema.model).
    * ``MLFLOW_MODEL_ALIAS`` - alias assigned to the newly registered version.
    * ``MODEL_OUTPUT_PATH`` - directory for the pickled model. Falls back to
      the notebook-level ``model_output_path`` variable when that exists, and
      otherwise to a fresh temporary directory.

    Side effects: writes ``arima_model.pkl`` into the output directory, logs an
    MLflow run (parameters, one metric, the pyfunc model), registers a model
    version, points the alias at that version, and prints a summary line.
    """
    import tempfile  # local import: only needed for the fallback output directory

    # --------- CONFIG VIA ENV VARS (with defaults) ---------
    experiment_path = os.getenv(
        "MLFLOW_EXPERIMENT_PATH",
        "/Workspace/Users/mayank.bhadauria@datanimbus.com/ml_asset_model/btc_arima_test",
    )

    data_path = os.getenv(
        "BTC_DATA_PATH",
        "/Volumes/training_2025/mayank/time_series_prac/bitcoin_price.csv",
    )

    # Full UC model name: catalog.schema.model
    model_name = os.getenv(
        "MLFLOW_MODEL_NAME",
        "training_2025.mayank.btc_arima_model",
    )

    # Alias to assign to the latest version
    model_alias = os.getenv("MLFLOW_MODEL_ALIAS", "champion")

    # Resolve the output directory explicitly instead of relying on an implicit
    # notebook global, so the function also works when the widget cell has not
    # been run (or when the value is empty).
    output_dir = (
        os.getenv("MODEL_OUTPUT_PATH")
        or globals().get("model_output_path")
        or tempfile.mkdtemp(prefix="btc_arima_")
    )

    # Single source of truth for the (p, d, q) order: used for fitting AND for
    # the logged parameters, so the two can never drift apart.
    arima_order = (5, 1, 0)

    mlflow.set_experiment(experiment_path)

    # Load data
    df = pd.read_csv(data_path)
    df["Date"] = pd.to_datetime(df["Date"])
    # Sort chronologically so the series is fitted in time order and iloc[-1]
    # below is the most recent close, whatever the row order of the CSV is.
    df = df.set_index("Date").sort_index()

    close_series = df["Close"]

    # Train ARIMA
    model = ARIMA(close_series, order=arima_order)
    fitted = model.fit()

    # Signature: the input is only a row-count placeholder (one row per step).
    example_input = pd.DataFrame({"dummy": [0.0] * 7})
    example_output = fitted.forecast(steps=len(example_input))
    signature = infer_signature(example_input, example_output)

    # Save ARIMA model locally for MLflow artifact
    # Ensure the directory exists
    os.makedirs(output_dir, exist_ok=True)
    local_model_path = os.path.join(output_dir, "arima_model.pkl")
    with open(local_model_path, "wb") as f:
        pickle.dump(fitted, f)

    # Log model to UC
    with mlflow.start_run():
        for param_name, param_value in zip(
            ("order_p", "order_d", "order_q"), arima_order
        ):
            mlflow.log_param(param_name, param_value)
        mlflow.log_metric("final_price", float(close_series.iloc[-1]))

        model_info = mlflow.pyfunc.log_model(
            artifact_path="model",
            python_model=ARIMAModelWrapper(),
            artifacts={"arima_model": local_model_path},
            registered_model_name=model_name,
            signature=signature,
        )

    # After logging, set alias (UC-style) on this version
    client = MlflowClient()
    client.set_registered_model_alias(
        name=model_name,
        alias=model_alias,
        version=model_info.registered_model_version,
    )

    print(
        f"Model successfully logged as {model_name} "
        f"(version {model_info.registered_model_version}, alias '{model_alias}')."
    )


if __name__ == "__main__":
    main()


In [0]:
import os
import mlflow
import pandas as pd


def main():
    """Load the registered model by alias and print a 7-step forecast.

    The model name and alias come from the ``MLFLOW_MODEL_NAME`` and
    ``MLFLOW_MODEL_ALIAS`` environment variables, with defaults.
    """
    registered_name = os.getenv(
        "MLFLOW_MODEL_NAME", "training_2025.mayank.btc_arima_model"
    )
    alias = os.getenv("MLFLOW_MODEL_ALIAS", "champion")

    # URI of the form models:/<name>@<alias>
    model_uri = "models:/{}@{}".format(registered_name, alias)
    print(f"\n>>> Loading model from: {model_uri}")

    # -------- Load model --------
    loaded_model = mlflow.pyfunc.load_model(model_uri)

    # One placeholder row per forecast step.
    horizon = 7
    placeholder_rows = pd.DataFrame({"dummy": [0.0 for _ in range(horizon)]})

    # -------- Predict --------
    forecast = loaded_model.predict(placeholder_rows)
    print("\n>>> 7-step forecast:")
    print(forecast)


if __name__ == "__main__":
    main()
