In [None]:
import os
import joblib
import pandas as pd
import mlflow
from arize.pandas.logger import Client
from arize.utils.types import ModelTypes, Environments, Schema

# ✅ Step 1: Pick the training run and point MLflow at the tracking server
run_id = "35d931bc1f564dd38b32ee6e3bb30242"
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# ✅ Step 2: Fetch the run's model.pkl artifact and load it with joblib
local_model_path = mlflow.artifacts.download_artifacts(
    artifact_path="model.pkl",
    run_id=run_id,
)
model = joblib.load(local_model_path)
print("✅ Model loaded from model.pkl")

# ✅ Load test features and labels
X_test = pd.read_parquet("data/X_test.parquet")
# Squeeze only the column axis so a single-column label file always becomes a
# Series; a bare .squeeze() would collapse a one-row file to a scalar and break
# the Series operations applied to y_test later on.
y_test = pd.read_parquet("data/y_test.parquet").squeeze("columns")

# ✅ Ensure features match training: keep only the saved feature names,
# in the saved order (a missing column raises KeyError here).
feature_names = joblib.load("artifacts/feature_names.pkl")
X_test = X_test[feature_names]

# ✅ Predict
preds = model.predict(X_test)

# ✅ Prepare dataframe for Arize
# One row per test example: the feature columns plus the id, prediction,
# actual label and timestamp columns referenced by the Arize schema.
df = X_test.copy()
df["prediction_id"] = [f"id_{i}" for i in range(len(df))]
df["prediction_label"] = preds
# Assign the labels by position, not by index label. df keeps X_test's index,
# so assigning a Series with a fresh 0..n-1 index would be aligned on labels
# and produce NaN / mismatched rows whenever X_test's index is not 0..n-1.
# A length mismatch now fails loudly with ValueError instead.
df["actual_label"] = y_test.to_numpy()
# Every row gets the same timestamp: the moment this cell ran.
df["timestamp"] = pd.Timestamp.now()

# ✅ Arize setup
# Credentials are read from the environment so they never live in the notebook
# or in version control. Export ARIZE_API_KEY and ARIZE_SPACE_KEY before
# running this cell.
ARIZE_API_KEY = os.environ.get("ARIZE_API_KEY")
ARIZE_SPACE_KEY = os.environ.get("ARIZE_SPACE_KEY")
if not ARIZE_API_KEY or not ARIZE_SPACE_KEY:
    raise RuntimeError(
        "Set the ARIZE_API_KEY and ARIZE_SPACE_KEY environment variables "
        "before logging to Arize."
    )

client = Client(space_key=ARIZE_SPACE_KEY, api_key=ARIZE_API_KEY)

# ✅ Define Schema
# Maps dataframe columns to their Arize roles. The feature columns are named
# explicitly so they are logged as model features alongside the predictions.
schema = Schema(
    prediction_id_column_name="prediction_id",
    feature_column_names=list(feature_names),
    prediction_label_column_name="prediction_label",
    actual_label_column_name="actual_label",
    timestamp_column_name="timestamp",
)

# ✅ Log to Arize
# Collect the call arguments first so the upload itself reads as one line.
# NOTE(review): confirm Environments.TEST exists in the installed arize SDK.
log_kwargs = dict(
    dataframe=df,
    schema=schema,
    model_id="fraud-logistic-v1",  # Change if needed
    model_version=run_id,  # the MLflow run id doubles as the model version
    model_type=ModelTypes.BINARY,
    environment=Environments.TEST,
)
response = client.log(**log_kwargs)

# Report the HTTP status and body returned by the upload.
print("✅ Arize log status:", response.status_code)
print("✅ Arize response:", response.text)


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]