In [3]:
# log_to_arize.py

import joblib
import pandas as pd
import mlflow
import uuid
from arize.pandas.logger import Client
from arize.utils.types import Schema, ModelTypes, Environments

# === CONFIG ===
mlflow.set_tracking_uri("http://127.0.0.1:5000")
RUN_ID = "1af054b6afd74e008185074dce1c89f9"              # ← Use the correct run_id
PARQUET_PATH = "data/fraud_data.parquet"
FEATURE_NAMES_PATH = "artifacts/feature_names.pkl"
space_key = "U3BhY2U6MjM3MTI6RThBTQ=="                   # ✅ Already base64 encoded
api_key = "ak-8c93aa68-e105-4c23-b977-4ffb437fe7a5-rZPuli0UaGIrRAJ3x-OkK1sg_l5e5mFT"
MODEL_ID = "fraud_detection_model"
MODEL_VERSION = "v1"

# === 1. Load MLflow Model ===
print("📦 Loading model from MLflow...")
model_uri = f"runs:/{RUN_ID}/"   # ✅ Fixed variable name
model = mlflow.sklearn.load_model(model_uri)
model = mlflow.pyfunc.load_model(model_uri)

# === 2. Load and Prepare Data ===
print("📊 Loading and preparing data...")
df = pd.read_parquet(PARQUET_PATH).sample(n=1000, random_state=42)
X = df.drop(columns=["Class"])
y = df["Class"]

# ✅ Load the saved feature names
feature_names = joblib.load(FEATURE_NAMES_PATH)

# ✅ Align test features to training features
X = pd.get_dummies(X)
for col in feature_names:
    if col not in X.columns:
        X[col] = 0
X = X[feature_names]  # Enforce correct column order
assert list(X.columns) == list(feature_names), "Mismatch in input features"

# === 3. Make Predictions ===
predictions = model.predict(X_test)
print("🧠 Running model predictions...")
X["prediction"] = model.predict(X)
X["actual"] = y.values
X["prediction_id"] = [str(uuid.uuid4()) for _ in range(len(X))]

# === 4. Prepare DataFrame for Arize ===
log_df = X.copy()
log_df.reset_index(drop=True, inplace=True)

# === 5. Arize Client Setup ===
print("🚀 Logging to Arize...")
client = Client(space_key=space_key, api_key=api_key)

schema = Schema(
    prediction_id_column_name="prediction_id",
    prediction_label_column_name="prediction",
    actual_label_column_name="actual"
)

# === 6. Log Data to Arize ===
response = client.log(
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    environment=Environments.PRODUCTION,
    dataframe=log_df.head(100),   # Log only top 100 samples
    schema=schema
)

# === 7. Show Log Status ===
print(f"✅ Arize log status: {response.status_code}")
print(response.text)


📦 Loading model from MLflow...


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

MlflowException: Failed to download artifacts from path 'model', please ensure that the path is correct.