In [21]:
import os
import uuid

import joblib
import mlflow
import pandas as pd
from arize.pandas.logger import Client
from arize.utils.types import ModelTypes, Environments, Schema

# Purpose: pull a trained model from an MLflow run, score the held-out test
# set, and log a sample of predictions + actuals to Arize.

# === 1. Configure MLflow and pick the run ===
mlflow.set_tracking_uri("http://127.0.0.1:5000")
run_id = "e505514045f64ef3873160cd5a2a65ef"

# Cap on how many rows are sent to Arize in one call.
N_LOG_ROWS = 100

# === 2. Download the model.pkl artifact and load it ===
local_model_path = mlflow.artifacts.download_artifacts(
    run_id=run_id,
    artifact_path="LogisticRegression_all_features/model.pkl"
)
# The original cell downloaded the artifact but never loaded it, leaving
# `model` undefined on a fresh kernel.
# NOTE(review): assumes model.pkl is a joblib/pickle dump — confirm. Unpickling
# executes code, so only load artifacts from a trusted tracking server.
model = joblib.load(local_model_path)

# === 3. Load test features and labels ===
# NOTE(review): these are relative paths; the recorded run failed with
# FileNotFoundError, so the kernel's working directory must contain dataset/.
X_test = pd.read_parquet("dataset/X_test.parquet")
# squeeze("columns") turns a one-column frame into a Series without collapsing
# a single-row frame to a scalar.
y_test = pd.read_parquet("dataset/y_test.parquet").squeeze("columns")

# Select the columns listed in the saved feature_names artifact.
feature_names = joblib.load("artifacts/feature_names.pkl")
X_test = X_test[feature_names]

assert len(X_test) == len(y_test), "X_test and y_test have different row counts"

# === 4. Make predictions ===
print("🧠 Running model predictions...")
predictions = model.predict(X_test)

# === 5. Prepare DataFrame for Arize ===
# Build a new frame instead of mutating X_test, so re-running the cell is
# idempotent and the feature matrix never picks up the extra columns.
log_df = X_test.assign(
    prediction=predictions,
    actual=y_test.to_numpy(),
    prediction_id=[str(uuid.uuid4()) for _ in range(len(X_test))],
).reset_index(drop=True)

# === 6. Arize client setup ===
# NOTE(review): space_key, api_key, MODEL_ID and MODEL_VERSION are not defined
# in this cell; presumably they are set in an earlier cell — confirm. Do not
# hard-code credentials here.
print("🚀 Logging to Arize...")
client = Client(space_key=space_key, api_key=api_key)

schema = Schema(
    prediction_id_column_name="prediction_id",
    prediction_label_column_name="prediction",
    actual_label_column_name="actual"
)

# === 7. Log data to Arize ===
response = client.log(
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    environment=Environments.PRODUCTION,
    dataframe=log_df.head(N_LOG_ROWS),   # Log only the first N_LOG_ROWS samples
    schema=schema
)

# === 8. Show log status ===
print(f"✅ Arize log status: {response.status_code}")
print(response.text)


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

FileNotFoundError: [Errno 2] No such file or directory: 'dataset/X_test.parquet'