In [None]:
# Install dependencies
!pip install mlflow boto3 pandas joblib

In [None]:
# Imports

import json
import os
from datetime import datetime, timezone

import boto3
import joblib
import mlflow
import pandas as pd
from mlflow.tracking import MlflowClient


In [None]:
# Configuration
# S3
S3_BUCKET = "mlops-creditcard"
INPUT_PREFIX = "inference/input"
OUTPUT_PREFIX = "inference/output"
MODEL_PREFIX = "inference/models"

# MLflow
# Prefer the MLFLOW_TRACKING_URI environment variable when it is set; the
# literal below is only a placeholder default and must be replaced (or the
# variable exported) before the notebook can reach a real tracking server.
MLFLOW_TRACKING_URI = os.environ.get(
    "MLFLOW_TRACKING_URI", "http://<your-mlflow-server>:5000"
)
MODEL_NAME = "CreditCardFraudModel"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Shared S3 client used by the load/save helpers below.
s3 = boto3.client("s3")


In [None]:
# Load Batch Inputs
def load_batch_input():
    """Download the batch input CSV from S3 and parse it into a DataFrame.

    Reads the object ``{INPUT_PREFIX}/batch_input.csv`` from ``S3_BUCKET``
    through the module-level ``s3`` client.

    Returns:
        pandas.DataFrame: the parsed CSV contents.
    """
    obj = s3.get_object(
        Bucket=S3_BUCKET,
        Key=f"{INPUT_PREFIX}/batch_input.csv"
    )
    body = obj["Body"]
    try:
        df = pd.read_csv(body)
    finally:
        # read_csv does not close a handle it was handed, so release the
        # underlying HTTP response stream explicitly.
        body.close()
    print(f"📥 Loaded batch input: {df.shape}")
    return df


In [None]:
# Get Champion Model
def get_champion_model():
    """Find the champion version of ``MODEL_NAME`` in the registry and load it.

    Iterates over the registered versions in the order the registry returns
    them and picks the first one tagged ``status=production`` and
    ``role=champion``.

    Returns:
        tuple: ``(model, model_uri)`` where ``model`` is the object returned by
        ``mlflow.sklearn.load_model`` and ``model_uri`` is the
        ``models:/<name>/<version>`` URI it was loaded from.

    Raises:
        LookupError: if no registered version carries both tags.
    """
    client = MlflowClient()
    versions = client.search_model_versions(
        f"name='{MODEL_NAME}'"
    )

    for v in versions:
        # Tags are read from a per-version fetch rather than from the search hit.
        mv = client.get_model_version(MODEL_NAME, v.version)
        tags = mv.tags
        if tags.get("status") == "production" and tags.get("role") == "champion":
            print(f"🏆 Champion model: v{v.version}")
            model_uri = f"models:/{MODEL_NAME}/{v.version}"
            model = mlflow.sklearn.load_model(model_uri)
            return model, model_uri

    # LookupError is still an Exception subclass, so existing broad handlers keep working.
    raise LookupError("❌ No champion model found")


In [None]:
# Generate Predictions
def generate_predictions(df, model):
    """Score a batch and return it with prediction columns appended.

    The work is done on a copy, so the caller's frame is left untouched and
    the function can be re-run on the same input without the previously added
    ``PREDICTION`` columns leaking into the feature matrix.

    Args:
        df: Input batch. When it has no ``ID`` column, one numbered 1..n is
            inserted as the first column of the result. ``ID`` and, if
            present, ``CLASS`` are excluded from the features given to the
            model.
        model: Object exposing ``predict(features)``. If it also exposes
            ``predict_proba(features)``, column 1 of its result is stored as
            the probability.

    Returns:
        A new DataFrame with the input columns (plus ``ID``), ``PREDICTION``
        and ``PREDICTION_PROB``. ``PREDICTION_PROB`` is ``None`` for every row
        when the model has no ``predict_proba``. An empty batch yields an
        empty frame with the same columns and the model is not called.
    """
    scored = df.copy()

    if "ID" not in scored.columns:
        scored.insert(0, "ID", range(1, len(scored) + 1))

    # Nothing to score: return the (empty) frame with the expected output
    # columns instead of handing a zero-row matrix to the model.
    if len(scored) == 0:
        scored["PREDICTION"] = []
        scored["PREDICTION_PROB"] = []
        return scored

    non_feature_cols = ["ID"] + (["CLASS"] if "CLASS" in scored.columns else [])
    features = scored.drop(columns=non_feature_cols)

    preds = model.predict(features)

    if hasattr(model, "predict_proba"):
        # Column 1 is taken as the probability of interest.
        probs = model.predict_proba(features)[:, 1]
    else:
        probs = [None] * len(preds)

    scored["PREDICTION"] = preds
    scored["PREDICTION_PROB"] = probs
    return scored


In [None]:
# Save Predictions to S3

def save_predictions_to_s3(df):
    """Serialise predictions to CSV and upload them to S3.

    The object is written to ``S3_BUCKET`` under
    ``{OUTPUT_PREFIX}/predictions_<YYYYmmdd_HHMMSS>.csv``, where the timestamp
    is the current UTC time.

    Args:
        df: DataFrame to upload; it is written without its index.
    """
    # datetime.utcnow() is deprecated; an aware UTC "now" formats to the same
    # string with this pattern.
    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_key = f"{OUTPUT_PREFIX}/predictions_{ts}.csv"

    # Encode explicitly so the uploaded payload is unambiguously UTF-8 bytes.
    csv_bytes = df.to_csv(index=False).encode("utf-8")
    s3.put_object(
        Bucket=S3_BUCKET,
        Key=output_key,
        Body=csv_bytes
    )

    print(f"📤 Predictions saved to s3://{S3_BUCKET}/{output_key}")


In [None]:
 # Save champion model locally & upload to S3

 def save_champion_model(model_uri):
     """Download the champion model artifacts and upload the pickle to S3.

     The artifacts behind ``model_uri`` are downloaded locally with
     ``mlflow.artifacts.download_artifacts`` and the file ``model.pkl`` inside
     them is uploaded to ``S3_BUCKET`` as ``{MODEL_PREFIX}/champion_model.pkl``.

     Args:
         model_uri: MLflow URI of the model to export.

     Raises:
         FileNotFoundError: if the downloaded artifacts contain no
             ``model.pkl``.
     """
     local_path = mlflow.artifacts.download_artifacts(model_uri)
     # NOTE(review): assumes the serialized model is stored as "model.pkl" at
     # the root of the artifact directory - confirm for the flavor in use.
     model_path = os.path.join(local_path, "model.pkl")

     # Fail with the concrete path before starting the upload.
     if not os.path.isfile(model_path):
         raise FileNotFoundError(
             f"Expected serialized model at {model_path}, but it does not exist"
         )

     s3.upload_file(
         model_path,
         S3_BUCKET,
         f"{MODEL_PREFIX}/champion_model.pkl"
     )

     print("📦 Champion model uploaded to S3")


In [None]:
def main():
    """Run the batch-inference pipeline end to end.

    Steps, in order: load the input batch from S3, load the champion model
    from the MLflow registry, score the batch, upload the predictions, then
    upload the champion model file.
    """
    print("🚀 AWS Batch Inference Started")

    batch_df = load_batch_input()
    model, model_uri = get_champion_model()
    preds_df = generate_predictions(batch_df, model)

    save_predictions_to_s3(preds_df)
    save_champion_model(model_uri)

    print("✅ AWS Batch Inference Completed")

# Entry point: run the pipeline when this code is executed directly
# (``__name__`` is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()
