In [1]:
!pip install mlflow boto3 scikit-learn joblib



In [2]:
import json
import os
from datetime import datetime, timezone

import boto3
import joblib
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient


In [3]:
# -----------------------------
# Local artifact locations
# -----------------------------
# Directory holding the serialized model and its metrics file.
BASE_DIR = "./CREDITCARD/MODEL"
MODEL_PATH, METRICS_PATH = (
    os.path.join(BASE_DIR, file_name) for file_name in ("model.pkl", "metrics.json")
)

# -----------------------------
# S3 artifact store
# -----------------------------
S3_Bucket = "mlops-creditcard"
S3_ARTIFACT_ROOT = "s3://" + S3_Bucket

# -----------------------------
# MLflow experiment / registry names
# -----------------------------
MLFLOW_EXPERIMENT_NAME = "creditcard-fraud-experiment"
MLFLOW_MODEL_NAME = "creditcard-fraud-model"

In [4]:
# Local backend configuration (SQLite for persistence).
# The database lives in the project directory by default; set the
# MLFLOW_DB_PATH environment variable to relocate it when the notebook runs
# somewhere other than the original SageMaker instance.
DEFAULT_DB_PATH = "/home/ec2-user/SageMaker/ML-Ops-CreditCard-AWS/mlflow.db"
# abspath keeps the default unchanged and guarantees a non-empty parent
# directory even when the override is a bare file name.
db_path = os.path.abspath(os.environ.get("MLFLOW_DB_PATH", DEFAULT_DB_PATH))
# Make sure the parent directory exists before the database file is created.
os.makedirs(os.path.dirname(db_path), exist_ok=True)
mlflow.set_tracking_uri(f"sqlite:///{db_path}")

In [5]:
# Pre-flight check: confirm the artifact bucket is reachable before any
# experiment or run is created against it.
bucket_name = S3_Bucket  # reuse the configured bucket instead of a second literal
s3 = boto3.client("s3")

try:
    # Requesting at most one key is a cheap way to exercise credentials and access.
    s3.list_objects_v2(Bucket=bucket_name, MaxKeys=1)
except Exception as e:
    print("‚ùå Cannot access S3 bucket:", e)
    # Stop the notebook here: continuing would create tracking records and then
    # fail later, mid-run, when artifacts are uploaded.
    raise
else:
    print("‚úÖ S3 bucket accessible")

‚úÖ S3 bucket accessible


In [6]:
# Create the experiment on first use so its artifact location points at S3;
# an experiment that already exists is reused as-is.
existing_experiment = mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
if not existing_experiment:
    mlflow.create_experiment(
        MLFLOW_EXPERIMENT_NAME,
        artifact_location=f"{S3_ARTIFACT_ROOT}/artifacts",
    )

mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

2025/12/30 08:09:12 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/30 08:09:12 INFO mlflow.store.db.utils: Updating database tables
2025/12/30 08:09:12 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/30 08:09:12 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
2025/12/30 08:09:12 INFO alembic.runtime.migration: Running 

<Experiment: artifact_location='s3://mlops-creditcard/artifacts', creation_time=1767082154521, experiment_id='1', last_update_time=1767082154521, lifecycle_stage='active', name='creditcard-fraud-experiment', tags={}>

In [7]:
# Fail fast when either expected artifact file is missing from disk.
required_artifacts = (("model", MODEL_PATH), ("metrics", METRICS_PATH))
for artifact_label, artifact_path in required_artifacts:
    if not os.path.exists(artifact_path):
        raise FileNotFoundError(f"{artifact_label} not found at {artifact_path}")

print("‚úÖ Artifacts validated")

‚úÖ Artifacts validated


In [8]:
# Point the model registry at the same URI that run tracking uses.
registry_store_uri = mlflow.get_tracking_uri()
mlflow.set_registry_uri(registry_store_uri)

In [9]:
# Select the experiment that subsequent runs are recorded under
# (left as the last expression so the experiment details are displayed).
mlflow.set_experiment(experiment_name=MLFLOW_EXPERIMENT_NAME)

<Experiment: artifact_location='s3://mlops-creditcard/artifacts', creation_time=1767082154521, experiment_id='1', last_update_time=1767082154521, lifecycle_stage='active', name='creditcard-fraud-experiment', tags={}>

In [10]:
# Load model & metrics (explicit disk load)
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
model = joblib.load(MODEL_PATH)

# Read with an explicit encoding so parsing does not depend on the platform's
# default locale encoding.
with open(METRICS_PATH, "r", encoding="utf-8") as f:
    metrics = json.load(f)

print("‚úÖ Loaded model & metrics")
print(metrics)


‚úÖ Loaded model & metrics
{'Accuracy': 0.9995333333, 'Precision': 0.9615384615, 'Recall': 0.8064516129, 'F1 Score': 0.8771929825}


In [11]:
# Timestamped run name in UTC. datetime.utcnow() is deprecated, so take an
# explicitly timezone-aware "now" instead; the formatted text is the same.
run_name = f"run_{datetime.now(timezone.utc).strftime('%Y-%m-%d_%H-%M-%S')}"

with mlflow.start_run(run_name=run_name) as run:
    # Kept at notebook scope: later cells use run_id to register and verify.
    run_id = run.info.run_id

    # Log every metric from metrics.json in a single call
    mlflow.log_metrics(metrics)

    # Log model using MLflow's sklearn logger
    mlflow.sklearn.log_model(model, artifact_path="model")

    # Log metrics.json as artifact
    mlflow.log_artifact(METRICS_PATH, artifact_path="metrics")

    print("‚úÖ Artifacts uploaded to S3")
    print("Run ID:", run_id)



‚úÖ Artifacts uploaded to S3
Run ID: 5f19bdeb776143b6b07b6df1278395c8


In [12]:
# Register the model artifact logged under this run in the model registry.
MODEL_URI = "runs:/{}/model".format(run_id)
result = mlflow.register_model(MODEL_URI, MLFLOW_MODEL_NAME)

print("‚úÖ Model registered")
print(f"Version: {result.version}")

2025/12/30 08:09:19 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/30 08:09:19 INFO mlflow.store.db.utils: Updating database tables
2025/12/30 08:09:19 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/30 08:09:19 INFO alembic.runtime.migration: Will assume non-transactional DDL.
Successfully registered model 'creditcard-fraud-model'.


‚úÖ Model registered
Version: 1


Created version '1' of model 'creditcard-fraud-model'.


In [13]:
# Verify experiment run
# MlflowClient is already imported in the notebook's import cell, so it is not
# re-imported here.
client = MlflowClient()

# Read the run back from the tracking store to confirm what was persisted.
run = client.get_run(run_id)

print("üìå Run info")
print("Run ID:", run.info.run_id)
print("Experiment ID:", run.info.experiment_id)
print("Metrics:", run.data.metrics)
print("Tags:", run.data.tags)

üìå Run info
Run ID: 5f19bdeb776143b6b07b6df1278395c8
Experiment ID: 1
Metrics: {'Accuracy': 0.9995333333, 'Precision': 0.9615384615, 'Recall': 0.8064516129, 'F1 Score': 0.8771929825}
Tags: {'mlflow.user': 'ec2-user', 'mlflow.source.name': '/home/ec2-user/anaconda3/envs/python3/lib/python3.10/site-packages/ipykernel_launcher.py', 'mlflow.source.type': 'LOCAL', 'mlflow.runName': 'run_2025-12-30_08-09-15'}


In [14]:
# Verify model registry
registered_models = client.search_registered_models()

# The loop variable must not be called `model`: that name holds the loaded
# estimator, and overwriting it would make a re-run of the logging cell log a
# registry entry instead of the sklearn model.
for registered_model in registered_models:
    print(f"\nüì¶ Model: {registered_model.name}")
    for v in registered_model.latest_versions:
        print(
            f"   ‚îî‚îÄ‚îÄ Version: {v.version}, "
            f"Stage: {v.current_stage}, "
            f"Run ID: {v.run_id}"
        )



üì¶ Model: creditcard-fraud-model
   ‚îî‚îÄ‚îÄ Version: 1, Stage: None, Run ID: 5f19bdeb776143b6b07b6df1278395c8


In [15]:
# Verify model registry
# NOTE(review): this cell repeats the registry listing of the previous cell;
# consider deleting one of the two.
registered_models = client.search_registered_models()

# Avoid `model` as the loop variable so the loaded estimator of that name is
# not overwritten by a registry entry.
for registered_model in registered_models:
    print(f"\nüì¶ Model: {registered_model.name}")
    for v in registered_model.latest_versions:
        print(
            f"   ‚îî‚îÄ‚îÄ Version: {v.version}, "
            f"Stage: {v.current_stage}, "
            f"Run ID: {v.run_id}"
        )


üì¶ Model: creditcard-fraud-model
   ‚îî‚îÄ‚îÄ Version: 1, Stage: None, Run ID: 5f19bdeb776143b6b07b6df1278395c8


In [16]:
client = MlflowClient()

# Tags applied to the freshly registered version, set in this order.
version_tags = {"role": "challenger", "status": "staging"}
for tag_key, tag_value in version_tags.items():
    client.set_model_version_tag(
        name=MLFLOW_MODEL_NAME,
        version=result.version,
        key=tag_key,
        value=tag_value,
    )

print("üè∑Ô∏è Model tagged as challenger")

üè∑Ô∏è Model tagged as challenger
