<a href="https://colab.research.google.com/github/shrutimalik123/python-collab-4/blob/main/MLOps_Experiment_Tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Day 4: MLOps Experiment Tracking Simulation
# Objective: Demonstrate MLOps proficiency by creating a system to log experiment
# parameters, metrics, and save model artifacts (essential for production deployment).

import pandas as pd
import numpy as np
import json
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
import pickle # Used to save model artifacts

# --- 1. Custom MLOps Logger Class ---

class MLOps_Logger:
    """
    Simulates an MLOps experiment tracker (like MLflow) by logging
    params, metrics, and artifacts to a local directory structure.

    Each instance owns one run directory,
    ``mlruns/<experiment_name>/<run_id>`` (relative to the current working
    directory), which is created on construction. Params and metrics are
    written as JSON files directly in that directory; artifacts are pickled
    into an ``artifacts`` sub-directory.
    """

    def __init__(self, experiment_name="Default_Risk_Model"):
        """
        Create a fresh run directory for ``experiment_name``.

        The run ID is ``run_<YYYYmmdd_HHMMSS>``. Because the timestamp only
        has one-second resolution, a numeric suffix (``_1``, ``_2``, ...) is
        appended when that directory already exists, so two runs never share
        a directory and overwrite each other's files.
        """
        base_id = f"run_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}"
        experiment_dir = os.path.join("mlruns", experiment_name)

        run_id = base_id
        attempt = 0
        while True:
            log_dir = os.path.join(experiment_dir, run_id)
            try:
                # exist_ok=False makes directory creation the uniqueness
                # check: it fails if another run already claimed this ID.
                os.makedirs(log_dir, exist_ok=False)
                break
            except FileExistsError:
                attempt += 1
                run_id = f"{base_id}_{attempt}"

        self.run_id = run_id
        self.log_dir = log_dir
        print(f"Starting new experiment run: {self.run_id} in {self.log_dir}")

    @staticmethod
    def _json_default(value):
        """Convert NumPy scalars and arrays to plain Python values for ``json``."""
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def _write_json(self, filename, payload):
        """Serialize ``payload`` as JSON into ``filename`` inside the run directory."""
        # Serialize before opening the file so that a serialization error
        # cannot leave a truncated JSON file behind.
        text = json.dumps(payload, indent=4, default=self._json_default)
        with open(os.path.join(self.log_dir, filename), 'w', encoding='utf-8') as f:
            f.write(text)

    def log_params(self, params):
        """
        Logs model hyper-parameters.

        ``params`` is written to ``params.json`` in the run directory,
        replacing any previous content. NumPy scalars and arrays among the
        values are converted to plain Python values; any other
        non-JSON-serializable value raises ``TypeError``.
        """
        self._write_json('params.json', params)
        print("-> Parameters logged successfully.")

    def log_metrics(self, metrics):
        """
        Logs performance metrics.

        ``metrics`` is written to ``metrics.json`` in the run directory,
        replacing any previous content. NumPy scalars and arrays among the
        values are converted to plain Python values; any other
        non-JSON-serializable value raises ``TypeError``.
        """
        self._write_json('metrics.json', metrics)
        print("-> Metrics logged successfully.")

    def log_artifact(self, artifact_object, filename):
        """
        Saves a model or scaler object using pickle.

        The object is pickled to ``artifacts/<filename>`` inside the run
        directory; the ``artifacts`` directory is created on first use.
        """
        artifact_path = os.path.join(self.log_dir, 'artifacts')
        os.makedirs(artifact_path, exist_ok=True)
        with open(os.path.join(artifact_path, filename), 'wb') as f:
            pickle.dump(artifact_object, f)
        print(f"-> Artifact saved: {filename}")


# --- 2. Data Preparation (Same as Day 3) ---

# Patient records, one tuple per patient, in the column order given below.
columns = ('Patient_ID', 'Age', 'MAP', 'Medication_Count', 'High_Risk')
records = [
    (1001, 55.0, 108.3, 4, 1),
    (1002, 32.0, 93.3, 1, 0),
    (1003, 65.0, 100.0, 2, 0),
    (1004, 78.0, 116.6, 6, 1),
    (1005, 41.0, 86.6, 1, 0),
    (1006, 60.0, 103.3, 3, 1),
    (1007, 58.0, 111.3, 5, 1),
    (1008, 78.0, 98.0, 2, 0),
    (1009, 45.0, 95.0, 3, 0),
    (1010, 62.0, 105.0, 4, 1),
]

# Pivot the row records into the column-oriented mapping the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(columns, zip(*records))
}
df = pd.DataFrame(data)

# Model inputs (X) are the three clinical features; the label (y) is the risk flag.
features = ['Age', 'MAP', 'Medication_Count']
X = df.loc[:, features]
y = df.loc[:, 'High_Risk']

# --- 3. Run Experiment and Log Everything ---

# Initialize the MLOps Logger
logger = MLOps_Logger(experiment_name="Cardiovascular_Risk_Model")

# 3a. Define and Log Parameters
model_params = {
    'model_type': 'LogisticRegression',
    'solver': 'liblinear',
    'random_state': 42,
    'features_used': features,
    'test_split_ratio': 0.3
}
logger.log_params(model_params)

# 3b. Prepare Data (including logging the scaler artifact)
# Split BEFORE scaling: the scaler must learn its mean/std from the training
# fold only, otherwise test-set statistics leak into preprocessing and the
# evaluation metrics (and the saved scaler) no longer reflect unseen data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X,
    y,
    test_size=model_params['test_split_ratio'],
    random_state=model_params['random_state'],
    stratify=y,
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so the name stays available to later cells; it is produced by the
# train-fitted scaler, so it carries no test-set information.
X_scaled = scaler.transform(X)
logger.log_artifact(scaler, 'scaler.pkl') # Must save scaler for production serving!

# 3c. Train Model
model = LogisticRegression(solver=model_params['solver'], random_state=model_params['random_state'])
model.fit(X_train, y_train)

# 3d. Evaluate and Log Metrics
y_pred = model.predict(X_test)
# Cast to plain float so the values are JSON-serializable regardless of the
# numeric type the metric functions return.
metrics = {
    'accuracy': float(accuracy_score(y_test, y_pred)),
    'precision': float(precision_score(y_test, y_pred, zero_division=0)),
    'recall': float(recall_score(y_test, y_pred, zero_division=0))
}
logger.log_metrics(metrics)

# 3e. Log Model Artifact
logger.log_artifact(model, 'model.pkl')

print("\n--- MLOps Experiment Tracking Complete ---")
print(f"Results are logged under: {logger.log_dir}")

Starting new experiment run: run_20251116_011754 in mlruns/Cardiovascular_Risk_Model/run_20251116_011754
-> Parameters logged successfully.
-> Artifact saved: scaler.pkl
-> Metrics logged successfully.
-> Artifact saved: model.pkl

--- MLOps Experiment Tracking Complete ---
Results are logged under: mlruns/Cardiovascular_Risk_Model/run_20251116_011754
