## 0. Install Dependencies

Run this cell once to ensure all required packages are available in the current kernel.

In [None]:
# Install dependencies (run once per kernel)
import subprocess
import sys

# Packages this template installs into the kernel's environment.
REQUIRED_PACKAGES = [
    'boto3', 's3fs', 'pandas', 'numpy', 'matplotlib', 'seaborn',
    'scikit-learn', 'xgboost', 'lightgbm', 'sagemaker', 'pyyaml',
]

# Run pip through the interpreter backing this kernel (sys.executable) so the
# packages are installed for the same Python that executes the notebook.
pip_command = [sys.executable, '-m', 'pip', 'install', '-q', *REQUIRED_PACKAGES]
subprocess.check_call(pip_command)

# ML Experiment Template

This template provides pre-configured experiment tracking, production integration
helpers, and automatic logging of code versions and dependencies.

## How to use
1. Copy this notebook and rename it for your experiment.
2. Fill in the experiment configuration in the Setup cell.
3. Add your training and evaluation code in the Experiment section.
4. Use the Production Promotion section when ready to deploy.

## 1. Setup and Configuration

In [None]:
import sys
import os
import subprocess
import platform
import importlib
from datetime import datetime

# Put the parent of the current working directory at the front of sys.path.
# NOTE(review): assumes the notebook runs one level below the project root so
# that the `src` package used in later cells is importable — confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, project_root)

# ── Experiment configuration ──────────────────────────────────────
# Edit these four values for each new experiment; later cells read them.
EXPERIMENT_NAME = 'fraud-detection-experiment'  # Change this
ALGORITHM = 'xgboost'                           # e.g. xgboost, lightgbm, random_forest
APPROVER = 'data-science-team'                  # Your name or team
DATASET_VERSION = 'v1.0.0'                      # Version of the dataset

# Echo the configuration together with the start timestamp (local time, ISO 8601).
print(
    f'Experiment: {EXPERIMENT_NAME}',
    f'Algorithm:  {ALGORITHM}',
    f'Started:    {datetime.now().isoformat()}',
    sep='\n',
)

## 2. Automatic Code Version and Dependency Logging

Captures the current git commit, branch, and uncommitted-changes flag, along with
the Python version and the versions of key installed packages, for full reproducibility.

In [None]:
def get_code_version():
    """Describe the git state of the current working directory.

    Issues three independent git queries (HEAD commit, current branch name,
    porcelain status). A failure of any single query only affects its own
    field, so partial information is still returned.

    Returns:
        dict with:
            'git_commit': HEAD commit hash, or 'unknown' if the query failed.
            'git_branch': abbreviated branch name, or 'unknown' on failure.
            'git_dirty':  True when `git status --porcelain` printed anything;
                          False when it printed nothing or could not be run.
    """
    def _git_output(*git_args):
        """Return the stripped stdout of `git <git_args>`, or None on any error."""
        try:
            raw = subprocess.check_output(
                ['git', *git_args], stderr=subprocess.DEVNULL
            )
            return raw.decode().strip()
        except Exception:
            return None

    head_commit = _git_output('rev-parse', 'HEAD')
    head_branch = _git_output('rev-parse', '--abbrev-ref', 'HEAD')
    porcelain_status = _git_output('status', '--porcelain')

    return {
        'git_commit': 'unknown' if head_commit is None else head_commit,
        'git_branch': 'unknown' if head_branch is None else head_branch,
        # None (query failed) and '' (clean tree) both map to False.
        'git_dirty': bool(porcelain_status),
    }


def get_dependency_versions():
    """Get versions of key ML dependencies.

    Each package is imported by its module name and its ``__version__``
    attribute is read. Packages whose importable module name differs from
    their pip distribution name are resolved through an explicit mapping
    (``scikit-learn`` -> ``sklearn``, ``pyyaml`` -> ``yaml``); any other name
    has its hyphens replaced with underscores.

    Returns:
        dict keyed by distribution name, plus a 'python' entry. Each value is
        the module's ``__version__``, 'installed' when the module imports but
        exposes no ``__version__``, or 'not installed' when the import raises
        ImportError. 'python' holds ``platform.python_version()``.
    """
    # Distribution name -> importable module name, where the two differ.
    # A chained str.replace cannot express this: once '-' has been replaced,
    # the text 'scikit-learn' no longer occurs in the string.
    import_names = {
        'scikit-learn': 'sklearn',
        'pyyaml': 'yaml',
    }
    packages = [
        'boto3', 'sagemaker', 'pandas', 'numpy', 'scikit-learn',
        'xgboost', 'lightgbm', 'matplotlib', 'seaborn', 'pyyaml',
    ]
    versions = {}
    for pkg in packages:
        module_name = import_names.get(pkg, pkg.replace('-', '_'))
        try:
            mod = importlib.import_module(module_name)
            versions[pkg] = getattr(mod, '__version__', 'installed')
        except ImportError:
            versions[pkg] = 'not installed'
    versions['python'] = platform.python_version()
    return versions


# Snapshot the code and environment state once; later cells read both dicts.
code_version_info = get_code_version()
dependency_versions = get_dependency_versions()

# Print each snapshot as an indented "key: value" listing under its heading.
version_reports = (
    ('Code Version:', code_version_info),
    ('\nDependency Versions:', dependency_versions),
)
for heading, entries in version_reports:
    print(heading)
    for entry_name, entry_value in entries.items():
        print(f'  {entry_name}: {entry_value}')

## 3. Initialize Experiment Tracking

In [None]:
from src.experiment_tracking import ExperimentTracker

tracker = ExperimentTracker()

# Register the run; the git commit hash is passed as the run's code version.
experiment_id = tracker.start_experiment(
    experiment_name=EXPERIMENT_NAME,
    algorithm=ALGORITHM,
    dataset_version=DATASET_VERSION,
    code_version=code_version_info['git_commit'],
)

# Provenance parameters attached to the run.
# NOTE(review): of everything in dependency_versions, only the Python version
# is logged here; the package versions are printed earlier but not recorded —
# confirm whether that is intended.
provenance_parameters = {
    'code_git_commit': code_version_info['git_commit'],
    'code_git_branch': code_version_info['git_branch'],
    # The dirty flag is a bool; it is logged in its string form.
    'code_git_dirty': str(code_version_info['git_dirty']),
    'python_version': dependency_versions['python'],
    'dataset_version': DATASET_VERSION,
}
tracker.log_parameters(experiment_id, provenance_parameters)

print(f'Experiment ID: {experiment_id}')

## 4. Load Data

In [None]:
import pandas as pd
import numpy as np

# Load data from S3 (update paths for your dataset)
# import os
# BUCKET_SUFFIX = os.environ.get('BUCKET_SUFFIX', 'quannh0308-20260222')
# BUCKET_NAME = f'fraud-detection-data-{BUCKET_SUFFIX}'
# train_df = pd.read_parquet(f's3://{BUCKET_NAME}/prepared/train.parquet')
# val_df = pd.read_parquet(f's3://{BUCKET_NAME}/prepared/validation.parquet')
# test_df = pd.read_parquet(f's3://{BUCKET_NAME}/prepared/test.parquet')

# Placeholder: generate sample data for template demonstration
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic binary-classification data: 1000 rows, 20 features named V0..V19.
# The fixed random_state keeps the generated data identical across runs.
feature_names = [f'V{i}' for i in range(20)]
synthetic_features, y = make_classification(
    n_samples=1000, n_features=20, random_state=42
)
X = pd.DataFrame(synthetic_features, columns=feature_names)

# Hold out 20% of the rows for testing; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(
    f'Training set:   {X_train.shape}',
    f'Test set:       {X_test.shape}',
    sep='\n',
)

## 5. Train Model

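Replace this section with your training code. Keep the `model` and `hyperparameters` variable names, because the evaluation and promotion cells below read them.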

In [None]:
from xgboost import XGBClassifier

# Define hyperparameters
# These are passed verbatim to XGBClassifier and logged to the tracker, so
# every key must be an argument the installed XGBoost accepts.
# 'use_label_encoder' is deliberately not set: it was deprecated in XGBoost 1.6
# and current releases only warn that the parameter is unused. Passing it would
# also record a meaningless hyperparameter against the experiment.
hyperparameters = {
    'max_depth': 5,
    'learning_rate': 0.1,
    'n_estimators': 100,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'logloss',
    'random_state': 42,  # fixed seed so row/column subsampling is repeatable
}

# Log hyperparameters before fitting so they are recorded even if training fails.
tracker.log_parameters(experiment_id, hyperparameters)

# Train
model = XGBClassifier(**hyperparameters)
model.fit(X_train, y_train)

print('Model trained.')

## 6. Evaluate Model

In [None]:
from src.model_evaluation import ModelEvaluator

evaluator = ModelEvaluator()

# Optional: production baseline that the new model is compared against.
baseline_metrics = {
    'accuracy': 0.952,
    'precision': 0.89,
    'recall': 0.85,
    'f1_score': 0.87,
    'auc_roc': 0.94,
}

# Score the model on the held-out test split, with the baseline for comparison.
results = evaluator.evaluate_model(model, X_test, y_test, baseline_metrics)

# Record the computed metrics against the experiment run.
tracker.log_metrics(experiment_id, results['metrics'])

print('Metrics:')
for metric_name, metric_value in results['metrics'].items():
    print(f'  {metric_name}: {metric_value:.4f}')

print(f"\nMeets production threshold (>= 0.90): {results['meets_production_threshold']}")

# The per-metric comparison is printed only when the evaluator returned one.
baseline_comparison = results['comparison']
if baseline_comparison:
    print('\nComparison to baseline:')
    for metric_name, delta in baseline_comparison.items():
        direction = '↑' if delta['improved'] else '↓'
        print(f"  {metric_name}: {delta['current']:.4f} (baseline: {delta['baseline']:.4f}, "
              f"{delta['percent_change']:+.2f}% {direction})")

## 7. Production Promotion Helpers

Use these helpers when your model meets the production threshold and you want
to promote the configuration to the production pipeline.

In [None]:
from src.production_integration import ProductionIntegrator


def promote_experiment(experiment_id, hyperparameters, metrics, approver,
                       trigger_pipeline=False):
    """Promote an experiment's configuration to production.

    Thin wrapper: creates a ProductionIntegrator bound to the notebook-level
    ``tracker`` and forwards every argument to ``promote_to_production``.
    This template describes that call as validating the hyperparameters,
    backing up current Parameter Store values, writing the new values,
    generating an S3 config and optionally triggering the production
    pipeline. NOTE(review): confirm against src.production_integration.

    Args:
        experiment_id: Unique experiment identifier.
        hyperparameters: Dict with keys: objective, num_round, max_depth,
                         eta, subsample, colsample_bytree.
        metrics: Dict of performance metrics (accuracy, precision, etc.).
        approver: Name of the person approving the promotion.
        trigger_pipeline: Whether to trigger the production pipeline.

    Returns:
        The value returned by ``promote_to_production``, described by this
        template as a dict with promotion_event and execution_arn.
    """
    promotion_request = {
        'experiment_id': experiment_id,
        'hyperparameters': hyperparameters,
        'metrics': metrics,
        'approver': approver,
        'trigger_pipeline': trigger_pipeline,
    }
    production_integrator = ProductionIntegrator(experiment_tracker=tracker)
    return production_integrator.promote_to_production(**promotion_request)


def check_promotion_readiness(metrics, threshold=0.90):
    """Report whether the model's accuracy clears the promotion threshold.

    Only the 'accuracy' entry of ``metrics`` is consulted; a missing entry is
    treated as 0. A one-line verdict is printed as a side effect.

    Args:
        metrics: Dict of performance metrics.
        threshold: Minimum accuracy required (default 0.90).

    Returns:
        The result of ``accuracy >= threshold``.
    """
    accuracy = metrics.get('accuracy', 0)
    ready = accuracy >= threshold
    verdict = (
        f'✓ Model is production-ready (accuracy={accuracy:.4f} >= {threshold})'
        if ready else
        f'✗ Model does not meet threshold (accuracy={accuracy:.4f} < {threshold})'
    )
    print(verdict)
    return ready


print('Production promotion helpers loaded.')

In [None]:
# ── Promote to production (uncomment when ready) ─────────────────

# production_params = {
#     'objective': 'binary:logistic',
#     'num_round': hyperparameters['n_estimators'],
#     'max_depth': hyperparameters['max_depth'],
#     'eta': hyperparameters['learning_rate'],
#     'subsample': hyperparameters['subsample'],
#     'colsample_bytree': hyperparameters['colsample_bytree'],
# }
#
# if check_promotion_readiness(results['metrics']):
#     result = promote_experiment(
#         experiment_id=experiment_id,
#         hyperparameters=production_params,
#         metrics=results['metrics'],
#         approver=APPROVER,
#         trigger_pipeline=True,
#     )
#     print(f"Promoted! Backup: {result['promotion_event']['backup_key']}")

## 8. Cleanup

In [None]:
# Mark the tracked run as finished, then confirm which experiment was closed.
tracker.close_experiment(experiment_id)
closing_message = f'Experiment {experiment_id} closed.'
print(closing_message)