# Building and Running the Fraud Detection Pipeline

In this notebook, we'll build and run a Kubeflow Pipeline to train our fraud detection model.

In [1]:
import sys
import os

# Make the repository root importable so the `src.*` packages used later resolve.
# The root is taken to be the parent of the current working directory — adjust if needed.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
root_already_listed = project_root in sys.path
if not root_already_listed:
    sys.path.append(project_root)
    print(f"Added {project_root} to Python path")

Added /Users/prashanth.chaitanya/git-workspaces/kubeflow/kserve-example to Python path


In [2]:
# Import required libraries
import kfp
import pandas as pd
import os
import numpy as np

## Compile and Run the Pipeline

In [3]:
# Import our pipeline
from src.pipeline.pipeline import fraud_detection_pipeline

# Compile the imported pipeline function into a package file on disk.
# `pipeline_filename` is reused when the run is submitted later in the notebook.
pipeline_func = fraud_detection_pipeline
pipeline_filename = "fraud_detection_pipeline.yaml"
pipeline_compiler = kfp.compiler.Compiler()
pipeline_compiler.compile(pipeline_func=pipeline_func, package_path=pipeline_filename)

print(f"Pipeline compiled to {pipeline_filename}")

Pipeline compiled to fraud_detection_pipeline.yaml


In [None]:
from src.client.client_manager import KFPClientManager

# Initialize a KFPClientManager for the KFP API that sits behind Dex.
# Credentials are read from the environment so real secrets never need to be
# committed with the notebook. The fallbacks are the values that were previously
# hardcoded here.
# NOTE(review): the fallbacks look like the stock Kubeflow demo account — set
# KFP_DEX_USERNAME / KFP_DEX_PASSWORD for any real cluster.
kfp_client_manager = KFPClientManager(
    api_url="http://localhost:8080/pipeline",
    skip_tls_verify=True,

    dex_username=os.environ.get("KFP_DEX_USERNAME", "user@example.com"),
    dex_password=os.environ.get("KFP_DEX_PASSWORD", "12341234"),

    # can be 'ldap' or 'local' depending on your Dex configuration
    dex_auth_type="local",
)

# Get a newly authenticated KFP client.
# TIP: long-lived sessions might need to get a new client when their session expires
kfp_client = kfp_client_manager.create_kfp_client()

# Create the experiment that will group this notebook's runs
experiment_name = "fraud-detection-kserve"
experiment = kfp_client.create_experiment(name=experiment_name, namespace="kubeflow-user-example-com")

# Submit the compiled pipeline package as a run inside that experiment
run = kfp_client.run_pipeline(
    experiment_id=experiment.experiment_id,
    job_name="fraud-detection-training",
    pipeline_package_path=pipeline_filename,
    params={
        "model_name": "fraud-detection",
        "model_version": "v1"
    }
)

# Report the run's own ID (not the experiment's) — this is the value the
# monitoring cell passes to `get_run`.
print(f"Pipeline run submitted with ID: {run.run_id}")

## Monitor the Pipeline Run

In [None]:
# Fetch the current record for the submitted run and report its state.
# This is a one-off snapshot; re-run the cell to refresh it.
run_details = kfp_client.get_run(run_id=run.run_id)
print(f"Pipeline status: {run_details.state}")

## Examine Pipeline Artifacts

In [None]:
# List pipeline artifacts
import kubernetes.client as k8s_client
from kubernetes import config

# Prefer in-cluster configuration; otherwise fall back to the local kubeconfig.
# Only the configuration error triggers the fallback, so unrelated failures
# (including KeyboardInterrupt) are no longer silently swallowed.
try:
    config.load_incluster_config()
except config.ConfigException:
    config.load_kube_config()

# Create a Kubernetes API client and the typed API wrappers built on it
api_client = k8s_client.ApiClient()
pod_api = k8s_client.CoreV1Api(api_client)
custom_api = k8s_client.CustomObjectsApi(api_client)

# Get the model registry entry (a `trainedmodels` custom object).
# NOTE(review): the group is "serving.kubeflow.org"; KServe installations may
# expose this resource under a different API group — confirm against the cluster.
# The lookup is best-effort: any failure is reported rather than raised, so the
# rest of the notebook can still run.
try:
    model_registry = custom_api.get_namespaced_custom_object(
        group="serving.kubeflow.org",
        version="v1alpha1",
        namespace="kubeflow",
        plural="trainedmodels",
        name="fraud-detection-v1"
    )
    print("Model registry entry:")
    print(model_registry)
except Exception as e:
    print(f"Error getting model registry: {e}")

## Visualize Model Performance

In [None]:
# Load model metrics (assuming we have access to the PVC)
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Directory holding the metrics artifacts read below.
# This assumes we're running within the cluster or otherwise have the path mounted.
metrics_dir = "/mnt/artifacts/fraud-detection/v1/metrics"

try:
    with open(f"{metrics_dir}/metrics.json", "r") as f:
        metrics = json.load(f)

    print("Model Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot metrics as one bar per metric; the y-axis is fixed to [0, 1]
    plt.figure(figsize=(10, 6))
    sns.barplot(x=list(metrics.keys()), y=list(metrics.values()))
    plt.title("Fraud Detection Model Performance")
    plt.ylim(0, 1)
    plt.show()

    # Load confusion matrix (first CSV column is used as the row index)
    confusion_matrix = pd.read_csv(f"{metrics_dir}/confusion_matrix.csv", index_col=0)

    # Plot confusion matrix; fmt="d" annotates cells as integers
    plt.figure(figsize=(8, 6))
    sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues")
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

except OSError as e:
    # Missing or unreadable files: the expected outcome when the path is not mounted.
    print(f"Could not access metrics files: {e}")
    print("This is expected if running outside the Kubernetes cluster.")
    print("You can access metrics through the Kubeflow UI or by mounting the PVC.")
except Exception as e:
    # Anything else (malformed JSON/CSV, non-numeric values, plotting errors) is a
    # genuine problem and must not be reported as "expected".
    print(f"Could not load or plot metrics: {type(e).__name__}: {e}")

## Prepare for Model Deployment

Once the pipeline run above has completed successfully and the model is trained and registered, we can proceed to deploy it with KServe.
Continue to the next notebook, `02_deploy_model.ipynb`, for the deployment steps.