<a href="https://colab.research.google.com/github/mohammedidriss/hiring-system-GGU-Group9/blob/main/Advanced_ROI_Calculator_for_AI_Hiring_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- 1. CONFIGURATION & PRICING DATABASE (2024/2025 Estimates) ---

# Per-platform unit rates consumed by calculate_full_comparison().
# Every platform entry carries the same seven fields:
#   compute      - hourly rate multiplied by the monthly app compute hours
#   gpu          - hourly rate multiplied by the monthly GPU hours
#                  (only charged for platforms without "Native" in the name)
#   storage      - monthly rate applied to the retained data volume
#   ai_token     - rate per 1k tokens (only charged for "Native" platforms)
#   fee          - hourly platform/subscription fee; 0 means no fee is charged
#   overhead     - fractional markup applied on top of the monthly infra bill
#   devops_scale - multiplier applied to the base DevOps headcount
PRICING_DB = {
    "AWS Native": dict(
        compute=0.096,       # m5.large (2 vCPU)
        gpu=0.526,           # g4dn.xlarge (T4)
        storage=0.023,       # S3 Standard
        ai_token=0.015,      # Bedrock/Claude (Blended 1k tokens)
        fee=0.0,
        overhead=0.10,       # Network/Mgmt overhead
        devops_scale=1.0,    # Standard Ops load
    ),
    "Azure Native": dict(
        compute=0.096,       # D2s v3
        gpu=0.526,           # NC4as T4
        storage=0.019,       # ADLS Gen2
        ai_token=0.030,      # Azure OpenAI (GPT-4o blended)
        fee=0.0,
        overhead=0.10,
        devops_scale=1.0,
    ),
    "GCP Native": dict(
        compute=0.085,       # n2-standard-2
        gpu=0.350,           # L4 GPU
        storage=0.020,       # GCS
        ai_token=0.010,      # Vertex AI (Gemini blended)
        fee=0.0,
        overhead=0.10,
        devops_scale=1.0,
    ),
    "ROSA (AWS)": dict(
        compute=0.096,       # Underlying EC2
        gpu=0.526,           # Underlying EC2
        storage=0.023,       # S3
        ai_token=0.000,      # Self-Hosted (Cost is in GPU)
        fee=0.171,           # ROSA Service Fee (Hourly per 4vCPU)
        overhead=0.05,       # Managed service reduces overhead
        devops_scale=1.3,    # K8s complexity
    ),
    "ARO (Azure)": dict(
        compute=0.126,       # VM + Fee bundled
        gpu=0.600,           # VM + Fee bundled
        storage=0.019,       # ADLS
        ai_token=0.000,      # Self-Hosted
        fee=0.000,           # Included in unit price often
        overhead=0.05,
        devops_scale=1.3,
    ),
    "Self-Managed OCP": dict(
        compute=0.050,       # Bare Metal Amortized
        gpu=0.900,           # Hardware Purchase Amortized
        storage=0.040,       # ODF/Ceph (3x replication cost)
        ai_token=0.000,      # Self-Hosted
        fee=0.060,           # Red Hat Sub per core/hr
        overhead=0.25,       # Power, Cooling, Datacenter
        devops_scale=2.5,    # High Ops load
    ),
}

# Annual salary per role (fully burdened), keyed by the role names the
# calculator looks up when pricing the engineering team.
LABOR_COSTS = dict([
    ("Data Scientist", 175000),
    ("ML Engineer", 170000),
    ("DevOps Engineer", 165000),
    ("Project Manager", 145000),
])

# --- 2. CALCULATION ENGINE ---

_HOURS_PER_MONTH = 730   # Hours billed per month for an always-on resource
_HORIZON_YEARS = 3       # Evaluation window used for labor
_HORIZON_MONTHS = 36     # Same window, expressed in months, for infra/value


def _build_cost_table(
    candidates, recruiters, salary, hours_saved, retention,
    n_ds, n_ml, n_devops, n_pm
):
    """Return a DataFrame with one row of 3-year cost/ROI figures per platform."""
    # -- Sizing Heuristics --
    # Storage: 5 MB (= 0.005 GB) per candidate (Resume + JSON + Logs), for
    # `candidates` arrivals per month kept for `retention` years. The result
    # is already in GB, so it is multiplied by the per-GB rate directly.
    storage_gb = candidates * 12 * retention * 0.005

    # Compute: 1 vCPU per 50 users (Min 2 nodes), running all month.
    compute_hours = _HOURS_PER_MONTH * max(2, int(recruiters / 50))

    # AI Logic:
    # Cloud Native = API Cost (Tokens)
    # OpenShift = Infrastructure Cost (GPUs)
    ai_tokens_k = recruiters * 20 * 30      # 20 queries/day over 30 days
    gpu_hours = _HOURS_PER_MONTH * 2        # 2x GPUs for HA Inference (Self-Hosted)

    # Business Value (3 Years): recruiter hours saved, priced at their hourly
    # rate (2080 working hours/year, 4.33 weeks/month).
    hourly_rate = salary / 2080
    monthly_value = recruiters * hours_saved * 4.33 * hourly_rate
    total_value_3yr = monthly_value * _HORIZON_MONTHS

    rows = []
    for name, rates in PRICING_DB.items():
        # 1. Labor -- DevOps headcount is scaled by the platform complexity factor.
        scaled_ops = n_devops * rates["devops_scale"]
        annual_labor = (
            (n_ds * LABOR_COSTS["Data Scientist"]) +
            (n_ml * LABOR_COSTS["ML Engineer"]) +
            (scaled_ops * LABOR_COSTS["DevOps Engineer"]) +
            (n_pm * LABOR_COSTS["Project Manager"])
        )
        labor_3yr = annual_labor * _HORIZON_YEARS

        # 2. Infrastructure (all figures below are monthly)
        c_compute = compute_hours * rates["compute"]

        # AI cost is either API tokens ("Native" platforms) or GPU hosting.
        if "Native" in name:
            c_ai = ai_tokens_k * rates["ai_token"]
        else:
            c_ai = gpu_hours * rates["gpu"]

        c_storage = storage_gb * rates["storage"]

        # Platform fee is charged per node-hour across app and GPU nodes;
        # a zero fee rate naturally yields zero.
        c_fees = (compute_hours + gpu_hours) * rates["fee"]

        raw_infra_monthly = c_compute + c_ai + c_storage + c_fees
        total_infra_monthly = raw_infra_monthly * (1 + rates["overhead"])
        infra_3yr = total_infra_monthly * _HORIZON_MONTHS

        # 3. ROI
        tco_3yr = labor_3yr + infra_3yr
        net_profit = total_value_3yr - tco_3yr
        roi = (net_profit / tco_3yr) * 100 if tco_3yr > 0 else 0

        # Categorize for Stacked Chart
        # "AI Compute" is the API cost OR the GPU cost (before overhead)
        # "Base Infra" is everything else (Compute + Storage + Fees + Overhead)
        ai_component_3yr = c_ai * _HORIZON_MONTHS
        base_infra_3yr = infra_3yr - ai_component_3yr

        rows.append({
            "Platform": name,
            "Labor Cost": round(labor_3yr),
            "Base Infra": round(base_infra_3yr),
            "AI Compute": round(ai_component_3yr),
            "Total TCO": round(tco_3yr),
            "Net Profit": round(net_profit),
            "ROI %": round(roi, 1)
        })

    return pd.DataFrame(rows)


def _plot_cost_breakdown(df):
    """Return a stacked-bar figure of Labor / Base Infra / AI cost per platform."""
    fig, ax = plt.subplots(figsize=(10, 6))
    platforms = df["Platform"]
    labor = df["Labor Cost"]
    base_infra = df["Base Infra"]

    # Stack order, bottom to top: Labor, Base Infra, AI.
    ax.bar(platforms, labor, label="Labor (DevOps/DS)", color="#2C3E50", alpha=0.9)
    ax.bar(platforms, base_infra, bottom=labor, label="Base Infra (App/Storage)", color="#2980B9", alpha=0.9)
    ax.bar(platforms, df["AI Compute"], bottom=labor + base_infra, label="AI (GPU/API)", color="#E74C3C", alpha=0.9)

    ax.set_title("3-Year Cost Breakdown (Where is the money going?)", fontsize=14)
    ax.set_ylabel("Cost (USD)", fontsize=12)
    ax.legend()
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Label each stack with its total TCO in millions.
    for position, total in enumerate(df["Total TCO"]):
        ax.text(position, total, f"${total/1e6:.1f}M", ha='center', va='bottom', fontsize=9, fontweight='bold')

    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated button clicks
    # do not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)
    return fig


def _plot_roi(df):
    """Return a bar figure of ROI % per platform (green if positive, else red)."""
    fig, ax = plt.subplots(figsize=(10, 6))
    colors = ['green' if x > 0 else 'red' for x in df["ROI %"]]
    bars = ax.bar(df["Platform"], df["ROI %"], color=colors, alpha=0.8)

    ax.set_title("3-Year Return on Investment (ROI) %", fontsize=14)
    ax.set_ylabel("ROI %", fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Put the value above positive bars and below negative ones.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontweight='bold')

    fig.tight_layout()
    plt.close(fig)  # See _plot_cost_breakdown: avoid leaking pyplot figures.
    return fig


def _format_for_display(df):
    """Return a copy of `df` with money and ROI columns rendered as strings."""
    display_df = df.copy()
    money_cols = ["Labor Cost", "Base Infra", "AI Compute", "Total TCO", "Net Profit"]
    for col in money_cols:
        display_df[col] = display_df[col].apply(lambda x: f"${x:,.0f}")
    display_df["ROI %"] = display_df["ROI %"].apply(lambda x: f"{x}%")
    return display_df


def calculate_full_comparison(
    candidates, recruiters, salary, hours_saved, retention,
    n_ds, n_ml, n_devops, n_pm
):
    """Compare 3-year TCO and ROI of the hiring system across all platforms.

    Args:
        candidates: Candidates processed per month.
        recruiters: Number of internal users (recruiters).
        salary: Average annual recruiter salary in USD.
        hours_saved: Hours saved per recruiter per week.
        retention: Data retention period in years.
        n_ds: Number of data scientists.
        n_ml: Number of ML engineers.
        n_devops: Base number of DevOps engineers (scaled per platform).
        n_pm: Number of project managers.

    Returns:
        A tuple ``(display_df, fig1, fig2)``: a DataFrame with one row per
        platform and string-formatted money/ROI columns, the stacked cost
        breakdown figure, and the ROI comparison figure.
    """
    df = _build_cost_table(
        candidates, recruiters, salary, hours_saved, retention,
        n_ds, n_ml, n_devops, n_pm
    )

    # --- 3. PLOTTING ---
    fig1 = _plot_cost_breakdown(df)
    fig2 = _plot_roi(df)

    return _format_for_display(df), fig1, fig2

# --- 4. UI CONSTRUCTION ---

# Build the Gradio app: two input columns, a tabbed output area, and a button
# that feeds every input into calculate_full_comparison().
# Emoji in user-facing strings are written as \U escapes so they survive
# re-encoding of this file (they were previously stored as mojibake).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# \U0001F916 Enterprise AI Hiring System: ROI & TCO Comparator")
    gr.Markdown("Adjust the sliders below to see how **Scale**, **Labor**, and **Platform Choice** impact your financial bottom line.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Scale & Usage")
            candidates = gr.Slider(10000, 200000, value=50000, step=5000, label="Monthly Candidates")
            recruiters = gr.Slider(50, 1000, value=500, step=10, label="Internal Users (Recruiters)")
            retention = gr.Slider(1, 7, value=5, label="Data Retention (Years)")

            gr.Markdown("### 2. Business Value")
            salary = gr.Number(value=90000, label="Avg Recruiter Salary ($)")
            hours = gr.Slider(1, 15, value=4, label="Hours Saved per Week/Recruiter")

        with gr.Column(scale=1):
            gr.Markdown("### 3. Engineering Team (Base)")
            gr.Markdown("*Note: DevOps headcount scales automatically based on platform complexity.*")
            n_ds = gr.Number(value=2, label="Data Scientists")
            n_ml = gr.Number(value=2, label="ML Engineers")
            n_devops = gr.Number(value=2, label="DevOps Engineers (Base)")
            n_pm = gr.Number(value=1, label="Project Managers")

            btn = gr.Button("\U0001F680 Calculate & Compare", variant="primary")

    # --- TABS FOR OUTPUT ---
    with gr.Tabs():
        with gr.TabItem("\U0001F4CA Executive Summary"):
            gr.Markdown("### Financial Overview (3-Year Horizon)")
            table_output = gr.DataFrame(label="Detailed Financial Comparison")

        with gr.TabItem("\U0001F4C8 Visual Comparison"):
            gr.Markdown("### Detailed Cost & ROI Charts")
            with gr.Row():
                plot_breakdown = gr.Plot(label="Cost Breakdown (Stacked)")
                plot_roi = gr.Plot(label="ROI Comparison")

    # Event Listener
    # The order of `inputs` must match the positional parameters of
    # calculate_full_comparison; `outputs` matches its returned 3-tuple.
    btn.click(
        calculate_full_comparison,
        inputs=[candidates, recruiters, salary, hours, retention, n_ds, n_ml, n_devops, n_pm],
        outputs=[table_output, plot_breakdown, plot_roi]
    )

# Only start the web server when executed as a script/notebook cell.
if __name__ == "__main__":
    demo.launch()

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://11a17d8b77b945a821.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
