<a href="https://colab.research.google.com/github/mohammedidriss/hiring-system-GGU-Group9/blob/main/Advanced_ROI_Calculator_for_AI_Hiring_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- 1. EXPERT PRICING DATABASE (2025 Estimates) ---
# Costs are normalized to hourly or monthly rates.
#
# One entry per deployment platform. Every entry carries the same keys, which
# calculate_granular_roi() consumes as follows:
#   compute_hourly      - USD per application-node hour (x app_nodes x 730 h/month).
#   gpu_hourly          - USD per GPU-node hour; only charged for platforms whose
#                         name does not contain "Native" (2 GPU nodes x 730 h/month).
#   storage_gb_mo       - USD per GB stored per month.
#   ai_input_token /
#   ai_output_token     - USD per 1M input/output tokens; only charged for
#                         platforms whose name contains "Native".
#   platform_fee_hourly - hourly licence/service fee quoted for a 4-vCPU unit; the
#                         calculator divides it by 4 and applies it per core.
#   net_ops_overhead    - fraction (0.12 == 12%) added on top of raw monthly
#                         infrastructure cost for networking/operations.
#   devops_scale        - multiplier applied to the base DevOps headcount.

PRICING_DB = {
    "AWS Native": {
        "compute_hourly": 0.096,    # m5.large (2 vCPU)
        "gpu_hourly": 0.0,          # Uses API, not raw GPU
        "storage_gb_mo": 0.023,     # S3 Standard
        "ai_input_token": 3.00,     # Claude 3.5 Sonnet ($3/1M input)
        "ai_output_token": 15.00,   # Claude 3.5 Sonnet ($15/1M output)
        "platform_fee_hourly": 0.0,
        "net_ops_overhead": 0.12,   # 12% for NAT, LB, WAF, Egress
        "devops_scale": 1.0         # Baseline Ops
    },
    "Azure Native": {
        "compute_hourly": 0.096,    # D2s v3
        "gpu_hourly": 0.0,
        "storage_gb_mo": 0.019,     # ADLS Gen2
        "ai_input_token": 2.50,     # GPT-4o ($2.50/1M input)
        "ai_output_token": 10.00,   # GPT-4o ($10.00/1M output)
        "platform_fee_hourly": 0.0,
        "net_ops_overhead": 0.12,
        "devops_scale": 1.0
    },
    "GCP Native": {
        "compute_hourly": 0.085,    # n2-standard-2
        "gpu_hourly": 0.0,
        "storage_gb_mo": 0.020,     # GCS
        "ai_input_token": 1.25,     # Gemini 1.5 Pro ($1.25/1M input)
        "ai_output_token": 5.00,    # Gemini 1.5 Pro ($5.00/1M output)
        "platform_fee_hourly": 0.0,
        "net_ops_overhead": 0.12,
        "devops_scale": 1.0
    },
    "ROSA (AWS)": {
        "compute_hourly": 0.096,    # Underlying EC2
        "gpu_hourly": 1.50,         # g5.4xlarge (Hosting Llama 3)
        "storage_gb_mo": 0.023,     # S3 for Data Lake
        "ai_input_token": 0.0,      # Self-Hosted (Cost is in GPU)
        "ai_output_token": 0.0,
        "platform_fee_hourly": 0.171, # ROSA Service Fee (4vCPU)
        "net_ops_overhead": 0.08,   # Slightly lower egress profile
        "devops_scale": 1.2         # K8s Mgmt
    },
    "ARO (Azure)": {
        "compute_hourly": 0.126,    # VM + ARO bundled
        "gpu_hourly": 1.60,         # NC Series
        "storage_gb_mo": 0.019,     # ADLS
        "ai_input_token": 0.0,
        "ai_output_token": 0.0,
        "platform_fee_hourly": 0.0, # Included in compute rate usually
        "net_ops_overhead": 0.08,
        "devops_scale": 1.2
    },
    "Self-Managed OCP": {
        "compute_hourly": 0.050,    # Amortized Bare Metal
        "gpu_hourly": 0.90,         # Amortized GPU Hardware
        "storage_gb_mo": 0.040,     # ODF/Ceph (3x replication)
        "ai_input_token": 0.0,
        "ai_output_token": 0.0,
        "platform_fee_hourly": 0.06,# Red Hat Subscription
        "net_ops_overhead": 0.25,   # Power, Cooling, DC Networking
        "devops_scale": 2.5         # Heavy Ops Load
    }
}

# Annual Salaries (Fully Burdened)
# USD per full-time employee per year, keyed by role. calculate_granular_roi()
# multiplies each figure by the matching headcount input and by 3 to get the
# 3-year labor cost.
LABOR_COSTS = {
    "Data Scientist": 175000,
    "ML Engineer": 170000,
    "DevOps Engineer": 165000,
    "Project Manager": 145000
}

# --- 2. CALCULATION ENGINE ---

def _format_usd(amount):
    """Format a number as whole US dollars, placing the sign before the '$'."""
    sign = "-" if amount < 0 else ""
    return f"{sign}${abs(amount):,.0f}"


def _build_cost_table(
    candidates, recruiters, salary, hours_saved, retention,
    n_ds, n_ml, n_devops, n_pm,
    agency_cost_savings, churn_cost_savings
):
    """Compute one row of 3-year cost, profit and ROI figures per platform in PRICING_DB."""
    # --- SIZING HEURISTICS ---
    # 1. Compute Load (App Layer)
    # One app node per 50 recruiters, never fewer than 2 nodes for HA.
    # Hours = 730 per month.
    app_nodes = max(2, int(recruiters / 50))
    compute_hours_total = app_nodes * 730

    # 2. Storage Load
    # 5MB (0.005 GB) per candidate (Resume + JSON + Embeddings + Logs),
    # sized for the full retention window: monthly candidates * 12 * years.
    storage_gb = candidates * 12 * retention * 0.005

    # 3. AI Load (GenAI)
    # Assumptions: 20 queries/day/recruiter over 22 working days,
    # 2k input tokens and 500 output tokens per query. Expressed in millions
    # of tokens because the price table is quoted per 1M tokens.
    queries_mo = recruiters * 20 * 22
    tok_in_mil = (queries_mo * 2000) / 1_000_000
    tok_out_mil = (queries_mo * 500) / 1_000_000

    # Self-Hosted GPU Load (for OpenShift): 2x GPUs running 24/7 for HA inference.
    gpu_hours_total = 2 * 730

    # 4. Business Value (3 Years)
    # Recruiter efficiency: weekly hours saved * 4.33 weeks/month * hourly rate
    # (annual salary / 2080 working hours).
    hourly_rate = salary / 2080
    monthly_value_recruiter_efficiency = recruiters * hours_saved * 4.33 * hourly_rate

    # Agency and churn savings are annual figures, hence * 3.
    total_value_3yr = (
        monthly_value_recruiter_efficiency * 36 +
        agency_cost_savings * 3 +
        churn_cost_savings * 3
    )

    # --- COST CALCULATION LOOP ---
    results = []
    for platform, prices in PRICING_DB.items():
        # A. Labor Cost: DevOps headcount scales with the platform's complexity factor.
        ops_fte = n_devops * prices["devops_scale"]
        annual_labor = (
            (n_ds * LABOR_COSTS["Data Scientist"]) +
            (n_ml * LABOR_COSTS["ML Engineer"]) +
            (ops_fte * LABOR_COSTS["DevOps Engineer"]) +
            (n_pm * LABOR_COSTS["Project Manager"])
        )
        labor_3yr = annual_labor * 3

        # B. App Compute Cost (Application Layer)
        cost_compute_mo = compute_hours_total * prices["compute_hourly"]

        # C. AI Cost: "Native" platforms pay per token, the others pay for GPUs.
        if "Native" in platform:
            cost_ai_mo = (tok_in_mil * prices["ai_input_token"]) + \
                         (tok_out_mil * prices["ai_output_token"])
            gpu_cost_mo = 0
        else:
            cost_ai_mo = 0  # No token fee
            gpu_cost_mo = gpu_hours_total * prices["gpu_hourly"]

        # D. Storage Cost
        cost_storage_mo = storage_gb * prices["storage_gb_mo"]

        # E. Platform Fees (Licensing)
        # Fees apply to all cores (App + GPU nodes); the table quotes the fee
        # per 4-vCPU unit, so it is divided by 4 to get a per-core rate.
        total_cores = (app_nodes * 4) + (2 * 16)  # Approx core count
        cost_fees_mo = total_cores * 730 * (prices["platform_fee_hourly"] / 4)

        # F. Networking & Operations Overhead, as a % of raw infrastructure.
        raw_infra = cost_compute_mo + gpu_cost_mo + cost_storage_mo + cost_fees_mo
        cost_net_mo = raw_infra * prices["net_ops_overhead"]

        # G. Aggregation (3 Years)
        # 'App Compute' is the app layer; 'AI Layer' is API tokens or GPU hours.
        tco_compute = cost_compute_mo * 36
        tco_ai = (cost_ai_mo + gpu_cost_mo) * 36
        tco_storage = cost_storage_mo * 36
        tco_fees = cost_fees_mo * 36
        tco_net = cost_net_mo * 36

        total_tco_3yr = labor_3yr + tco_compute + tco_ai + tco_storage + tco_fees + tco_net
        net_profit = total_value_3yr - total_tco_3yr
        # Guard against division by zero when every cost input is zero.
        roi = (net_profit / total_tco_3yr) * 100 if total_tco_3yr != 0 else 0

        results.append({
            "Platform": platform,
            "Labor": round(labor_3yr),
            "App Compute": round(tco_compute),
            "AI Layer (LLM/GPU)": round(tco_ai),
            "Storage": round(tco_storage),
            "Platform Fees": round(tco_fees),
            "Networking & Ops": round(tco_net),
            "Total TCO": round(total_tco_3yr),
            "Net Profit": round(net_profit),
            "ROI %": round(roi, 1)
        })

    return pd.DataFrame(results)


def _plot_tco_breakdown(df):
    """Draw the stacked per-platform 3-year cost breakdown and return the figure."""
    fig, ax = plt.subplots(figsize=(12, 7))
    platforms = df["Platform"]

    # Cost layers in stacking order (bottom to top) with their colors.
    layers = ["Labor", "App Compute", "AI Layer (LLM/GPU)", "Storage", "Platform Fees", "Networking & Ops"]
    colors = ["#2C3E50", "#3498DB", "#E74C3C", "#2ECC71", "#F1C40F", "#95A5A6"]

    bottom_y = np.zeros(len(platforms))
    for layer, color in zip(layers, colors):
        heights = df[layer].to_numpy()
        ax.bar(platforms, heights, bottom=bottom_y, label=layer, color=color, alpha=0.9, width=0.6)
        # Rebind instead of mutating so the array already handed to bar() is untouched.
        bottom_y = bottom_y + heights

    ax.set_title("3-Year Granular TCO Breakdown ($)", fontsize=14, fontweight='bold')
    ax.set_ylabel("Cost (USD)", fontsize=12)
    ax.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    # Style the labels on this axes explicitly rather than via plt.xticks(),
    # which acts on pyplot's global "current axes".
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Total label above each stack, in millions of dollars.
    for i, total in enumerate(df["Total TCO"]):
        ax.text(i, total, f"${total/1e6:.1f}M", ha='center', va='bottom', fontweight='bold')

    fig.tight_layout()
    # Drop the figure from pyplot's registry so repeated calls do not
    # accumulate open figures; the Figure object itself remains renderable.
    plt.close(fig)
    return fig


def _plot_roi(df):
    """Draw the per-platform ROI bar chart (green gain / red loss) and return the figure."""
    fig, ax = plt.subplots(figsize=(10, 6))
    roi_colors = ['green' if x > 0 else 'red' for x in df["ROI %"]]
    bars = ax.bar(df["Platform"], df["ROI %"], color=roi_colors, alpha=0.85)

    ax.set_title("3-Year Return on Investment (ROI) %", fontsize=14, fontweight='bold')
    ax.set_ylabel("ROI Percentage", fontsize=12)
    ax.axhline(0, color='black', linewidth=1)
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Value label at the tip of each bar: above positive bars, below the rest.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontweight='bold')

    fig.tight_layout()
    plt.close(fig)  # See _plot_tco_breakdown: avoid leaking pyplot figures.
    return fig


def _format_for_display(df):
    """Return a copy of the numeric results table with currency and percent strings."""
    display_df = df.copy()
    money_cols = ["Labor", "App Compute", "AI Layer (LLM/GPU)", "Storage", "Platform Fees", "Networking & Ops", "Total TCO", "Net Profit"]
    for col in money_cols:
        display_df[col] = display_df[col].apply(_format_usd)
    display_df["ROI %"] = display_df["ROI %"].apply(lambda x: f"{x}%")
    return display_df


def calculate_granular_roi(
    candidates, recruiters, salary, hours_saved, retention,
    n_ds, n_ml, n_devops, n_pm,
    agency_cost_savings, churn_cost_savings
):
    """Estimate 3-year TCO and ROI of the AI hiring system on every platform in PRICING_DB.

    Args:
        candidates: Candidates processed per month (drives storage).
        recruiters: Number of recruiter users (drives app nodes, token volume
            and the efficiency value).
        salary: Average annual recruiter salary in USD.
        hours_saved: Hours saved per recruiter per week.
        retention: Data retention period in years.
        n_ds: Data Scientist headcount.
        n_ml: ML Engineer headcount.
        n_devops: Base DevOps headcount; scaled per platform by "devops_scale".
        n_pm: Project Manager headcount.
        agency_cost_savings: Annual agency cost savings in USD.
        churn_cost_savings: Annual employee-churn cost savings in USD.

    Returns:
        A tuple ``(display_df, fig1, fig2)``: the results table with money
        columns formatted as strings, the stacked TCO breakdown figure, and
        the ROI comparison figure.
    """
    df = _build_cost_table(
        candidates, recruiters, salary, hours_saved, retention,
        n_ds, n_ml, n_devops, n_pm,
        agency_cost_savings, churn_cost_savings
    )
    fig1 = _plot_tco_breakdown(df)
    fig2 = _plot_roi(df)
    return _format_for_display(df), fig1, fig2

# --- UI SETUP ---

# Gradio UI: input controls in two columns, results in two tabs, and one
# button that feeds every input to calculate_granular_roi().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 Granular AI Cloud ROI Calculator")
    gr.Markdown("Compare **Labor, Infrastructure, AI Tokens, and Licensing Costs** across AWS, Azure, GCP, and OpenShift.")

    with gr.Row():
        # Left column: workload scale and business-value assumptions.
        with gr.Column():
            gr.Markdown("### 🏢 Scale Parameters")
            candidates = gr.Slider(10000, 250000, value=50000, step=5000, label="Monthly Candidates")
            recruiters = gr.Slider(50, 1500, value=500, step=50, label="Recruiters (Users)")
            retention = gr.Slider(1, 10, value=5, label="Data Retention (Years)")

            gr.Markdown("### 💰 Value Drivers")
            salary = gr.Number(value=90000, label="Avg Recruiter Salary ($)")
            hours = gr.Slider(1, 10, value=4, label="Hours Saved / Week")
            agency_cost_savings = gr.Number(value=500000, label="Annual Agency Cost Savings ($)", info="Estimated annual savings from reduced reliance on external agencies due to AI.")
            churn_cost_savings = gr.Number(value=250000, label="Annual Employee Churn Cost Savings ($)", info="Estimated annual savings from AI improving hire quality and reducing employee turnover.")

        # Right column: team headcount and the trigger button.
        with gr.Column():
            gr.Markdown("### 👷 Team Composition")
            gr.Markdown("*Note: DevOps headcount scales automatically for Self-Managed platforms.*")
            n_ds = gr.Number(value=2, label="Data Scientists")
            n_ml = gr.Number(value=2, label="ML Engineers")
            n_devops = gr.Number(value=2, label="DevOps Engineers (Base)")
            n_pm = gr.Number(value=1, label="Project Managers")

            btn = gr.Button("🚀 Calculate Detailed TCO", variant="primary")

    with gr.Tabs():
        with gr.TabItem("📉 Visual Analysis"):
            gr.Markdown("### Cost & ROI Breakdown")
            with gr.Row():
                plot_cost = gr.Plot(label="Granular Cost Breakdown")
            with gr.Row():
                plot_roi = gr.Plot(label="ROI Comparison")

        with gr.TabItem("📋 Detailed Data"):
            table_output = gr.DataFrame(label="Financials (3-Year Horizon)")

    # The input order must match calculate_granular_roi's positional parameters;
    # the outputs match its (table, cost figure, ROI figure) return tuple.
    btn.click(
        calculate_granular_roi,
        inputs=[candidates, recruiters, salary, hours, retention, n_ds, n_ml, n_devops, n_pm, agency_cost_savings, churn_cost_savings],
        outputs=[table_output, plot_cost, plot_roi]
    )

# Start the Gradio server only when this file is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://40e25d5a0a2a704df7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
