<a href="https://colab.research.google.com/github/mohammedidriss/hiring-system-GGU-Group9/blob/main/ROI_Calculator_Advanced_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title 📊 Comprehensive ROI Calculator: Cloud vs. OpenShift + Labor Costs
# @markdown Run this cell to launch the full calculator with Labor estimation.

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- 1. PRICING CONSTANTS (ESTIMATED PUBLIC PRICING 2024/2025) ---
# Unit-price table keyed by provider name. Iteration order of this dict is the
# order in which providers appear in the chart and the breakdown table.
#   "Type"  - selects the pricing model applied by calculate_costs
#             ("Cloud Native" or anything else, treated as OpenShift).
#   "Color" - hex colour string carried into the summary table.
# All remaining entries are numeric rates multiplied by a usage figure.
PRICING = {
    # ---------------------------------------------------------------
    # Cloud native: serverless compute + dedicated AI endpoints
    # ---------------------------------------------------------------
    "AWS Native": {
        "Type": "Cloud Native",
        "Storage_GB": 0.023,
        "Compute_Unit": 0.0000166667,
        "Request_Unit": 0.20,
        "LB_Hr": 0.0225,
        "LB_LCU": 0.008,
        # Bedrock API
        "LLM_In_1k": 0.003,
        "LLM_Out_1k": 0.015,
        # SageMaker Training (g4dn.xlarge)
        "Training_Hr": 0.736,
        # SageMaker Real-time Endpoint (g4dn.xlarge)
        "Inference_Hr": 0.736,
        # OpenSearch Serverless
        "Vector_Hr": 0.24,
        "Color": "#FF9900",
    },
    "Azure Native": {
        "Type": "Cloud Native",
        "Storage_GB": 0.018,
        "Compute_Unit": 0.000016,
        "Request_Unit": 0.20,
        "LB_Hr": 0.025,
        "LB_LCU": 0.005,
        # OpenAI API
        "LLM_In_1k": 0.005,
        "LLM_Out_1k": 0.015,
        # Azure ML Compute (NC4as T4)
        "Training_Hr": 0.52,
        # Azure ML Online Endpoint
        "Inference_Hr": 0.52,
        # AI Search
        "Vector_Hr": 0.11,
        "Color": "#0078D4",
    },
    "GCP Native": {
        "Type": "Cloud Native",
        "Storage_GB": 0.020,
        "Compute_Unit": 0.000024,
        "Request_Unit": 0.40,
        "LB_Hr": 0.025,
        "LB_LCU": 0.008,
        # Gemini API
        "LLM_In_1k": 0.0035,
        "LLM_Out_1k": 0.0105,
        # Vertex AI Training (n1-std-4 + T4)
        "Training_Hr": 0.65,
        # Vertex AI Endpoint
        "Inference_Hr": 0.65,
        # Vertex Vector Search
        "Vector_Hr": 0.10,
        "Color": "#34A853",
    },
    # ---------------------------------------------------------------
    # OpenShift / hybrid: priced per node-hour of infrastructure
    # ---------------------------------------------------------------
    "ROSA (AWS)": {
        "Type": "OpenShift",
        "Storage_GB": 0.10,
        "Compute_Node_Hr": 0.38,
        # g5.xlarge + ROSA Fee
        "GPU_Node_Hr": 1.20,
        "Training_Node_Hr": 1.20,
        "LB_Hr": 0.0225,
        "Color": "#CC0000",
    },
    "ARO (Azure)": {
        "Type": "OpenShift",
        "Storage_GB": 0.12,
        "Compute_Node_Hr": 0.42,
        # NC series + ARO Fee
        "GPU_Node_Hr": 1.10,
        "Training_Node_Hr": 1.10,
        "LB_Hr": 0.025,
        "Color": "#8a0a0a",
    },
    "OpenShift Self-Managed": {
        "Type": "OpenShift",
        "Storage_GB": 0.05,
        "Compute_Node_Hr": 0.15,
        # Hardware Amortization
        "GPU_Node_Hr": 0.50,
        "Training_Node_Hr": 0.50,
        "LB_Hr": 0.01,
        "Color": "#212121",
    },
}

# --- 2. CALCULATION ENGINE ---

# Cost columns of the detail table, in the order they are summed into
# "TOTAL MONTHLY". Listing them explicitly keeps the total independent of the
# runtime type of each value and of any other column added to the row later.
_COST_COLUMNS = (
    "Storage", "Compute", "Network/LB", "Vector DB",
    "AI Training", "LLM Tokens", "Dedicated AI Hosting", "Labor Cost",
)


def _or_zero(value):
    """Return *value* unchanged, or 0 when it is None (e.g. a cleared input field)."""
    return 0 if value is None else value


def calculate_costs(candidates_pm, queries_daily, training_hours,
                    extra_storage_gb, manual_app_nodes, manual_gpu_nodes,
                    autoscale_enabled,
                    # Labor Inputs
                    pm_count, pm_rate,
                    ds_count, ds_rate,
                    de_count, de_rate,
                    devops_count, devops_rate):
    """Estimate the monthly cost of every provider in ``PRICING``.

    Args:
        candidates_pm: Candidates processed per month.
        queries_daily: Chat queries per day (multiplied by 30 for a month).
        training_hours: GPU training hours per month.
        extra_storage_gb: Storage in GB added on top of the per-candidate
            storage estimate.
        manual_app_nodes: Requested number of app/compute nodes (OpenShift).
        manual_gpu_nodes: Requested number of GPU nodes (OpenShift) or
            dedicated inference endpoints (cloud native).
        autoscale_enabled: When true, OpenShift node counts are raised to the
            demand-derived minimum; otherwise the manual counts are used
            (with at least one app node).
        pm_count, pm_rate, ds_count, ds_rate, de_count, de_rate,
        devops_count, devops_rate: Head count and monthly cost per head for
            each role; labor cost is the sum of count * rate.

        Any numeric argument passed as ``None`` is treated as 0 so that an
        emptied input field does not raise ``TypeError``.

    Returns:
        A ``(summary, detail)`` tuple of DataFrames with one row per provider.
        ``summary`` has columns Provider, Total, Infra, AI_Services, Labor,
        Color. ``detail`` has Provider, Infra Scale, one column per cost
        item, and TOTAL MONTHLY (the sum of the cost items).
    """
    (candidates_pm, queries_daily, training_hours, extra_storage_gb,
     manual_app_nodes, manual_gpu_nodes,
     pm_count, pm_rate, ds_count, ds_rate,
     de_count, de_rate, devops_count, devops_rate) = map(_or_zero, (
        candidates_pm, queries_daily, training_hours, extra_storage_gb,
        manual_app_nodes, manual_gpu_nodes,
        pm_count, pm_rate, ds_count, ds_rate,
        de_count, de_rate, devops_count, devops_rate))

    hours_per_month = 730

    # --- A. INFRASTRUCTURE & AI COST DRIVERS ---
    # 5.0 per candidate divided by 1024 (presumably MB -> GB; confirm).
    base_storage_gb = (candidates_pm * 5.0) / 1024
    total_storage_gb = base_storage_gb + extra_storage_gb

    # 15 requests per candidate plus 2 requests per chat query.
    monthly_requests = (candidates_pm * 15) + (queries_daily * 30 * 2)
    # 2 compute-seconds per request.
    compute_seconds = monthly_requests * 2.0

    # Token volumes expressed in thousands, matching the *_1k price keys.
    tokens_in_k = ((candidates_pm * 2000) + (queries_daily * 30 * 1000)) / 1000
    tokens_out_k = ((candidates_pm * 1000) + (queries_daily * 30 * 500)) / 1000

    lb_hours = hours_per_month
    lb_units = monthly_requests / 1000000

    # --- B. LABOR COST CALCULATION ---
    total_labor = (
        pm_count * pm_rate
        + ds_count * ds_rate
        + de_count * de_rate
        + devops_count * devops_rate
    )

    # --- C. OPENSHIFT CLUSTER SIZING (identical for every OpenShift provider) ---
    req_capacity = 400000  # monthly requests one app node is assumed to serve
    demand_nodes = int(np.ceil(max(1, monthly_requests / req_capacity)))

    demand_gpu = 1
    if queries_daily > 8000:
        demand_gpu = 2
    if queries_daily > 20000:
        demand_gpu = 4

    if autoscale_enabled:
        # Manual counts act as a floor; demand can only scale them up.
        os_app_nodes = max(manual_app_nodes, demand_nodes)
        os_gpu_nodes = max(manual_gpu_nodes, demand_gpu)
    else:
        os_app_nodes = max(1, manual_app_nodes)
        os_gpu_nodes = manual_gpu_nodes

    results = []
    breakdown_data = []

    for provider, p in PRICING.items():
        c_store = total_storage_gb * p["Storage_GB"]

        if p["Type"] == "Cloud Native":
            # --- CLOUD NATIVE CALC ---
            scale_label = "Serverless + Endpoints"
            c_comp = (compute_seconds * p["Compute_Unit"]) + ((monthly_requests / 1e6) * p["Request_Unit"])
            # NOTE(review): the factor of 10 on the LCU term is an unexplained
            # estimate carried over unchanged - confirm its meaning.
            c_lb = (lb_hours * p["LB_Hr"]) + (lb_units * 10 * p["LB_LCU"])
            c_vec = hours_per_month * p["Vector_Hr"]
            c_llm = (tokens_in_k * p["LLM_In_1k"]) + (tokens_out_k * p["LLM_Out_1k"])
            c_train = training_hours * p["Training_Hr"]
            c_host = manual_gpu_nodes * hours_per_month * p["Inference_Hr"]

            infra_cost = c_store + c_comp + c_lb + c_vec + c_host
            ai_cost = c_llm + c_train
        else:
            # --- OPENSHIFT CALC ---
            scale_label = f"{os_app_nodes} App / {os_gpu_nodes} GPU"
            c_comp = os_app_nodes * hours_per_month * p["Compute_Node_Hr"]
            c_lb = lb_hours * p["LB_Hr"]
            # No separate vector DB or LLM token charge is modelled here.
            c_vec = 0.00
            c_llm = 0.00
            c_train = training_hours * p["Training_Node_Hr"]
            c_host = os_gpu_nodes * hours_per_month * p["GPU_Node_Hr"]

            infra_cost = c_store + c_comp + c_lb + c_host
            ai_cost = c_train

        row_detail = {
            "Provider": provider,
            "Infra Scale": scale_label,
            "Storage": c_store, "Compute": c_comp, "Network/LB": c_lb,
            "Vector DB": c_vec,
            "AI Training": c_train,
            "LLM Tokens": c_llm,
            "Dedicated AI Hosting": c_host,
            "Labor Cost": total_labor,
        }
        row_detail["TOTAL MONTHLY"] = sum(row_detail[col] for col in _COST_COLUMNS)
        breakdown_data.append(row_detail)

        results.append({
            "Provider": provider,
            "Total": infra_cost + ai_cost + total_labor,
            "Infra": infra_cost,
            "AI_Services": ai_cost,
            "Labor": total_labor,
            "Color": p["Color"],
        })

    return pd.DataFrame(results), pd.DataFrame(breakdown_data)

# --- 3. GRADIO LOGIC ---

def update_dashboard(cand, chat, train, store, app_nodes, gpu_nodes, auto,
                     pm_c, pm_r, ds_c, ds_r, de_c, de_r, do_c, do_r):
    """Gradio callback: recompute costs and build the chart and receipt table.

    The arguments are forwarded positionally, in order, to ``calculate_costs``.

    Returns:
        A ``(figure, table)`` tuple: a stacked bar chart of Infra /
        AI_Services / Labor per provider with the total written above each
        bar, and the detail DataFrame with every cost column formatted as a
        ``$1,234.56`` string.
    """
    # Local import: the figure is created without pyplot so it is not kept in
    # pyplot's global figure registry. A pyplot figure stays alive until it is
    # explicitly closed, so one created per callback accumulates for the
    # lifetime of the server.
    from matplotlib.figure import Figure

    df_summary, df_detail = calculate_costs(cand, chat, train, store, app_nodes, gpu_nodes, auto,
                                            pm_c, pm_r, ds_c, ds_r, de_c, de_r, do_c, do_r)

    # 1. Create Plot
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()

    plot_data = df_summary.set_index('Provider')[['Infra', 'AI_Services', 'Labor']]
    # Colors: Infra(GreyBlue), AI(Green), Labor(OrangeRed)
    plot_data.plot(kind='bar', stacked=True, ax=ax, color=['#78909C', '#66BB6A', '#FF7043'])

    # Styling
    ax.set_title('Monthly Total Cost of Ownership (TCO) + Labor', fontsize=14, pad=15)
    ax.set_ylabel('Monthly Cost ($)')
    ax.set_xlabel('')
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    ax.legend(["Infrastructure", "AI Services", "Labor Team"], loc='upper left')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')

    # Annotations: leave 25% headroom so the total labels fit above the bars.
    max_val = df_summary['Total'].max()
    ax.set_ylim(0, max_val * 1.25)

    for n, total in enumerate(df_summary['Total']):
        ax.text(n, total + (max_val * 0.02), f"${total:,.0f}", ha='center', weight='bold')

    fig.tight_layout()

    # 2. Format Table (Rounding)
    numeric_cols = ["Storage", "Compute", "Network/LB", "Vector DB", "AI Training", "LLM Tokens", "Dedicated AI Hosting", "Labor Cost", "TOTAL MONTHLY"]
    # Series.map per column instead of DataFrame.applymap, which is deprecated
    # in recent pandas releases.
    for col in numeric_cols:
        df_detail[col] = df_detail[col].map(lambda x: f"${x:,.2f}")

    # Reorder
    final_cols = ["Provider", "Infra Scale"] + numeric_cols

    return fig, df_detail[final_cols]

# --- 4. GRADIO INTERFACE ---

# Builds the Gradio UI: workload/infra sliders and labor inputs on top, the
# chart and the detailed cost table below. The same callback runs on page
# load and on every button click.
with gr.Blocks(title="Cloud vs OpenShift ROI Calculator + Labor", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 📊 Real-World ROI Calculator: Tech Stack + Labor Costs
    Compare the FULL monthly cost of running your AI Platform, including Infrastructure, AI Services, and **Engineering Labor**.
    """)

    with gr.Row():
        # LEFT COLUMN: INFRA INPUTS
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 1. Workload Drivers")
            w_cand = gr.Slider(500, 50000, value=5000, step=500, label="Monthly Candidates Processed")
            w_chat = gr.Slider(100, 25000, value=2000, step=100, label="Daily RAG Chat Queries")
            w_train = gr.Slider(0, 200, value=20, step=10, label="Monthly Training Hours (GPU)")

            gr.Markdown("### 2. Infra Scaling")
            w_store = gr.Slider(0, 10000, value=500, step=100, label="Addt'l Storage (GB)")
            w_n_app = gr.Slider(1, 20, value=3, step=1, label="App/Compute Nodes")
            w_n_gpu = gr.Slider(0, 10, value=1, step=1, label="GPU Nodes / AI Endpoints")
            w_auto = gr.Checkbox(value=True, label="✅ Enable Auto-scaling")

        # RIGHT COLUMN: LABOR INPUTS
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 3. Human Resources (Labor)")
            with gr.Accordion("Project Management", open=True):
                l_pm_c = gr.Number(value=1, label="Project Managers (Count)")
                l_pm_r = gr.Number(value=10000, label="Avg Monthly Cost ($)")

            with gr.Accordion("Data Science Team", open=False):
                l_ds_c = gr.Number(value=2, label="Data Scientists (Count)")
                l_ds_r = gr.Number(value=12000, label="Avg Monthly Cost ($)")

            with gr.Accordion("Data Engineering Team", open=False):
                l_de_c = gr.Number(value=1, label="Data Engineers (Count)")
                l_de_r = gr.Number(value=11000, label="Avg Monthly Cost ($)")

            with gr.Accordion("DevOps / Cloud Eng", open=False):
                l_do_c = gr.Number(value=1, label="DevOps Engineers (Count)")
                l_do_r = gr.Number(value=11000, label="Avg Monthly Cost ($)")

            btn = gr.Button("🚀 Calculate Total ROI", variant="primary")

    # BOTTOM ROW: RESULTS
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 💰 Total Cost of Ownership (TCO)")
            out_plot = gr.Plot(label="Cost Comparison")

            gr.Markdown("### 🧾 Detailed Cost Breakdown")
            out_table = gr.Dataframe(
                # Placeholder headers shown before the first result arrives;
                # they mirror the column names of the DataFrame returned by
                # update_dashboard so the table does not relabel itself.
                headers=["Provider", "Infra Scale", "Storage", "Compute", "Network/LB",
                         "Vector DB", "AI Training", "LLM Tokens", "Dedicated AI Hosting",
                         "Labor Cost", "TOTAL MONTHLY"],
                label="Detailed Receipt"
            )

    # Event Listener: the order of `inputs` must match the positional
    # parameters of update_dashboard.
    inputs = [w_cand, w_chat, w_train, w_store, w_n_app, w_n_gpu, w_auto,
              l_pm_c, l_pm_r, l_ds_c, l_ds_r, l_de_c, l_de_r, l_do_c, l_do_r]
    outputs = [out_plot, out_table]

    # Update on load and click
    btn.click(update_dashboard, inputs, outputs)
    demo.load(update_dashboard, inputs, outputs)

if __name__ == "__main__":
    demo.launch()

  with gr.Blocks(title="Cloud vs OpenShift ROI Calculator + Labor", theme=gr.themes.Soft()) as demo:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://913d0e7ba8aa01dde5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
