<a href="https://colab.research.google.com/github/mohammedidriss/hiring-system-GGU-Group9/blob/main/Course4_Hiring_system_v1_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Cell 1

In [None]:
# This cell installs all the external libraries
print("Installing required libraries: gradio, xgboost, shap, openpyxl, httpx...")
!pip install -q gradio xgboost shap openpyxl httpx
print("Installations complete.")

Installing required libraries: gradio, xgboost, shap, openpyxl, httpx...
Installations complete.


Cell 2

In [None]:
# imports all the tools.
import pandas as pd
import numpy as np
import os
import json
import gradio as gr
import joblib
import datetime

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

import shap

import httpx
import asyncio

import warnings
warnings.filterwarnings('ignore')

print("All libraries imported. Ready to mount Google Drive.")

All libraries imported. Ready to mount Google Drive.


Cell 3

In [None]:
# Connect this Colab runtime to Google Drive. Later cells read the source
# workbook from, and write the dataset/model files to, /content/drive/MyDrive/.

print("Mounting Google Drive...")
# `google.colab` is a Colab-specific package; presumably unavailable outside Colab.
from google.colab import drive
drive.mount('/content/drive')
print("Google Drive mounted successfully at /content/drive/")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully at /content/drive/


Cell 4

In [None]:
# This cell defines our file paths and creates the "Master Dataset."
# It exclusively uses the 'JobSeeker' sheet to derive features and the 'TARGET_HIRED' label,
# and saves this new "master" file back to your Drive.

print("--- Phase 1: Creating Master Training Dataset (JobSeeker only) ---")

# --- 1. Define File Paths ---
# Root of the mounted Google Drive; every path below is built from it.
DRIVE_PATH = "/content/drive/MyDrive/"
# Source Excel workbook and the sheet inside it that is read.
FEATURES_FILE_PATH = f"{DRIVE_PATH}JDSampleData.xlsx"
FEATURES_SHEET_NAME = "JobSeeker"
# Destination CSV written by create_master_dataset().
MASTER_DATASET_PATH = f"{DRIVE_PATH}master_training_dataset.csv"

# --- 2. Define the Function ---
def create_master_dataset():
    """
    Build the master training dataset from the 'JobSeeker' sheet.

    Reads sheet FEATURES_SHEET_NAME of FEATURES_FILE_PATH, normalises the two
    column names that are spelled inconsistently in the workbook, derives the
    binary 'TARGET_HIRED' label from 'Status', and writes the result to
    MASTER_DATASET_PATH as CSV.

    Returns:
        pandas.DataFrame: the sheet's columns plus 'TARGET_HIRED'
        (1 where Status == 'In work', otherwise 0), or None when the workbook
        cannot be read or has no 'Status' column to derive the label from.
    """
    print(f"Loading data from: {FEATURES_FILE_PATH} sheet: {FEATURES_SHEET_NAME}")
    try:
        # Read the features from the .xlsx file, specifying the sheet name
        df_master = pd.read_excel(FEATURES_FILE_PATH, sheet_name=FEATURES_SHEET_NAME)
    except Exception as e:
        print(f"---!!! ERROR loading features file: {e} !!!---")
        print("Please check: 1. File path is correct. 2. Sheet name is correct. 3. 'openpyxl' is installed for .xlsx.")
        return None

    # Standardize 'Journey Id' -> 'Journey id' and 'Has CV SWS ' -> 'Has CV SWS'.
    # rename() silently skips keys that are not present, so no existence check is needed.
    df_master = df_master.rename(columns={'Journey Id': 'Journey id', 'Has CV SWS ': 'Has CV SWS'})
    print(f"Loaded {len(df_master)} rows from JobSeeker sheet.")
    # --- DIAGNOSTIC: Print columns of df_master ---
    print(f"Columns in df_master: {df_master.columns.tolist()}")

    # The label is derived from 'Status'; without it there is nothing to train on.
    if 'Status' not in df_master.columns:
        print("---!!! ERROR: 'Status' column not found; cannot derive TARGET_HIRED. !!!---")
        return None
    # --- DIAGNOSTIC: Print value counts of 'Status' column in JobSeeker ---
    print(f"Value counts for 'Status' in JobSeeker: \n{df_master['Status'].value_counts()}")

    # --- 3. Create the Classification Label (The "Prediction") ---
    # 'Status' == 'In work' is "Hired" (1); everything else, including missing, is "Not Hired" (0).
    df_master['Status'] = df_master['Status'].fillna('Unknown')
    df_master['TARGET_HIRED'] = np.where(df_master['Status'] == 'In work', 1, 0)

    print(f"Master dataset created with {len(df_master)} rows.")

    # --- 4. Save the new master file ---
    df_master.to_csv(MASTER_DATASET_PATH, index=False)
    print(f"Master dataset saved to: {MASTER_DATASET_PATH}")
    return df_master

# --- 5. RUN THE FUNCTION ---
# create_master_dataset() returns None when the workbook cannot be loaded,
# so the summary line is only printed after a successful load.
master_df = create_master_dataset()
if master_df is not None:
    print(f"We have {master_df['TARGET_HIRED'].sum()} 'Hired' users to train on (based on JobSeeker 'Status' == 'In work').")

--- Phase 1: Creating Master Training Dataset (JobSeeker only) ---
Loading data from: /content/drive/MyDrive/JDSampleData.xlsx sheet: JobSeeker
Loaded 72641 rows from JobSeeker sheet.
Columns in df_master: ['Journey id', 'gender', 'Age', 'Marital status', 'JS Town', 'JS Town distrinct', 'JWD NON JWD', 'Branch', 'Branch region', 'Attachment date', 'Stream', 'Days on programme', 'Status', 'Hired status?', 'Job Position', 'gosi Start Date', 'employer', 'Job id', 'Work status', 'Escalation', 'Salary expectations', 'Highest Qualification', 'Highest major', 'Shifts', 'Working environment', 'Skills English', 'Skills MS Word', 'Skills MS Powerpoint', 'Transport available', 'Driving license', 'Job title 1', 'Job title 2', 'Job title 3', 'Job goal 1', 'Job goal 2', 'Job goal 3', 'Years workexperience', 'Months workexperience', 'Communication skills', 'Has CV SWS', 'Job readiness', 'Number of nominations', 'Attendance mandatory workshops', 'ID Interviews scheduled']
Value counts for 'Status' in J

Cell 5

In [None]:
# This cell builds the "AI Brain" (XGBoost Model).
# It selects the model features, lower-cases text, builds the preprocessing +
# XGBoost pipeline, trains and evaluates it on a stratified 80/20 split, and
# saves the fitted pipeline to Google Drive.

print("--- Phase 2: Building the AI Brain (Robust XGBoost) ---")

# Define the brain file path here, similar to MASTER_DATASET_PATH
DRIVE_PATH = "/content/drive/MyDrive/"
BRAIN_FILE_PATH = f"{DRIVE_PATH}ai_brain_pipeline.joblib"

# Cell 4 leaves master_df as None when loading fails, so test for None before `.empty`.
if 'master_df' not in globals() or master_df is None or master_df.empty:
    print("ERROR: 'master_df' not found. Please run Cell 4 first.")
else:
    TARGET_COLUMN = 'TARGET_HIRED'

    # 1. Define Features (Strict List - No Leakage)
    AI_FEATURE_COLUMNS = [
        'gender', 'Age', 'Marital status',
        'Salary expectations', 'Highest Qualification', 'Highest major',
        'Shifts', 'Working environment', 'Skills English', 'Skills MS Word',
        'Skills MS Powerpoint', 'Transport available', 'Driving license',
        'Years workexperience',
        'Months workexperience', 'Communication skills', 'Has CV SWS',
        'Number of nominations',
        'Job title 1', 'Job title 2', 'Job title 3', 'Job goal 1', 'Job goal 2', 'Job goal 3'
    ]

    # 2. Standardize Text (Lowercasing)
    # Work on a copy so master_df itself is left untouched.
    training_df = master_df.copy()

    # --- Standardize 'Has CV SWS ' column name to 'Has CV SWS' ---
    training_df = training_df.rename(columns={'Has CV SWS ': 'Has CV SWS'})

    # Convert all object/string columns to lowercase
    # This ensures "Data Analyst" matches "data analyst"
    for col in AI_FEATURE_COLUMNS:
        if col in training_df.columns and training_df[col].dtype == 'object':
            text_values = training_df[col]
            # Lower-case real values only. Missing entries stay NaN so the imputers
            # below handle them; astype(str) alone would turn NaN into a 'nan' category.
            training_df[col] = text_values.where(text_values.isna(), text_values.astype(str).str.lower())

    # 3. Separate Features
    numeric_features = training_df[AI_FEATURE_COLUMNS].select_dtypes(include=np.number).columns.tolist()
    categorical_features = training_df[AI_FEATURE_COLUMNS].select_dtypes(include=['object', 'bool']).columns.tolist()

    ordinal_features = ['Skills English', 'Skills MS Word', 'Skills MS Powerpoint', 'Communication skills']
    skill_levels = ['n_a', 'beginner', 'good', 'excellent'] # Lowercase now!

    # Ordinal columns get their own encoder, so keep them out of the one-hot group.
    categorical_features = [col for col in categorical_features if col not in ordinal_features]

    print(f"Training on {len(AI_FEATURE_COLUMNS)} features.")

    # 4. Build Pipelines
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    # Skill levels outside `skill_levels` are encoded as -1 instead of raising.
    ordinal_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='n_a')),
        ('encoder', OrdinalEncoder(categories=[skill_levels] * len(ordinal_features), handle_unknown='use_encoded_value', unknown_value=-1))
    ])

    # Use OneHot with 'ignore' to handle unseen job titles gracefully
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('ord', ordinal_transformer, ordinal_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        remainder='passthrough'
    )

    # 5. Create Regularized XGBoost Pipeline
    # scale_pos_weight = negatives / positives compensates for class imbalance.
    class_counts = training_df[TARGET_COLUMN].value_counts()
    neg_count = int(class_counts.get(0, 0))
    pos_count = int(class_counts.get(1, 0))
    if neg_count == 0 or pos_count == 0:
        raise ValueError(
            f"'{TARGET_COLUMN}' contains a single class (0: {neg_count}, 1: {pos_count}); cannot train a classifier."
        )
    scale_pos_weight_value = neg_count / pos_count

    # `use_label_encoder` is intentionally not passed: it is a deprecated XGBoost argument.
    ai_brain_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', xgb.XGBClassifier(
            objective='binary:logistic',
            eval_metric='logloss',
            scale_pos_weight=scale_pos_weight_value, # Apply the calculated weight
            random_state=42,
            # --- REGULARIZATION (The "Fuzzy") ---
            max_depth=6,
            learning_rate=0.05,   # Slower learning is more robust
            n_estimators=300,
            subsample=0.8,        # Use only 80% of data per tree (adds noise)
            colsample_bytree=0.9
        ))
    ])

    # 6. Train
    print("\nSplitting data and training model...")
    X = training_df[AI_FEATURE_COLUMNS]
    y = training_df[TARGET_COLUMN]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    ai_brain_pipeline.fit(X_train, y_train)
    print("...Training complete.")

    # 7. Evaluate & Feature Importance Check
    print("\n--- Model Performance ---")
    preds = ai_brain_pipeline.predict(X_test)
    probs = ai_brain_pipeline.predict_proba(X_test)[:, 1]
    print(f"Accuracy: {accuracy_score(y_test, preds):.2%}")
    print(f"Avg Prediction Probability: {probs.mean():.2%}")
    # Accuracy alone is misleading on an imbalanced target; show per-class precision/recall too.
    print(classification_report(y_test, preds, target_names=['Not Hired', 'Hired'], zero_division=0))

    # Check Feature Importance to spot Leakage
    print("\n--- TOP 10 DRIVERS OF HIRING (Check for Leakage) ---")
    try:
        # Ask the fitted preprocessor for its output names rather than rebuilding them
        # by hand; the names carry the transformer prefix (num__/ord__/cat__).
        fitted_preprocessor = ai_brain_pipeline.named_steps['preprocessor']
        all_names = fitted_preprocessor.get_feature_names_out()
        importances = ai_brain_pipeline.named_steps['classifier'].feature_importances_

        feat_imp = pd.DataFrame({'Feature': all_names, 'Importance': importances})
        feat_imp = feat_imp.sort_values(by='Importance', ascending=False).head(10)
        print(feat_imp)
    except Exception as e:
        print(f"Could not print feature importance (minor issue): {e}")

    # 8. Save
    joblib.dump(ai_brain_pipeline, BRAIN_FILE_PATH)
    print(f"\nSUCCESS: Robust AI Brain saved to: {BRAIN_FILE_PATH}")

--- Phase 2: Building the AI Brain (Robust XGBoost) ---
Training on 24 features.

Splitting data and training model...
...Training complete.

--- Model Performance ---
Accuracy: 75.39%
Avg Prediction Probability: 42.93%

--- TOP 5 DRIVERS OF HIRING (Check for Leakage) ---
                                          Feature  Importance
3                           Number of nominations    0.040618
144                           Driving license_yes    0.005838
145                                 Has CV SWS_no    0.004725
146                                Has CV SWS_yes    0.004647
11                         Marital status_married    0.004091
1765  Job goal 1_saudi schools for boys and girls    0.004076
648                        Job title 1_translator    0.003925
1740          Job goal 1_gold and jewellery trade    0.003896
1807                 Job goal 2_personal services    0.003709
139                       Transport available_nan    0.003684

SUCCESS: Robust AI Brain saved to: /content/

Cell 6

In [None]:
# This cell builds the "Action Plan Generator" .
# It reloads the saved pipeline, builds a SHAP TreeExplainer over the transformed
# training split, and saves the explainer to Google Drive.

print("--- Phase 3: Building the Action Plan Generator (SHAP) ---")

# --- 1. Load the Saved AI Brain ---
# (DRIVE_PATH and BRAIN_FILE_PATH were defined in Cell 5)

EXPLAINER_FILE_PATH = f"{DRIVE_PATH}shap_explainer.joblib"

try:
    ai_brain_pipeline = joblib.load(BRAIN_FILE_PATH)
    print(f"Successfully loaded AI Brain from: {BRAIN_FILE_PATH}")
except Exception as e:
    print(f"---!!! ERROR loading 'ai_brain_pipeline.joblib': {e} !!!---")
    raise

# --- 2. Separate Pipeline Components ---
preprocessor = ai_brain_pipeline.named_steps['preprocessor']
model = ai_brain_pipeline.named_steps['classifier']

# --- 3. Get Transformed Feature Names ---
# We ask the preprocessor for its *actual* output names
print("Getting feature names directly from the preprocessor...")
try:
    all_transformed_feature_names = preprocessor.get_feature_names_out().tolist()
    print(f"Successfully got {len(all_transformed_feature_names)} feature names.")
except Exception as e:
    print(f"Error getting feature names: {e}.")
    raise

# --- 4. Transform Training Data  ---
print("Transforming training data for SHAP explainer...")

# The pipeline was fitted on Cell 5's lower-cased `training_df`. Transforming the raw
# `master_df` instead would make every mixed-case category look unknown to the encoders.
X = training_df[AI_FEATURE_COLUMNS]
y = training_df[TARGET_COLUMN]
# Same split parameters as Cell 5, so this reproduces the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X_train_transformed = preprocessor.transform(X_train)
print("...Data transformed successfully (dense array).")

# --- 5. Build the SHAP Explainer ---
print("Building SHAP TreeExplainer...")
# Use a bounded random sample as the background set instead of the whole training
# matrix: SHAP's TreeExplainer guidance is that a few hundred background rows are
# enough, and a small background keeps the saved explainer file small.
SHAP_BACKGROUND_SIZE = 500
background_rng = np.random.default_rng(42)
n_train_rows = X_train_transformed.shape[0]
background_rows = background_rng.choice(
    n_train_rows, size=min(SHAP_BACKGROUND_SIZE, n_train_rows), replace=False
)
shap_background = X_train_transformed[background_rows]
explainer = shap.TreeExplainer(model, shap_background)
print("...Explainer built successfully.")

# --- 6. Save the SHAP Explainer ---
joblib.dump(explainer, EXPLAINER_FILE_PATH)
print(f"SUCCESS: SHAP Explainer saved to: {EXPLAINER_FILE_PATH}")

# --- 7. Define Test Function ---
def generate_action_plan_test(new_user_data_df):
    """
    Produce a plain-text test report for one candidate.

    Uses the cell-level `ai_brain_pipeline`, `preprocessor`, `explainer` and
    `all_transformed_feature_names`.

    Args:
        new_user_data_df: DataFrame holding the candidate row; only its first
            row is explained.

    Returns:
        str: the five features with the largest absolute SHAP value, each
        tagged [+] or [-], followed by the predicted label and hire probability.
    """
    hire_probability = ai_brain_pipeline.predict_proba(new_user_data_df)[0][1]
    prediction_raw = ai_brain_pipeline.predict(new_user_data_df)[0]

    # The preprocessor output is passed to SHAP as-is (no .toarray() call).
    user_transformed = preprocessor.transform(new_user_data_df)
    shap_values = explainer.shap_values(user_transformed)

    # One SHAP value per transformed feature for the first row.
    first_row = pd.DataFrame(shap_values, columns=all_transformed_feature_names).iloc[0]
    ranked = pd.DataFrame({'Feature': first_row.index, 'SHAP_Value': first_row.values})
    ranked['abs_impact'] = ranked['SHAP_Value'].abs()
    top_factors = ranked.sort_values(by='abs_impact', ascending=False).head(5)

    pieces = ["--- (TEST) Recommended Action Plan ---\n"]
    for feature, shap_value in zip(top_factors['Feature'], top_factors['SHAP_Value']):
        if shap_value > 0:
            sign = "[+]"
        else:
            sign = "[-]"
        pieces.append(f"  {sign} {feature} (Impact: {shap_value:.2f})\n")

    verdict = 'Hired' if prediction_raw == 1 else 'Not Hired'
    pieces.append(f"\n  Prediction: {verdict} ({hire_probability:.1%})")
    return "".join(pieces)

# --- 8. Test the Explainer ---
print("\n--- TESTING THE ACTION PLAN GENERATOR ---")
# Smoke test: explain the first row of the hold-out split created earlier in this cell.
# The 0:1 slice (rather than .iloc[0]) keeps a one-row DataFrame with column names.
sample_user_df = X_test.iloc[0:1]
true_label = y_test.iloc[0]
print(f"Generating plan for a sample user. (True Label: {'Hired' if true_label == 1 else 'Not Hired'})...")
print(generate_action_plan_test(sample_user_df))
print("---------------------------------")

--- Phase 3: Building the Action Plan Generator (SHAP) ---
Successfully loaded AI Brain from: /content/drive/MyDrive/ai_brain_pipeline.joblib
Getting feature names directly from the preprocessor...
Successfully got 1882 feature names.
Transforming training data for SHAP explainer...
...Data transformed successfully (dense array).
Building SHAP TreeExplainer...
...Explainer built successfully.
SUCCESS: SHAP Explainer saved to: /content/drive/MyDrive/shap_explainer.joblib

--- TESTING THE ACTION PLAN GENERATOR ---
Generating plan for a sample user. (True Label: Hired)...
--- (TEST) Recommended Action Plan ---
  [+] num__Number of nominations (Impact: 1.04)
  [+] num__Age (Impact: 0.08)
  [-] num__Years workexperience (Impact: -0.04)
  [+] cat__Job title 3_tele-customer service (Impact: 0.00)
  [+] cat__Job title 2_tele-customer service (Impact: 0.00)

  Prediction: Hired (58.3%)
---------------------------------


Cell 7

In [None]:
import datetime
import joblib
import pandas as pd
import json
import httpx
import asyncio
from google.colab import userdata
import matplotlib.pyplot as plt # Moved here
import matplotlib.colors as mcolors
import numpy as np
import gradio as gr
import plotly.express as px
import os

# --- Install pypdf for resume parsing ---
try:
    import pypdf
except ImportError:
    os.system('pip install -q pypdf')
    import pypdf

print("--- Phase 4 & 5: Defining App Logic and Launching Multi-Page UI ---")

# --- 1. Load API Key from Colab Secrets ---
# The key is read from the Colab secret named 'GOOGLE_API_KEY' instead of being
# hard-coded. A missing or empty key is reported and the error re-raised, which
# stops the cell before any API-dependent function is defined.
try:
    MY_GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    if not MY_GOOGLE_API_KEY:
        raise ValueError("API Key is empty or not found.")
except Exception as e:
    print(f"CRITICAL ERROR: API KEY NOT FOUND: {e}")
    raise

# --- 2. Global Variables ---
# Filled by on_app_load(); every entry stays None until the models are loaded.
ai_models = {"pipeline": None, "explainer": None, "feature_names": None}
# Master dataset used by the global dashboard; empty until on_app_load() reads the CSV.
global_user_df = pd.DataFrame()
processed_new_candidates_df = pd.DataFrame() # Initialize global for new candidate analysis

# Define paths explicitly to avoid scope issues
# (these repeat the values assigned in Cells 4-6).
DRIVE_PATH = "/content/drive/MyDrive/"
BRAIN_FILE_PATH = f"{DRIVE_PATH}ai_brain_pipeline.joblib"
EXPLAINER_FILE_PATH = f"{DRIVE_PATH}shap_explainer.joblib" # explainer filename
MASTER_DATASET_PATH = f"{DRIVE_PATH}master_training_dataset.csv"
NEW_SUBMISSIONS_FILE_PATH = f"{DRIVE_PATH}new_submissions.csv"

# This list MUST match the list in Cell 5 (AI_FEATURE_COLUMNS): the saved pipeline
# selects its input columns by these names.
ALL_FEATURE_COLUMNS = [
    'gender', 'Age', 'Marital status',
    'Salary expectations', 'Highest Qualification', 'Highest major',
    'Shifts', 'Working environment', 'Skills English', 'Skills MS Word',
    'Skills MS Powerpoint', 'Transport available', 'Driving license',
    'Years workexperience',
    'Months workexperience', 'Communication skills', 'Has CV SWS',
    'Number of nominations',
    'Job title 1', 'Job title 2', 'Job title 3', 'Job goal 1', 'Job goal 2', 'Job goal 3'
]

# --- 3. Define Dropdown Choices for the UI ---
# NOTE(review): Cell 5 trains on lower-cased text (skill levels 'n_a', 'beginner', ...),
# so these title-cased UI values must be lower-cased before they reach the pipeline.
# NOTE(review): it is not visible here whether these choices match the values that
# occur in the training data - confirm against the JobSeeker sheet.
GENDER_CHOICES = ['Male', 'Female', 'Other', 'Prefer not to say']
MARITAL_CHOICES = ['Single', 'Married', 'Divorced', 'Widowed', 'Other']
SALARY_CHOICES = ['< 3,000SAR', '3,000 - 5,000SAR', '5,000 - 7,000SAR', '> 7,000SAR']
QUALIFICATION_CHOICES = ['High School', 'Diploma', 'Bachelor', 'Masters', 'Doctorate']
SHIFTS_CHOICES = ['No preference', 'Straight shifts', 'Rotating shifts']
ENVIRONMENT_CHOICES = ['Mixed', 'Flexible', 'On-site', 'Remote']
SKILL_LEVEL_CHOICES = ['N_A', 'Beginner', 'Good', 'Excellent']
YES_NO_CHOICES = ['Yes', 'No']
# Unlike SKILL_LEVEL_CHOICES this list has no 'N_A' entry.
COMMUNICATION_CHOICES = ['Beginner', 'Good', 'Excellent']


# --- 4. Function to Load XGBoost AI Brain (Runs Once) ---
def on_app_load():
    """
    Load the saved pipeline, SHAP explainer and master dataset into module globals.

    The three `ai_models` entries are published together only after all of them
    loaded, so a failure part-way through never leaves a pipeline without its
    explainer. `global_user_df` is replaced when MASTER_DATASET_PATH exists.

    Returns:
        str: a status message - "System Ready..." on success, otherwise a
        "CRITICAL ERROR..." message containing the exception text.
    """
    global ai_models, global_user_df
    print("Gradio App Loading: Loading *XGBoost* AI models from Google Drive...")
    try:
        # NOTE(review): joblib.load unpickles arbitrary objects - only load files
        # this notebook wrote itself.
        pipeline = joblib.load(BRAIN_FILE_PATH)
        explainer = joblib.load(EXPLAINER_FILE_PATH)
        feature_names = pipeline.named_steps['preprocessor'].get_feature_names_out().tolist()

        # Publish all-or-nothing.
        ai_models.update(pipeline=pipeline, explainer=explainer, feature_names=feature_names)
        print("...XGBoost Models loaded successfully.")

        # Load Global Data
        if os.path.exists(MASTER_DATASET_PATH):
            global_user_df = pd.read_csv(MASTER_DATASET_PATH)
            print(f"...Loaded {len(global_user_df)} users for Global Dashboard.")
        else:
            print("Warning: Master dataset not found.")

        return "System Ready. AI Models and Global Data loaded."
    except Exception as e:
        return f"CRITICAL ERROR: Could not load AI modules or data. {e}"

# --- 5. Helper Functions ---
def format_feature_name_for_llm(feature_name):
    """
    Turn a transformed feature name into a short topic for the LLM prompt.

    The transformer prefix ("cat__", "ord__", "num__") is removed from the start
    of the name and any one-hot "_<value>" suffix is dropped, leaving the source
    column. Skill columns lose their "Skills " prefix; job-goal and job-title
    columns map to fixed topic names. The mapping is applied whether or not a
    value suffix is present, so ordinal features such as "ord__Skills English"
    (which have no suffix) are handled as well.

    Args:
        feature_name: name as returned by the preprocessor's get_feature_names_out().

    Returns:
        str: the topic, e.g. "English", "Job Goal Setting", "Number of nominations".
    """
    name = feature_name
    for prefix in ("cat__", "ord__", "num__"):
        if name.startswith(prefix):
            name = name[len(prefix):]
            break

    # Everything before the first underscore is the source column name.
    column = name.split('_', 1)[0]
    if "Skills" in column: return column.replace("Skills ", "")
    if "Job goal" in column: return "Job Goal Setting"
    if "Job title" in column: return "Job Title Alignment"
    return column

def run_xgb_prediction_and_get_report(profile_dict):
    """
    Score one candidate profile and summarise the SHAP explanation.

    Args:
        profile_dict: mapping of ALL_FEATURE_COLUMNS names to the submitted values.

    Returns:
        tuple (report, top_weakness_topic, hire_probability):
            report - text listing the probability and the five strongest factors;
            top_weakness_topic - topic of the most negative SHAP factor, or
                "General Profile Improvement" when nothing is negative;
            hire_probability - predicted probability of class 1.
        On any failure: (error message, "Unknown", 0.0).
    """
    print("XGBoost Brain: Preparing prediction...")
    try:
        pipeline = ai_models["pipeline"]
        explainer = ai_models["explainer"]
        feature_names = ai_models["feature_names"]
        if pipeline is None or explainer is None or feature_names is None:
            return "Error: XGBoost model not loaded.", "Unknown", 0.0

        new_user_df = pd.DataFrame([profile_dict], columns=ALL_FEATURE_COLUMNS)

        # Ensure numeric types are correct
        numeric_cols = ['Age', 'Years workexperience', 'Months workexperience', 'Number of nominations']
        for col in numeric_cols:
            new_user_df[col] = pd.to_numeric(new_user_df[col], errors='coerce').fillna(0)

        def _normalise_text(value):
            # The model was trained on lower-cased text (Cell 5), so form values such
            # as 'Excellent' must be lower-cased to match a known category. None
            # becomes NaN so the pipeline's imputers treat it as missing.
            if isinstance(value, str):
                return value.lower()
            return np.nan if value is None else value

        for col in ALL_FEATURE_COLUMNS:
            if col not in numeric_cols:
                # astype(object) stops an all-missing column collapsing to float64.
                new_user_df[col] = new_user_df[col].map(_normalise_text).astype(object)

        # Prediction
        prediction_proba = pipeline.predict_proba(new_user_df)[0]
        hire_probability = prediction_proba[1]
        print(f"DEBUG Prediction: {hire_probability}")

        # Explanation: one SHAP value per transformed feature for this single row.
        user_transformed = pipeline.named_steps['preprocessor'].transform(new_user_df)
        shap_values = explainer.shap_values(user_transformed)
        df_shap = pd.DataFrame(shap_values, columns=feature_names).iloc[0].T
        df_shap = df_shap.reset_index(); df_shap.columns = ['Feature', 'SHAP_Value']
        df_shap['abs_impact'] = df_shap['SHAP_Value'].abs()

        # The most negative factor becomes the "weakness" topic for the action plan.
        df_negatives = df_shap[df_shap['SHAP_Value'] < 0].sort_values(by='SHAP_Value', ascending=True)
        top_weakness_topic = "General Profile Improvement"
        if not df_negatives.empty:
            top_weakness_topic = format_feature_name_for_llm(df_negatives.iloc[0]['Feature'])

        report = f"STATISTICAL ANALYSIS REPORT:\nPredicted Hire Probability: {hire_probability:.1%}\nTop 5 Factors:\n"
        for _, row in df_shap.sort_values(by='abs_impact', ascending=False).head(5).iterrows():
            sign = "POSITIVE" if row['SHAP_Value'] > 0 else "NEGATIVE"
            report += f"  - Factor: {row['Feature']}, Impact: {sign}\n"

        return report, top_weakness_topic, hire_probability
    except Exception as e:
        print(f"Prediction Error: {e}")
        return f"Error during XGBoost prediction: {e}", "Unknown", 0.0

# --- 6. Resume Parsing Logic ---
async def parse_resume(file_obj, *current_inputs):
    """
    Extract profile fields from an uploaded PDF resume via the Gemini API.

    Args:
        file_obj: the uploaded file - either an object with a `.name` path
            attribute or the path string itself; None when nothing was uploaded.
        *current_inputs: current values of the form fields, in ALL_FEATURE_COLUMNS order.

    Returns:
        list: one `gr.update` per ALL_FEATURE_COLUMNS entry. Fields the model
        could not find (missing key, empty string or the "Unknown" placeholder)
        are left unchanged. When there is no file, no extractable text, or any
        error occurs, `list(current_inputs)` is returned instead.
    """
    if file_obj is None: return list(current_inputs)
    try:
        pdf_path = getattr(file_obj, "name", file_obj)
        pdf_reader = pypdf.PdfReader(pdf_path)
        # Guard against pages that yield no text.
        resume_text = "".join((page.extract_text() or "") for page in pdf_reader.pages)
        if not resume_text.strip():
            # Nothing to extract from (e.g. a scanned PDF); skip the API call.
            print("Resume parsing error: no extractable text found in the PDF")
            return list(current_inputs)

        prompt = f"""
        Extract data from RESUME TEXT to JSON.
        RESUME TEXT: {resume_text[:10000]}
        REQUIRED KEYS: {ALL_FEATURE_COLUMNS}
        Use "Unknown" or 0 if missing.
        """
        json_str = await call_gemini_api(prompt, json_mode=True)
        parsed_data = json.loads(json_str)
        if not isinstance(parsed_data, dict):
            raise ValueError("expected a JSON object from the model")

        updates = []
        for col in ALL_FEATURE_COLUMNS:
            val = parsed_data.get(col)
            # The prompt asks for "Unknown" when a field is missing; pushing that
            # placeholder into the form would overwrite the user's current value.
            is_placeholder = isinstance(val, str) and val.strip().lower() in ("", "unknown")
            if val is None or is_placeholder:
                updates.append(gr.update())
            else:
                updates.append(gr.update(value=val))
        return updates
    except Exception as e:
        print(f"Resume parsing error: {e}")
        return list(current_inputs)

# --- 7. Chart Generation Functions ---
def generate_individual_charts(profile_dict):
    """
    Draw a radar chart of the candidate's four skill levels.

    Args:
        profile_dict: submitted profile; read for 'Skills English',
            'Skills MS Word', 'Skills MS Powerpoint' and 'Communication skills'.
            A missing key counts as 'N_A'; an unrecognised level scores 0.

    Returns:
        matplotlib Figure with one polar axis.
    """
    level_scores = {'N_A': 0, 'Beginner': 1, 'Good': 2, 'Excellent': 3}
    # (axis label, profile key) for each spoke, in drawing order.
    spokes = [
        ('English', 'Skills English'),
        ('MS Word', 'Skills MS Word'),
        ('MS Powerpoint', 'Skills MS Powerpoint'),
        ('Communication', 'Communication skills'),
    ]
    labels = [label for label, _ in spokes]
    scores = [level_scores.get(profile_dict.get(key, 'N_A'), 0) for _, key in spokes]

    spoke_angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
    # Repeat the first point at the end so the outline closes.
    closed_scores = scores + scores[:1]
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True})
    ax.fill(closed_angles, closed_scores, color='blue', alpha=0.25)
    ax.plot(closed_angles, closed_scores, color='blue', linewidth=2)
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(labels)
    plt.tight_layout()
    return fig

def plot_hiring_rate_by_category(df, category_col, ax):
    """
    Draw the mean of 'TARGET_HIRED' per value of `category_col` as a bar chart.

    Missing category values are grouped under 'Unknown'. Categorical columns
    (e.g. the output of pd.cut) keep their category order on the x axis; any
    other column is converted to text and sorted alphabetically.

    Args:
        df: DataFrame containing `category_col` and 'TARGET_HIRED'; not modified.
        category_col: name of the column to group by.
        ax: matplotlib Axes to draw on.
    """
    df_plot = df[[category_col, 'TARGET_HIRED']].copy()
    groups = df_plot[category_col]

    if isinstance(groups.dtype, pd.CategoricalDtype):
        # Keep the categorical dtype so bins stay in their defined order.
        if groups.isna().any():
            if 'Unknown' not in groups.cat.categories:
                groups = groups.cat.add_categories(['Unknown'])
            groups = groups.fillna('Unknown')
    else:
        # Label missing values BEFORE converting to text: astype(str) first would
        # turn NaN into the literal 'nan' and leave nothing for fillna to replace.
        groups = groups.astype(object).where(groups.notna(), 'Unknown').astype(str)
    df_plot[category_col] = groups

    # observed=True: only plot categories that actually occur.
    hiring_rates = df_plot.groupby(category_col, observed=True)['TARGET_HIRED'].mean().sort_index()

    # Plot bar chart at explicit integer positions, then label the ticks.
    positions = list(range(len(hiring_rates)))
    ax.bar(positions, hiring_rates.tolist(), color='skyblue')
    ax.set_xticks(positions)
    ax.set_xticklabels([str(label) for label in hiring_rates.index])
    ax.set_title(f'Hiring Rate by {category_col}')
    ax.set_ylabel('Hiring Rate')
    ax.set_ylim(0, 1)
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel('')

def generate_global_dashboard():
    """
    Build the 2x3 dashboard of hiring rates from `global_user_df`.

    Panels: overall rate, by gender, by highest qualification, by binned years
    of work experience, by 'Has CV SWS', and by binned number of nominations.

    Returns:
        matplotlib Figure. When `global_user_df` is empty or has no
        'TARGET_HIRED' column, a figure containing only an explanatory message.
    """
    if global_user_df.empty or 'TARGET_HIRED' not in global_user_df.columns:
        fig, ax = plt.subplots(); ax.text(0.5, 0.5, "Global user data not loaded or TARGET_HIRED missing.", ha='center'); return fig

    def _binned_frame(source_col, bins, labels, binned_col):
        """Return TARGET_HIRED plus `source_col` cut into labelled bins (non-numeric -> 0)."""
        numeric = pd.to_numeric(global_user_df[source_col], errors='coerce').fillna(0)
        # pd.cut's default right-closed intervals: the -1 lower edge makes the first
        # bin (-1, 0], so a value of exactly 0 lands in the '0' label.
        binned = pd.cut(numeric, bins=bins, labels=labels)
        return global_user_df[['TARGET_HIRED']].assign(**{binned_col: binned})

    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 12))
    axes = axes.flatten()

    # Plot 1: Overall Hiring Success Rate
    overall_hiring_rate = global_user_df['TARGET_HIRED'].mean()
    axes[0].bar(['Overall'], [overall_hiring_rate], color='lightcoral')
    axes[0].set_title('Overall Hiring Success Rate')
    axes[0].set_ylabel('Hiring Rate')
    axes[0].set_ylim(0, 1)
    axes[0].text(0, overall_hiring_rate + 0.05, f'{overall_hiring_rate:.2%}', ha='center', va='bottom')

    # Plot 2: Hiring Rate by Gender
    plot_hiring_rate_by_category(global_user_df, 'gender', axes[1])

    # Plot 3: Hiring Rate by Highest Qualification
    plot_hiring_rate_by_category(global_user_df, 'Highest Qualification', axes[2])

    # Plot 4: Hiring Rate by Years Work Experience
    # The second edge sits just below 1 so that exactly one year falls in '1-5'
    # and only values strictly between 0 and 1 fall in '<1'.
    years_df = _binned_frame(
        'Years workexperience',
        bins=[-1, 0, np.nextafter(1.0, 0.0), 5, 10, np.inf],
        labels=['0', '<1', '1-5', '6-10', '>10'],
        binned_col='binned_years_exp',
    )
    plot_hiring_rate_by_category(years_df, 'binned_years_exp', axes[3])

    # Plot 5: Hiring Rate by Has CV SWS
    plot_hiring_rate_by_category(global_user_df, 'Has CV SWS', axes[4])

    # Plot 6: Hiring Rate by Number of Nominations
    nominations_df = _binned_frame(
        'Number of nominations',
        bins=[-1, 0, 1, 2, 5, np.inf],
        labels=['0', '1', '2', '3-5', '>5'],
        binned_col='binned_nominations',
    )
    plot_hiring_rate_by_category(nominations_df, 'binned_nominations', axes[5])

    plt.tight_layout()
    return fig

# --- 8. Submit Function ---
async def process_new_user_submission(*args):
    """
    Handle one form submission: save it, score it and build every output panel.

    Args:
        *args: the form values, in ALL_FEATURE_COLUMNS order.

    Returns:
        7-tuple: (hire probability text, categorization markdown, skill radar
        figure, action plan, action plan again, global dashboard figure,
        success-stories markdown). When the models are not loaded, an error
        tuple of the same length is returned.
    """
    if ai_models["pipeline"] is None: return "ERROR: AI Models not loaded.", "Error", None, "Error", None, None, None

    def _number_or_zero(value):
        """Coerce a form value to float; missing or non-numeric input counts as 0."""
        number = pd.to_numeric(value, errors='coerce')
        return 0.0 if number is None or pd.isna(number) else float(number)

    new_profile_dict = dict(zip(ALL_FEATURE_COLUMNS, args))

    # Save data (best effort: a failed write is logged and must not block the analysis)
    try:
        save_data = pd.DataFrame([new_profile_dict], columns=ALL_FEATURE_COLUMNS)
        save_data['Journey id'] = f"WEB-{int(datetime.datetime.now().timestamp())}"
        save_data['Attachment date'] = datetime.date.today().isoformat()
        save_data['Stream'] = 'Online Submission'; save_data['Work status'] = 'On programme'
        # Write the header only when the file is being created.
        file_exists = os.path.isfile(NEW_SUBMISSIONS_FILE_PATH)
        save_data.to_csv(NEW_SUBMISSIONS_FILE_PATH, mode='a', header=not file_exists, index=False)
        print("Profile saved.")
    except Exception as e:
        print(f"--- ERROR Saving: {e} ---")

    statistical_report, top_weakness, hire_probability = run_xgb_prediction_and_get_report(new_profile_dict)
    individual_skill_chart = generate_individual_charts(new_profile_dict)

    # A NaN here would fail both `<=` comparisons below and be classified as 'Senior'.
    years = _number_or_zero(new_profile_dict.get('Years workexperience', 0))
    months = _number_or_zero(new_profile_dict.get('Months workexperience', 0))
    total_years = years + (months / 12)
    level = 'Junior' if total_years <= 2 else ('Mid-level' if total_years <= 5 else 'Senior')
    categorization_text = f"**Level:** {level}\n**Qualification:** {new_profile_dict.get('Highest Qualification', 'N/A')}\n**Top Job Goal:** {new_profile_dict.get('Job goal 1', 'N/A')}"

    # RAG Action Plan
    job_goal = new_profile_dict.get('Job goal 1', 'N_A')
    # default=str keeps json.dumps from failing on values that are not plain JSON types.
    system_prompt = f"""Act as an AI Career Counselor.
    Profile: {json.dumps(new_profile_dict, indent=2, default=str)}
    Stats: {statistical_report}
    Weakness: {top_weakness}
    Goal: {job_goal}

    Task: Use Google Search to find courses/meetups for {job_goal} and {top_weakness}.
    Write a comprehensive career plan with:
    1. Analysis of hire probability.
    2. Recommended internal workshops (from list: CV Writing, Interview Skills).
    3. 2-3 External Courses (MUST use Google Search).
    4. Meetups/Networking events.
    """

    try:
        final_action_plan = await call_gemini_api(system_prompt, tools=[{"google_search": {}}], json_mode=False)
    except Exception as e:
        # Keep the prediction, charts and dashboard usable when the LLM call fails.
        # Details go to the log only, not to the UI.
        print(f"--- ERROR generating action plan: {e} ---")
        final_action_plan = "The AI action plan could not be generated right now. Please try again."

    # Success Stories (placeholder: no matching logic yet, only a status message)
    has_hired_users = (
        not global_user_df.empty
        and 'TARGET_HIRED' in global_user_df.columns
        and (global_user_df['TARGET_HIRED'] == 1).any()
    )
    stories = "### \u2001\u2001\u2001\u2001 \u2001Success Stories\n"
    if has_hired_users:
        stories += "Based on available data, no exact matches for success stories can be generated at this moment."
    else:
        stories += "No exact matches found."

    updated_dashboard = generate_global_dashboard()

    return (
        f"{hire_probability:.1%}", categorization_text, individual_skill_chart, final_action_plan, final_action_plan, updated_dashboard, stories
    )

# --- 9. Follow-up Chat Function ---
async def call_gemini_follow_up_chat(user_message: str, chat_history: list, report_context: str):
    """
    Answer a follow-up question about the generated report.

    Args:
        user_message: the user's question.
        chat_history: list of [user, bot] pairs; extended in place. None is
            treated as an empty history.
        report_context: the previously generated report text; when empty the
            user is asked to generate a report first and no API call is made.

    Returns:
        tuple ("", chat_history): an empty string (clears the input box) and the
        updated history. API failures are reported in the bot reply instead of raising.
    """
    if chat_history is None:
        chat_history = []
    chat_history.append([user_message, None])
    if not report_context:
        chat_history[-1][1] = "Please generate a report first."
        return "", chat_history
    system_prompt = f"Context: {report_context}. User: '{user_message}'. Answer using context and Google Search."
    try:
        bot_response = await call_gemini_api(system_prompt, tools=[{"google_search": {}}])
    except Exception as e:
        bot_response = f"Error generating response: {e}. Please try again."

    chat_history[-1][1] = bot_response
    return "", chat_history

# --- 10. Generic Gemini API Caller ---
async def call_gemini_api(prompt, tools=None, json_mode=False, model="gemini-1.5-flash"):
    """Send one prompt to the Gemini ``generateContent`` endpoint and return its text.

    Args:
        prompt: Prompt text, sent as a single content part.
        tools: Optional list of tool declarations added to the request payload
            (callers in this file pass ``[{"google_search": {}}]``).
        json_mode: When True the response MIME type is requested as
            ``application/json``; otherwise ``text/plain``.
        model: Model name inserted into the endpoint URL.
            NOTE(review): confirm the default model is still served and accepts
            the tools that callers pass.

    Returns:
        str: The text parts of the first candidate, concatenated.

    Raises:
        RuntimeError: On a non-200 status, or when the response holds no
            candidate or no text part (for example a blocked prompt).
    """
    api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    # The key travels in a header rather than the query string so it does not
    # end up in URLs that get logged or echoed in error messages.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': MY_GOOGLE_API_KEY}
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"responseMimeType": "application/json" if json_mode else "text/plain"},
    }
    if tools:
        payload["tools"] = tools

    transport = httpx.AsyncHTTPTransport(retries=3)
    async with httpx.AsyncClient(transport=transport) as client:
        response = await client.post(api_url, json=payload, headers=headers, timeout=90.0)

    if response.status_code != 200:
        raise RuntimeError(f"API Error: {response.text}")

    result = response.json()
    candidates = result.get('candidates') or []
    if not candidates:
        raise RuntimeError(f"API Error: no candidates returned (promptFeedback={result.get('promptFeedback')})")

    # A candidate may carry several parts; keep every text part instead of
    # only the first one.
    parts = (candidates[0].get('content') or {}).get('parts') or []
    text = "".join(part.get('text', '') for part in parts)
    if not text:
        raise RuntimeError(f"API Error: response contained no text (finishReason={candidates[0].get('finishReason')})")
    return text

# --- Mock Interview Placeholder Functions (added to resolve NameError) ---
async def start_mock_interview(role: str, chat_history: list):
    """Append the interviewer's opening question for ``role`` to the chat.

    Args:
        role: Target role name interpolated into the greeting.
        chat_history: Chat history as a list of ``(user, bot)`` pairs; the
            greeting is appended in place as ``(None, greeting)``. ``None`` is
            treated as an empty history.

    Returns:
        list: The chat history including the new greeting.
    """
    # A missing history must not crash the first click.
    if chat_history is None:
        chat_history = []
    initial_message = f"Hello! I'm your AI Interviewer. We'll be focusing on a {role} role. Let's start with your experience. Tell me about a time you faced a challenge in a previous role and how you overcame it."
    chat_history.append((None, initial_message))
    return chat_history

async def continue_mock_interview(user_response: str, chat_history: list, role: str):
    """Record the candidate's answer and ask the model for the next question.

    Args:
        user_response: The candidate's latest answer.
        chat_history: Chat history as a list of ``(user, bot)`` pairs; updated
            in place. ``None`` is treated as an empty history.
        role: Target role name interpolated into the prompt.

    Returns:
        tuple: ``("", chat_history)`` - an empty string followed by the history
        whose last pair is ``(user_response, next_question_or_error_text)``.
    """
    if chat_history is None:
        chat_history = []
    chat_history.append((user_response, None))

    # Each history entry is one (candidate message, interviewer message) pair,
    # so both sides are emitted with their own speaker label. Labelling whole
    # entries by index parity would drop the candidate's answers and attribute
    # interviewer questions to the candidate.
    transcript_lines = []
    for candidate_msg, interviewer_msg in chat_history:
        if candidate_msg:
            transcript_lines.append(f"Candidate: {candidate_msg}")
        if interviewer_msg:
            transcript_lines.append(f"Interviewer: {interviewer_msg}")
    conversation_history_text = "\n".join(transcript_lines)

    prompt = f"""You are an AI interviewer for a {role} role. The candidate just responded to your last question.
    Your previous questions and the candidate's responses are below. Generate ONE *unique*, relevant, and challenging follow-up interview question.
    Ensure this question has not been asked before in the conversation history.
    Do NOT ask for personal information. Do NOT start with 'Okay' or similar filler words. Keep the question concise and professional.

    Conversation History:
    {conversation_history_text}

    Candidate's last response: {user_response}

    Your next question:"""
    try:
        bot_response = await call_gemini_api(prompt)
    except Exception as e:
        # Deliberate best-effort: show the failure in the chat instead of raising.
        bot_response = f"Error generating question: {e}. Please try again."

    chat_history[-1] = (user_response, bot_response)
    return "", chat_history

# --- New function to autofill with sample data ---
def autofill_sample_data():
    """Return one hard-coded sample profile as a flat list of form values.

    The result is positional: the autofill button binds it to the ``all_inputs``
    component list, so the value order below has to match that list.
    NOTE(review): the field names are labels only and are not used for
    matching - confirm the order against ``ALL_FEATURE_COLUMNS``.

    Returns:
        list: 24 values, one per form field, in the order listed below.
    """
    sample_profile = (
        ('gender', 'Male'),
        ('Age', 30),
        ('Marital status', 'Married'),
        ('Salary expectations', '5,000 - 7,000SAR'),
        ('Highest Qualification', 'Bachelor'),
        ('Highest major', 'Electrical Engineering'),
        ('Shifts', 'No preference'),
        ('Working environment', 'Mixed'),
        ('Skills English', 'Excellent'),
        ('Skills MS Word', 'Good'),
        ('Skills MS Powerpoint', 'Good'),
        ('Transport available', 'Yes'),
        ('Driving license', 'Yes'),
        ('Years workexperience', 7),
        ('Months workexperience', 6),
        ('Communication skills', 'Excellent'),
        ('Has CV SWS', 'Yes'),
        ('Number of nominations', 5),
        ('Job title 1', 'Software Engineer'),
        ('Job title 2', 'Data Analyst'),
        ('Job title 3', 'Project Manager'),
        ('Job goal 1', 'Software Development'),
        ('Job goal 2', 'Data Science'),
        ('Job goal 3', 'Project Management'),
    )
    return [field_value for _field_name, field_value in sample_profile]

# --- New Candidate Analysis Function ---
def get_new_candidate_analysis_data():
    """Score the saved new-candidate submissions with the loaded model pipeline.

    Reads the CSV at ``NEW_SUBMISSIONS_FILE_PATH``, aligns its columns with
    ``ALL_FEATURE_COLUMNS``, predicts a hire probability for every row and
    stores the scored frame in the module-level ``processed_new_candidates_df``.

    Returns:
        tuple: ``(scored_dataframe, histogram_figure, markdown_summary)``.
        When the models are not loaded, the file is missing, the file has no
        rows, or processing raises, the tuple is
        ``(empty DataFrame, None, message)`` instead.
    """
    global processed_new_candidates_df
    print("Running new candidate analysis...")
    if ai_models["pipeline"] is None:
        return pd.DataFrame(), None, "ERROR: AI Models not loaded."

    if not os.path.exists(NEW_SUBMISSIONS_FILE_PATH):
        return pd.DataFrame(), None, "No new submissions found to analyze."

    try:
        new_submissions_df = pd.read_csv(NEW_SUBMISSIONS_FILE_PATH)
        print(f"Loaded {len(new_submissions_df)} new submissions.")

        # A header-only file has nothing to score; stop before the pipeline
        # is handed a zero-row frame.
        if new_submissions_df.empty:
            return pd.DataFrame(), None, "No new submissions found to analyze."

        feature_columns = list(ALL_FEATURE_COLUMNS)
        feature_set = set(feature_columns)

        # Add any feature column the file lacks, filled with NaN.
        for c in feature_columns:
            if c not in new_submissions_df.columns:
                new_submissions_df[c] = np.nan

        # Drop columns that are neither features nor known bookkeeping columns
        # that might be added by the submission saving process.
        bookkeeping_cols = {'Journey id', 'Attachment date', 'Stream', 'Work status', 'Predicted_Hire_Probability', 'Predicted_Hired'}
        extra_cols = [c for c in new_submissions_df.columns if c not in feature_set and c not in bookkeeping_cols]
        if extra_cols:
            print(f"Dropping extra columns from new_submissions_df: {extra_cols}")
            new_submissions_df = new_submissions_df.drop(columns=extra_cols)

        # Features first, in exactly the ALL_FEATURE_COLUMNS order, followed by
        # the remaining columns in their existing order.
        trailing_cols = [c for c in new_submissions_df.columns if c not in feature_set]
        new_submissions_df = new_submissions_df[feature_columns + trailing_cols]

        # Ensure numeric types are correct before prediction. Whole-column
        # assignment replaces the column (and its dtype); writing through
        # `.loc[:, col]` can keep an object dtype, after which the lowercase
        # pass below would turn the numbers into strings.
        numeric_cols = ['Age', 'Years workexperience', 'Months workexperience', 'Number of nominations']
        for col in numeric_cols:
            if col in new_submissions_df.columns:
                new_submissions_df[col] = pd.to_numeric(new_submissions_df[col], errors='coerce').fillna(0)

        # Standardize categorical columns to lowercase for consistency with training data.
        # NOTE(review): astype(str) turns missing values into the literal 'nan';
        # confirm the training data was prepared the same way.
        for col in feature_columns:
            if new_submissions_df[col].dtype == 'object':
                new_submissions_df[col] = new_submissions_df[col].astype(str).str.lower()

        # Predict with the feature columns only and flag rows at or above the
        # 0.5 example threshold.
        probabilities = ai_models["pipeline"].predict_proba(new_submissions_df[feature_columns])[:, 1]
        new_submissions_df['Predicted_Hire_Probability'] = probabilities
        new_submissions_df['Predicted_Hired'] = (probabilities >= 0.5).astype(int)

        processed_new_candidates_df = new_submissions_df.copy()

        # Generate summary
        hired_count = processed_new_candidates_df['Predicted_Hired'].sum()
        total_candidates = len(processed_new_candidates_df)
        hiring_rate = (hired_count / total_candidates) * 100 if total_candidates > 0 else 0
        summary = f"### New Candidate Analysis Summary\n\nTotal Candidates: {total_candidates}\nPredicted Hired: {hired_count} ({hiring_rate:.2f}%)\n\nAverage Predicted Probability: {processed_new_candidates_df['Predicted_Hire_Probability'].mean():.2%}"

        # Histogram of the predicted probabilities.
        fig_prob = px.histogram(processed_new_candidates_df, x='Predicted_Hire_Probability', nbins=20,
                                title='Distribution of Predicted Hiring Probabilities',
                                labels={'Predicted_Hire_Probability': 'Probability of Being Hired'})
        fig_prob.update_layout(bargap=0.1)

        return processed_new_candidates_df, fig_prob, summary

    except Exception as e:
        # Deliberate best-effort: report the failure to the UI instead of raising.
        print(f"Error during new candidate analysis: {e}")
        return pd.DataFrame(), None, f"Error during analysis: {e}"

--- Phase 4 & 5: Defining App Logic and Launching Multi-Page UI ---


Cell 8

In [None]:
# Cell 8: builds the five-tab Gradio interface (`demo`), wires every widget to
# the handler functions defined in earlier cells, and launches the app.
print("Launching Ultimate App...")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI-Powered Predictive Hiring & Action Intelligence System")

    # State variables
    # report_context_state is written by the submit button below and read by
    # the follow-up chat handlers on Page 4.
    report_context_state = gr.State(value="")
    # NOTE(review): chat_history_state is not referenced by any handler in this cell.
    chat_history_state = gr.State(value=[])

    # --- TOP LEVEL TABS ---
    with gr.Tabs():

        # --- PAGE 1: PROFILE & ACTION PLANNING ---
        with gr.TabItem("     Profile & Action Planning"):
            with gr.Row():
                # --- LEFT COLUMN: INPUT FORM ---
                with gr.Column(scale=3):
                    gr.Markdown("### 1. Profile Input")

                    with gr.Row():
                        resume_file = gr.File(label="     Auto-Fill Profile from Resume (PDF)", file_types=[".pdf"], scale=3)
                        parse_btn = gr.Button("Parse", scale=1)

                    autofill_btn = gr.Button("     Autofill Sample Data") # New Autofill Button

                    # FORM INPUTS MAPPING
                    # Keys are feature column names; values are the widgets that
                    # collect them. `all_inputs` below is built from this dict.
                    inputs_map = {}

                    with gr.Accordion("Personal Info", open=True):
                        with gr.Row():
                            inputs_map['gender'] = gr.Dropdown(label="Gender", choices=GENDER_CHOICES, value='Female')
                            inputs_map['Age'] = gr.Number(label="Age", value=25)
                        inputs_map['Marital status'] = gr.Dropdown(label="Marital Status", choices=MARITAL_CHOICES, value='Single')

                    # Removed 'JS Town' and 'JS Town distrinct' inputs as they are no longer model features

                    with gr.Accordion("Education & Salary", open=False):
                        inputs_map['Salary expectations'] = gr.Dropdown(label="Salary Expectations", choices=SALARY_CHOICES, value='3,000 - 5,000SAR')
                        inputs_map['Highest Qualification'] = gr.Dropdown(label="Highest Qualification", choices=QUALIFICATION_CHOICES, value='Bachelor')
                        inputs_map['Highest major'] = gr.Textbox(label="Highest Major", value='Computer Science')

                    with gr.Accordion("Work Preferences", open=False):
                        with gr.Row():
                            inputs_map['Shifts'] = gr.Dropdown(label="Shifts", choices=SHIFTS_CHOICES, value='No preference')
                            inputs_map['Working environment'] = gr.Dropdown(label="Environment", choices=ENVIRONMENT_CHOICES, value='Mixed')

                    with gr.Accordion("Skills", open=False):
                        with gr.Row():
                            inputs_map['Skills English'] = gr.Dropdown(label="English", choices=SKILL_LEVEL_CHOICES, value='Good')
                            inputs_map['Skills MS Word'] = gr.Dropdown(label="Word", choices=SKILL_LEVEL_CHOICES, value='Good')
                            # Removed 'Skills MS Excel' as it's not in the model training features
                            inputs_map['Skills MS Powerpoint'] = gr.Dropdown(label="PPT", choices=SKILL_LEVEL_CHOICES, value='Good')
                        with gr.Row():
                            inputs_map['Communication skills'] = gr.Dropdown(label="Communication", choices=COMMUNICATION_CHOICES, value='Good')

                    with gr.Accordion("Logistics", open=False):
                        with gr.Row():
                            inputs_map['Transport available'] = gr.Radio(label="Transport?", choices=YES_NO_CHOICES, value='No')
                            inputs_map['Driving license'] = gr.Radio(label="License?", choices=YES_NO_CHOICES, value='No')
                        inputs_map['Has CV SWS'] = gr.Radio(label="CV?", choices=YES_NO_CHOICES, value='No') # Corrected column name

                    with gr.Accordion("Target Roles & Goals", open=False):
                        inputs_map['Job title 1'] = gr.Textbox(label="Job Title 1", value="Software Engineer")
                        inputs_map['Job title 2'] = gr.Textbox(label="Job Title 2", value="Data Analyst")
                        inputs_map['Job title 3'] = gr.Textbox(label="Job Title 3", value="Project Manager")
                        inputs_map['Job goal 1'] = gr.Textbox(label="Job Goal 1", value="Software Development")
                        inputs_map['Job goal 2'] = gr.Textbox(label="Job Goal 2", value="Data Science")
                        inputs_map['Job goal 3'] = gr.Textbox(label="Job Goal 3", value="Project Management")

                    with gr.Accordion("Experience", open=False):
                        with gr.Row():
                            inputs_map['Years workexperience'] = gr.Number(label="Years", value=1)
                            inputs_map['Months workexperience'] = gr.Number(label="Months", value=0)

                    with gr.Accordion("Additional Info", open=False):
                        inputs_map['Number of nominations'] = gr.Number(label="Number of Nominations", value=0)

                    # Widgets ordered by ALL_FEATURE_COLUMNS. Every name in
                    # ALL_FEATURE_COLUMNS needs an entry in inputs_map, otherwise
                    # this lookup raises KeyError. Handlers that receive or return
                    # `all_inputs` work positionally in this order.
                    all_inputs = [inputs_map[col] for col in ALL_FEATURE_COLUMNS]
                    submit_btn = gr.Button("     Generate Full AI Report", variant="primary")
                    status_output = gr.Textbox(label="System Status", interactive=False, value="App starting...")

                # --- RIGHT COLUMN: COMMAND CENTER ---
                with gr.Column(scale=7):
                    gr.Markdown("### 2. Analysis & Recommendations Command Center")
                    with gr.Row():
                        prediction_output = gr.Textbox(label="Hire Probability", value="N/A", scale=1)
                        category_output = gr.Markdown(value="*Categorization*")
                        success_box = gr.Markdown(label="Success Stories", value="*Run a report to find matches*")

                    with gr.Row():
                        skill_chart_output = gr.Plot(label="Skills Radar")
                        action_plan_output = gr.Markdown(value="*Action Plan will appear here*", label="AI Action Plan")

        # --- PAGE 2: MOCK INTERVIEW ---
        with gr.TabItem("     AI Mock Interview"):
            with gr.Row():
                with gr.Column(scale=1):
                    role_input = gr.Textbox(label="Target Role", value="Data Analyst")
                    start_interview_btn = gr.Button("Start New Interview", variant="primary")
                    interview_clear_btn = gr.Button("Clear Chat")
                with gr.Column(scale=3):
                    interview_chatbot = gr.Chatbot(height=500)
                    interview_msg = gr.Textbox(label="Your Answer")
                    interview_send = gr.Button("Send Answer")

            # Pressing Enter in the answer box and clicking "Send Answer" run the
            # same handler; its first return value goes back into the answer box.
            start_interview_btn.click(start_mock_interview, inputs=[role_input, interview_chatbot], outputs=[interview_chatbot])
            interview_msg.submit(continue_mock_interview, inputs=[interview_msg, interview_chatbot, role_input], outputs=[interview_msg, interview_chatbot])
            interview_send.click(continue_mock_interview, inputs=[interview_msg, interview_chatbot, role_input], outputs=[interview_msg, interview_chatbot])
            # "Clear Chat" replaces the chatbot value with an empty list.
            interview_clear_btn.click(lambda: [], outputs=[interview_chatbot])

        # --- PAGE 3: DASHBOARD ---
        with gr.TabItem("     Executive Analytics Dashboard"):
            gr.Markdown("### Global User Population Analysis")
            global_dashboard_plot = gr.Plot(label="Global Dashboard")
            refresh_button = gr.Button("Refresh Global Dashboard", variant="secondary")
            refresh_button.click(generate_global_dashboard, outputs=[global_dashboard_plot])

        # --- PAGE 4: CHAT ---
        with gr.TabItem("     Follow-Up Counselor Chat"):
            gr.Markdown("### Ask follow-up questions about the report you just generated in Tab 1.")
            chat_window = gr.Chatbot(label="Follow-up Chat", height=500)
            with gr.Row():
                chat_textbox = gr.Textbox(show_label=False, placeholder="Ask me anything about the report above...", scale=8)
                chat_btn = gr.Button("Send", scale=1)

            # Button click and Enter share one handler, which also receives the
            # report text stored in report_context_state.
            chat_btn.click(call_gemini_follow_up_chat, inputs=[chat_textbox, chat_window, report_context_state], outputs=[chat_textbox, chat_window])
            chat_textbox.submit(call_gemini_follow_up_chat, inputs=[chat_textbox, chat_window, report_context_state], outputs=[chat_textbox, chat_window])

        # --- PAGE 5: NEW CANDIDATE ANALYSIS ---
        with gr.TabItem("     New Candidate Analysis"):
            gr.Markdown("### Analyze the recently submitted new candidates.")
            analyze_new_candidates_btn = gr.Button("Run New Candidate Analysis", variant="primary")
            new_candidate_summary_output = gr.Markdown("### Analysis Summary will appear here.")
            # NOTE(review): the label says "Potential Category Distribution" but
            # get_new_candidate_analysis_data returns a histogram of predicted
            # hire probabilities - confirm which is intended.
            new_candidate_plot_output = gr.Plot(label="Potential Category Distribution")
            new_candidate_df_output = gr.DataFrame(label="Processed New Candidate Data (with Predictions)")

            # The handler returns (dataframe, figure, summary), matching this output order.
            analyze_new_candidates_btn.click(
                fn=get_new_candidate_analysis_data,
                outputs=[new_candidate_df_output, new_candidate_plot_output, new_candidate_summary_output]
            )


    # --- FINAL GLOBAL CONNECTIONS ---
    # These are wired last because they reference widgets created in several tabs.
    # On page load: run on_app_load into the status box, then draw the dashboard.
    demo.load(fn=on_app_load, outputs=[status_output]).then(fn=generate_global_dashboard, outputs=[global_dashboard_plot])
    parse_btn.click(parse_resume, inputs=[resume_file] + all_inputs, outputs=all_inputs)
    autofill_btn.click(autofill_sample_data, outputs=all_inputs) # Connect autofill button
    # The handler must return seven values in exactly this output order; the
    # fifth one is stored in report_context_state for the follow-up chat.
    submit_btn.click(
        fn=process_new_user_submission,
        inputs=all_inputs,
        outputs=[prediction_output, category_output, skill_chart_output, action_plan_output, report_context_state, global_dashboard_plot, success_box]
    )


# NOTE(review): this print repeats the one at the top of the cell, so the
# message appears twice in the cell output.
print("Launching Ultimate App...")
# debug=True keeps the cell running so errors and logs stay visible in Colab;
# share=True serves the app on a public gradio.live URL.
demo.launch(debug=True, share=True)

Launching Ultimate App...
Launching Ultimate App...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b7bd58aaee9970ba78.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Gradio App Loading: Loading *XGBoost* AI models from Google Drive...
...XGBoost Models loaded successfully.
...Loaded 72641 users for Global Dashboard.


Cell 9

Cell 10