# Inference Test

In [None]:
import pandas as pd
import numpy as np
import joblib
import time
import sys
import os

# Add project root to path
sys.path.append(os.path.abspath(os.path.join('..')))

from src import config
from src import preprocessing as pp

In [None]:
# Module-level cache so the model is read from disk at most once per session.
_MODEL = None


def load_model():
    """
    Return the trained model, loading it from the models/ directory on first use.

    The loaded object is stored in the module-level ``_MODEL`` variable, so
    later calls return that same object without reading the file again.

    Returns:
        The object deserialized by ``joblib.load`` from
        ``../models/<config.MODEL_NAME>_v1.pkl``.

    Raises:
        FileNotFoundError: If there is no model file at that path.
    """
    global _MODEL

    # Fast path: already loaded earlier in this session.
    if _MODEL is not None:
        return _MODEL

    # Relative path: resolved against the current working directory.
    model_path = os.path.join("..", "models", f"{config.MODEL_NAME}_v1.pkl")

    # isfile() rather than exists(): a directory at this path is not a model.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"❌ Model not found at {model_path}. Run src/train.py first!")

    print(f"Loading model from {model_path}...")
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files produced by this project.
    _MODEL = joblib.load(model_path)

    return _MODEL

def make_prediction(input_data):
    """
    Main entry point for inference.

    Args:
        input_data (dict, pd.Series or pd.DataFrame): Raw input data. A dict
            or Series is treated as one record. For a DataFrame, only the
            prediction for its first row is returned.

    Returns:
        dict: On success
            ``{'prediction': int, 'probability': float, 'status': 'Success'}``,
            where ``probability`` is 0.0 if the model has no ``predict_proba``.
            On failure
            ``{'prediction': None, 'probability': None, 'error': str,
            'status': 'Failed'}``.

    Raises:
        FileNotFoundError: Propagated from ``load_model()`` when the model
            file is missing. Errors raised while preprocessing also
            propagate; only errors raised during prediction are turned into
            a 'Failed' result.
    """
    # 1. Normalise the input to a DataFrame
    if isinstance(input_data, dict):
        df = pd.DataFrame([input_data])
    elif isinstance(input_data, pd.Series):
        # A single row (e.g. df.iloc[i]). Go through a dict so the frame is
        # built exactly as it would be for a dict payload.
        df = pd.DataFrame([input_data.to_dict()])
    else:
        # Copy so the caller's DataFrame is never modified by preprocessing.
        df = input_data.copy()

    # 2. Preprocessing (Must match training!)
    # NOTE: In a real complex project, we would load a saved 'pipeline.pkl' here.
    # For this template, we apply the stateless cleaning functions.
    df = pp.clean_column_names(df)

    # 3. Load Model (cached after the first call)
    model = load_model()

    # 4. Predict
    # Any mismatch between the columns and what the model expects surfaces
    # here and is reported in the returned dict instead of being raised.
    try:
        prediction = model.predict(df)[0]

        # Get probability if supported (for "Risk Score")
        probability = None
        if hasattr(model, "predict_proba"):
            # Column 1 is taken as the probability of Class 1 (Default).
            probability = model.predict_proba(df)[0][1]

        return {
            "prediction": int(prediction),
            # Explicit None check: the 0.0 fallback is only for "unsupported".
            "probability": float(probability) if probability is not None else 0.0,
            "status": "Success"
        }

    except Exception as e:
        # Same keys as the success payload (plus 'error') so callers can read
        # result['probability'] without checking the status first.
        return {
            "prediction": None,
            "probability": None,
            "error": str(e),
            "status": "Failed"
        }

# Confirmation that every definition in this cell executed.
print("✅ Libraries loaded. Inference functions defined. Ready to test inference.")

In [None]:
# Load the clean data just to get a sample row
df = pd.read_csv(os.path.join("..", config.PROCESSED_DATA_PATH))
if df.empty:
    raise ValueError("Processed dataset is empty; cannot pick a sample row.")

# Use row #100 as a fixed sample, or the last row when the dataset is shorter
# (a bare df.iloc[100] raises IndexError on datasets with <= 100 rows).
sample_index = min(100, len(df) - 1)
sample_row = df.iloc[sample_index]
print("--- Sample Input Data ---")
print(sample_row)

# Separate Target (We don't send the answer to the model!)
ground_truth = sample_row[config.TARGET_COLUMN]
input_data = sample_row.drop(config.TARGET_COLUMN).to_dict()

print("\n--- Input Payload (JSON-like) ---")
print(input_data)

In [None]:
print("⏳ Calling make_prediction()...")

# Time one call. perf_counter() is a monotonic, high-resolution clock meant
# for measuring short durations; time.time() can jump if the system clock is
# adjusted. If the model has not been loaded yet, this measurement also
# includes reading it from disk.
start_time = time.perf_counter()

# CALL THE FUNCTION
result = make_prediction(input_data)

end_time = time.perf_counter()
latency = (end_time - start_time) * 1000  # Convert to ms

# make_prediction reports failures through 'status', so pick the icon from it
# instead of always showing a success mark.
status_icon = "✅" if result.get("status") == "Success" else "❌"
print(f"\n{status_icon} Prediction Result: {result}")
print(f"⏱️ Latency: {latency:.2f} ms")
print(f"🎯 Actual Value (Ground Truth): {ground_truth}")

In [None]:
print("running batch stress test...")
success_count = 0
errors = []

# Take up to 100 random samples. The size is capped at the dataset length
# because DataFrame.sample raises when asked for more rows than exist
# (sampling without replacement).
sample_size = min(100, len(df))
batch = df.sample(sample_size).drop(columns=[config.TARGET_COLUMN]).to_dict(orient='records')

for record in batch:
    try:
        res = make_prediction(record)
    except Exception as e:
        # Raised outside make_prediction's own error handling
        # (e.g. the model file is missing).
        errors.append(e)
        continue

    if res['status'] == 'Success':
        success_count += 1
    else:
        # Prediction failures come back in the result dict instead of being
        # raised, so they must be collected here or they go unreported.
        errors.append(res.get('error'))

print(f"Batch Test Complete: {success_count}/{len(batch)} successful.")
if errors:
    print(f"First Error: {errors[0]}")