### Model evaluation

In [20]:
import joblib
import json
import numpy as np
import pandas as pd

In [21]:
# Paths of the persisted artifacts read by load_model_components()
model_filename = 'xgboost_model.pkl'        # model, read with joblib.load
scaler_filename = 'scaler.pkl'              # scaler, read with joblib.load
metadata_filename = 'model_metadata.json'   # metadata, parsed with json.load

def load_model_components():
    """Load the trained model, scaler, and metadata from disk.

    The file paths are taken from the module-level names ``model_filename``,
    ``scaler_filename`` and ``metadata_filename``.

    Returns:
        Tuple ``(model, scaler, metadata)``: the two objects deserialized by
        joblib and the parsed JSON metadata.
    """
    # NOTE: joblib.load unpickles its input, which can execute arbitrary code.
    # Only load artifacts that come from a trusted source.
    model = joblib.load(model_filename)
    scaler = joblib.load(scaler_filename)

    # Decode the JSON explicitly as UTF-8: open() otherwise falls back to a
    # platform-dependent default encoding.
    with open(metadata_filename, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    return model, scaler, metadata

# Load the artifacts once so later cells can pass them to evaluate_model()
model_loaded, scaler_loaded, metadata_loaded = load_model_components()


In [22]:
# Dataset that the sampling helpers below draw their datapoints from
df = pd.read_csv('dataset.csv')
# random sampling helpers
def get_sample_datapoint():
    """Return one randomly chosen row of the module-level ``df`` as a dict.

    The row position is drawn with ``np.random.randint`` from
    ``[0, len(df))``; the resulting dict maps column names to that row's values.
    """
    row_position = np.random.randint(len(df))
    return df.iloc[row_position].to_dict()

def get_sample_json_data():
    """Draw a random datapoint and return it both as JSON text and as a dict.

    Returns:
        Tuple ``(sample_json, sample_data)``: the datapoint serialized with
        ``json.dumps(..., indent=2)`` and the dictionary it was built from.
    """
    datapoint = get_sample_datapoint()
    return json.dumps(datapoint, indent=2), datapoint

In [23]:

def evaluate_model(json_data, model, scaler, metadata):
    """
    Evaluate model on a single datapoint coming from JSON format

    Args:
        json_data: Dictionary or JSON string containing the datapoint. Keys
            that are not listed in metadata['feature_names'] (e.g. the
            target column) are ignored.
        model: Trained XGBoost model (must expose predict() and
            predict_proba())
        scaler: Fitted StandardScaler (must expose transform())
        metadata: Model metadata containing 'feature_names' (ordered list of
            feature keys) and 'class_mapping' (JSON string mapping each
            original class label to the encoded class the model predicts)

    Returns:
        Dictionary with prediction results:
            'predicted_class': original label of the predicted class, as int
            'prediction_probabilities': {original label: probability}
            'confidence': the highest class probability

    Raises:
        KeyError: If the datapoint lacks one or more required features.
    """
    # Convert JSON string to dict if needed
    data = json.loads(json_data) if isinstance(json_data, str) else json_data

    # Extract features in the order recorded in the metadata (the target and
    # any other extra keys are simply not picked up)
    feature_names = metadata['feature_names']
    missing = [feat for feat in feature_names if feat not in data]
    if missing:
        raise KeyError(f"Datapoint is missing required features: {missing}")
    X_sample = np.array([data[feat] for feat in feature_names]).reshape(1, -1)

    # Scale the features
    X_sample_scaled = scaler.transform(X_sample)

    # Make prediction
    prediction = model.predict(X_sample_scaled)[0]
    prediction_proba = model.predict_proba(X_sample_scaled)[0]

    # Convert prediction back to original class labels
    class_mapping = json.loads(metadata['class_mapping'])
    reverse_mapping = {encoded: label for label, encoded in class_mapping.items()}
    original_prediction = reverse_mapping[prediction]

    # Column i of predict_proba belongs to the i-th encoded class, so order
    # the labels by their encoded value. Sorting the label strings themselves
    # would be lexicographic ('10' < '2') and could mislabel probabilities.
    class_labels = sorted(class_mapping, key=class_mapping.get)
    proba_dict = {
        label: float(prediction_proba[i]) for i, label in enumerate(class_labels)
    }

    return {
        'predicted_class': int(original_prediction),
        'prediction_probabilities': proba_dict,
        'confidence': float(max(prediction_proba))
    }



In [24]:
# Example request in JSON string (as would come from API): draw a random
# datapoint; sample_data holds the same datapoint as a dict
sample_json_string, sample_data = get_sample_json_data()
print("Testing with JSON string input:")
# Evaluate the JSON string with the model, scaler and metadata loaded earlier
result_json = evaluate_model(sample_json_string, model_loaded, scaler_loaded, metadata_loaded)
print(f"Result from JSON string: {result_json}")


Testing with JSON string input:
Result from JSON string: {'predicted_class': 1, 'prediction_probabilities': {'1': 0.32749372720718384, '2': 0.16105243563652039, '3': 0.24216479063034058, '4': 0.15721692144870758, '5': 0.1120721697807312}, 'confidence': 0.32749372720718384}


