In [6]:
import pandas as pd
import numpy as np
import joblib
from lime.lime_tabular import LimeTabularExplainer

# Load necessary files
training_columns = joblib.load("training_columns.pkl")
best_model = joblib.load("best_model.joblib")
preprocessor = best_model.named_steps['preprocessor']

# Input sample
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 50000,
    'AMT_CREDIT': 600000,
    'AMT_ANNUITY': 25000,
    'AMT_GOODS_PRICE': 550000,
    'DAYS_BIRTH': -16000,
    'DAYS_EMPLOYED': 0,
    'REGION_POPULATION_RELATIVE': 0.04,
    'CNT_FAM_MEMBERS': 4,
    'FLAG_MOBIL': 1,
    'FLAG_EMAIL': 1,
    'FLAG_WORK_PHONE': 0,
    'NAME_INCOME_TYPE_Working': 0,
    'NAME_INCOME_TYPE_Unemployed': 1,
    'NAME_EDUCATION_TYPE_Higher_education': 0,
    'NAME_EDUCATION_TYPE_Secondary_education': 1,
    'NAME_FAMILY_STATUS_Married': 0,
    'NAME_FAMILY_STATUS_Single': 1,
    'NAME_HOUSING_TYPE_House_apartment': 1,
    'NAME_HOUSING_TYPE_With_parents': 0,
    'OCCUPATION_TYPE_Laborers': 1,
    'OCCUPATION_TYPE_Sales_staff': 0
}

# Align the input data with training columns
test_data = pd.DataFrame([new_data_default_risk]).reindex(columns=training_columns, fill_value=0)
test_data_processed = preprocessor.transform(test_data).astype(np.float32)

# Load training data to initialize the LIME explainer
X_train = pd.read_csv("X_train.csv")  # Ensure this file exists
X_train_processed = preprocessor.transform(X_train)

# Define a LIME prediction function using the pipeline
def lime_prediction_fn(data):
    """
    Predict probabilities using the trained pipeline for LIME.

    Args:
    - data (numpy.ndarray): Input data.

    Returns:
    - numpy.ndarray: Predicted probabilities for each class.
    """
    return best_model.named_steps['classifier'].predict_proba(data)

# Initialize the LIME Tabular Explainer
explainer = LimeTabularExplainer(
    training_data=X_train_processed,
    feature_names=training_columns,
    class_names=["Non-default", "Default"],
    mode="classification"
)

# Generate LIME explanation for the test instance
exp = explainer.explain_instance(
    data_row=test_data_processed[0],
    predict_fn=lime_prediction_fn,
    num_features=3  # Top 3 most important features
)

# Output the top 3 contributing features
print("\nTop 3 Contributing Features:")
for feature, contribution in exp.as_list():
    print(f"Feature: {feature}, Contribution: {contribution}")


Top 3 Contributing Features:
Feature: -0.24 < AMT_GOODS_PRICE <= 0.38, Contribution: -0.07069934474698475
Feature: NAME_EDUCATION_TYPE_Higher education > -0.57, Contribution: -0.04251217352253265
Feature: -0.15 < AMT_ANNUITY <= 0.51, Contribution: 0.03809088878060544


In [7]:
import requests
import json

# Inference endpoint URL
inference_url = "https://vicuna-llm.demo.svc.cluster.local:443/v1/chat/completions"

# Top 3 Contributing Features
contributing_features = """
Top 3 Contributing Features:
1. Feature: -0.24 < AMT_GOODS_PRICE <= 0.38, Contribution: -0.07069934474698475
2. Feature: NAME_EDUCATION_TYPE_Higher education > -0.57, Contribution: -0.04251217352253265
3. Feature: -0.15 < AMT_ANNUITY <= 0.51, Contribution: 0.03809088878060544

Explain this output to a loans officer and provide guidance on whether the loan should be approved or rejected.
"""

# Example payload to send to the Vicuna model
payload = {
    "model": "vicuna-llm",  # Replace with the specific model name if required
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant specializing in explaining data insights."
        },
        {
            "role": "user",
            "content": contributing_features
        }
    ],
    "temperature": 0.7,  # Adjust for response randomness
    "top_p": 0.9,        # Adjust for nucleus sampling
    "max_tokens": 200    # Adjust the token limit for longer responses
}

# Headers for the API request
headers = {
    "Content-Type": "application/json"
}

# Function to send a request to the API
def test_vicuna_inference(url, payload):
    try:
        print("Sending request to the Vicuna model...")
        response = requests.post(url, headers=headers, data=json.dumps(payload), verify=False)

        # Check if the response is successful
        if response.status_code == 200:
            print("Response from Vicuna:")
            response_data = response.json()
            print(json.dumps(response_data, indent=4))
        else:
            print(f"Failed with status code: {response.status_code}")
            print("Response:", response.text)
    except requests.RequestException as e:
        print(f"An error occurred: {e}")

# Test the API
test_vicuna_inference(inference_url, payload)

Sending request to the Vicuna model...
Response from Vicuna:
{
    "id": "cmpl-00c999994b734390ae3637493db82982",
    "object": "chat.completion",
    "created": 1732181900,
    "model": "vicuna-llm",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "",
                "tool_calls": []
            },
            "logprobs": null,
            "finish_reason": "stop",
            "stop_reason": null
        }
    ],
    "usage": {
        "prompt_tokens": 214,
        "total_tokens": 215,
        "completion_tokens": 1
    }
}


