In [3]:
import warnings
warnings.filterwarnings("ignore")

import json
import torch
import pickle
import ollama
import numpy as np
from torch_geometric.nn import GATConv
import torch.nn.functional as F
from torch_geometric.data import Data
from captum.attr import IntegratedGradients

with open(r'path\to\model\fraud_ensemble.pkl', 'rb') as f:
    gnn_state_dict, xgb_model, scaler, label_encoder = pickle.load(f)

class GNN(torch.nn.Module):
    def __init__(self, num_features, hidden_dim, num_classes):
        super(GNN, self).__init__()
        self.conv1 = GATConv(num_features, hidden_dim)
        self.conv2 = GATConv(hidden_dim, 8)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

class ModelWrapper(torch.nn.Module):
    def __init__(self, model, edge_index):
        super().__init__()
        self.model = model
        self.edge_index = edge_index

    def forward(self, x):
        data = Data(x=x, edge_index=self.edge_index)
        return self.model(data)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_features = 4
num_classes = 2

gnn_model = GNN(num_features=num_features, hidden_dim=128, num_classes=num_classes).to(device)
gnn_model.load_state_dict(gnn_state_dict)
gnn_model.eval()

user_input = json.loads(input("Enter the input as a JSON string: "))

feature_values = np.array([[user_input["ASSURED_AGE"], int(user_input["POLICY SUMASSURED"].replace(",", "")),
                           int(user_input["Premium"].replace(",", "")), int(user_input["Annual Income"].replace(",", ""))]])
scaled_features = scaler.transform(feature_values)

node_features = torch.tensor(scaled_features, dtype=torch.float).to(device)
dummy_edge_index = torch.tensor([[0], [0]], dtype=torch.long).to(device)
data = Data(x=node_features, edge_index=dummy_edge_index)

with torch.no_grad():
    gnn_logits = gnn_model(data)
    gnn_pred = torch.argmax(gnn_logits, dim=1).item()

xgb_pred = xgb_model.predict(scaled_features)[0]

final_pred = xgb_pred if xgb_pred == gnn_pred else xgb_pred

xgb_feature_importance = xgb_model.feature_importances_
feature_names = ["ASSURED_AGE", "POLICY SUMASSURED", "Premium", "Annual Income"]
important_features = sorted(zip(feature_names, xgb_feature_importance),
                          key=lambda x: x[1], reverse=True)[:3]

wrapped_model = ModelWrapper(gnn_model, dummy_edge_index)
ig = IntegratedGradients(wrapped_model)
baseline = torch.zeros_like(node_features)
target = torch.tensor([final_pred], dtype=torch.long).to(device)
attributions = ig.attribute(node_features, target=target, baselines=baseline)
gnn_attr = attributions.cpu().detach().numpy()

explanation = {
    "Final Prediction": "Fraud" if final_pred == 1 else "Legitimate",
    "Top Features": [feat[0] for feat in important_features],
    "Feature Importance (XGB)": xgb_feature_importance.tolist(),
    "Feature Importance (GNN)": gnn_attr.tolist()
}

llama_prompt = f"""
Analyze the following insurance claim for fraud risk and explain the influencing factors:

Claim Details:
{json.dumps(user_input, indent=4)}

Model Prediction: {explanation['Final Prediction']}
Top Risk Factors: {', '.join(explanation['Top Features'])}

Feature Importance:
- XGBoost Analysis: {explanation['Feature Importance (XGB)']}
- GNN Analysis: {explanation['Feature Importance (GNN)']}

Calculate the risk contribution of all features and provide a detailed risk score and explanation. Recalculate the claim result based on the risk score.
Mainly consider the Fraud Category feature into consideration.

The explanation part should only mention the feature contribution determined logically by you not from the model.Dont return the output from the model.You consider them as additional information and calculate the risk scores and provide output based on the risk score boundaries.

Output in the following format:
Claim Result: Fraud/Legitimate/Potential Risk
Risk Score: #score#(in percent)
Fraud Category: (if fraud / potential risk)
Explanation: What contributed to the result (mention features if fraud)
Final verdict: Alert insurance agent to cross check else return claim can be approved

Risk Score Boundaries:
- Below 0.3: legitimate
- 0.3 to 0.6: potential risk
- Above 0.6: fraud

Dont visualize your calculating steps,Just provide the final output(refer the output format above).
"""

messages = [
    {"role": "system", "content": "You are an AI that explains fraud detection decisions."},
    {"role": "user", "content": llama_prompt}
]

response = ollama.chat(
    model="llama3.1",
    messages=messages
)

print("\n### Fraud Detection Explanation ###\n")
print(response['message']['content'])


### Fraud Detection Explanation ###

Claim Result: Potential Risk
Risk Score: 62%
Fraud Category: Identity Theft
Explanation: The high premium of ₹5,00,000 and policy sum assured of ₹50,00,000 in combination with an unemployed assured (ASSURED_AGE = 45) suggests that the claim may be fraudulent. Furthermore, the policy term and payment term being the same at 5 years implies that the assured might not have a regular income, making it difficult to pay premiums. The "Identity Theft" fraud category also supports this conclusion.
Final verdict: Alert insurance agent to cross check before approving the claim.
