# Fraud Detection Workflow with Transformers

This notebook implements an agentic workflow for fraud detection using LangGraph and local Transformers models.

In [3]:
# Install required packages
!pip install -q langgraph langchain-core langchain-community transformers accelerate bitsandbytes google-colab

# Import libraries


In [4]:
from langgraph.graph import StateGraph, END
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
from typing import TypedDict, List, Literal, Dict, Any
import json
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from IPython.display import display, Markdown

# Define state schema
class FraudDetectionState(TypedDict):
    transaction: dict
    context: dict
    messages: List[Dict[str, Any]]
    decision: Literal["continue", "final_decision"]
    final_verdict: Literal["legitimate", "suspicious", "fraudulent"]
    current_step: str

In [5]:
# Define tools
@tool
def check_blacklist(user_id: str, merchant_id: str) -> dict:
    """Check if user or merchant is on fraud blacklist"""
    user_blacklisted = user_id in ["fraud_user123", "suspicious_user456"]
    merchant_blacklisted = merchant_id in ["bad_merchant789", "risky_merchant101"]

    return {
        "user_blacklisted": user_blacklisted,
        "merchant_blacklisted": merchant_blacklisted,
        "blacklist_source": "global_fraud_database"
    }

@tool
def get_transaction_history(user_id: str, days: int = 30) -> list:
    """Retrieve user's transaction history for pattern analysis"""
    history = [
        {"amount": 120.50, "merchant": "amazon", "location": "US", "time": "2023-05-01"},
        {"amount": 45.30, "merchant": "starbucks", "location": "US", "time": "2023-05-03"},
        {"amount": 899.99, "merchant": "apple", "location": "US", "time": "2023-05-05"}
    ]

    for _ in range(random.randint(2, 5)):
        history.append({
            "amount": random.uniform(10, 500),
            "merchant": random.choice(["amazon", "walmart", "target", "bestbuy"]),
            "location": random.choice(["US", "CA", "UK"]),
            "time": f"2023-05-{random.randint(1, 28):02d}"
        })

    return history

@tool
def geolocation_verify(ip_address: str, claimed_location: str) -> dict:
    """Verify if transaction location matches IP geolocation"""
    ip_location = random.choice(["US", "CA", "UK", "FR", "DE"])
    location_match = ip_location == claimed_location

    return {
        "ip_location": ip_location,
        "claimed_location": claimed_location,
        "location_match": location_match,
        "risk_score": 0.8 if not location_match else 0.1
    }

@tool
def identity_verification(user_id: str, document_id: str) -> dict:
    """Perform identity verification checks"""
    verification_passed = user_id not in ["unverified_user789"]

    return {
        "verification_passed": verification_passed,
        "document_type": "passport",
        "verification_score": 0.9 if verification_passed else 0.2
    }

@tool
def calculate_risk_score(transaction: dict, context: dict) -> float:
    """Calculate overall fraud risk score"""
    risk_score = 0.2

    amount = float(transaction.get("amount", 0))
    if amount > 5000:
        risk_score += 0.2
    if amount > 10000:
        risk_score += 0.3

    if "foreign" in transaction.get("location", "").lower():
        risk_score += 0.2

    if transaction.get("merchant_category", "").lower() in ["gambling", "crypto"]:
        risk_score += 0.3

    if context.get("blacklist_results", {}).get("user_blacklisted"):
        risk_score += 0.3
    if context.get("blacklist_results", {}).get("merchant_blacklisted"):
        risk_score += 0.25
    if context.get("geolocation_results", {}).get("location_match") is False:
        risk_score += 0.2
    if context.get("identity_results", {}).get("verification_passed") is False:
        risk_score += 0.35

    return min(0.95, risk_score)

In [9]:
# Load model using transformers
def load_model():
    # List of models to try
    models = [
        "Qwen/Qwen3-0.6B" # Small model

    ]

    for model_name in models:
        try:
            print(f"Trying to load model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)

            # Set pad token if it doesn't exist
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Create pipeline
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=100,
                temperature=0.1,
                device=0 if torch.cuda.is_available() else -1
            )

            print(f"✅ Successfully loaded model: {model_name}")
            return pipe

        except Exception as e:
            print(f"❌ Failed to load {model_name}: {e}")
            continue

    return None

# Load the model
model_pipeline = load_model()

if model_pipeline is None:
    raise ValueError("Could not load any model. Please try again later or use a different environment.")


Trying to load model: Qwen/Qwen3-0.6B


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


✅ Successfully loaded model: Qwen/Qwen3-0.6B


In [29]:



class SimpleLLM:
    def __init__(self, pipeline):
        self.pipeline = pipeline

    def invoke(self, messages):
        # Extract the last message content safely for the prompt
        prompt_text = ""
        if isinstance(messages, list) and messages:
            last_message = messages[-1]
            # Handle different message object types
            if hasattr(last_message, 'content') and isinstance(last_message.content, str):
                prompt_text = last_message.content
            elif isinstance(last_message, str):
                 # Fallback if the last item is a raw string (less likely in this setup, but safe)
                 prompt_text = last_message
            else:
                # Fallback to string representation if content is not a simple string
                prompt_text = str(last_message)
        else:
            prompt_text = str(messages)

        # Generate text
        try:
            # Ensure prompt is a string
            prompt_text = str(prompt_text)
            outputs = self.pipeline(prompt_text)
            generated_text = outputs[0]['generated_text']
            # Remove the input prompt from the output if it's included (common with transformers)
            if generated_text.startswith(prompt_text):
                generated_text = generated_text[len(prompt_text):].strip()

            # --- Crucial Fix: Ensure the response object has the expected structure ---
            # The error might occur if the response object isn't handled correctly downstream.
            # Creating an AIMessage is the standard practice in LangGraph/LangChain workflows.
            response_message = AIMessage(content=generated_text)

        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            # Return an AIMessage even on error for consistency
            response_message = AIMessage(content=error_msg)

        return response_message # Return an AIMessage object


# Create the LLM wrapper
llm = SimpleLLM(model_pipeline)

# Initialize tools
tools = [check_blacklist, get_transaction_history, geolocation_verify, identity_verification, calculate_risk_score]


# Initialize workflow

workflow = StateGraph(FraudDetectionState)

# 1. Initial Assessment Node
def initial_assessment(state: FraudDetectionState) -> FraudDetectionState:
    """Perform initial transaction assessment"""
    transaction = state["transaction"]

    processed = {
        "amount": float(transaction.get("amount", 0)),
        "location": transaction.get("location", "").lower(),
        "time": transaction.get("time", ""),
        "user_id": transaction.get("user_id", ""),
        "merchant_id": transaction.get("merchant_id", ""),
        "ip_address": transaction.get("ip_address", ""),
        "document_id": transaction.get("document_id", ""),
        "merchant_category": transaction.get("merchant_category", "").lower()
    }

    context = {
        "processed_data": processed,
        "initial_flags": []
    }

    if processed["amount"] > 5000:
        context["initial_flags"].append("HIGH_AMOUNT")
    if "foreign" in processed["location"]:
        context["initial_flags"].append("FOREIGN_TRANSACTION")
    if processed["merchant_category"] in ["gambling", "crypto"]:
        context["initial_flags"].append("HIGH_RISK_MERCHANT")

    # Create a safe message without complex formatting
    initial_message = HumanMessage(content=f"""
    Analyze this transaction for potential fraud:
    Amount: {processed['amount']}
    Location: {processed['location']}
    Time: {processed['time']}
    User ID: {processed['user_id']}
    Merchant ID: {processed['merchant_id']}
    Merchant category: {processed['merchant_category']}
    Initial flags: {context['initial_flags']}

    Based on this initial assessment, decide what to do next:
    1. If you need more information, use the appropriate tools.
    2. If you have enough information, make a final decision.

    Your response should include either:
    - A tool call to gather more information
    - A final decision in the format: "Final Decision: [legitimate|suspicious|fraudulent]" with reasoning
    """)

    return {
        **state,
        "context": context,
        "messages": [initial_message],
        "current_step": "initial_assessment",
        "decision": "continue"
    }

In [28]:
def llm_decision(state: FraudDetectionState) -> FraudDetectionState:
    """LLM makes decision about next action or final verdict"""
    messages = state["messages"]

    # Get LLM response (should now be an AIMessage)
    response = llm.invoke(messages)

    # --- Safely check for final decision in the response content ---
    is_final = False
    # Ensure response has content and it's a string before checking
    response_content = getattr(response, 'content', '') # Safer way to access content
    if isinstance(response_content, str) and "Final Decision:" in response_content:
        is_final = True

    return {
        **state,
        "messages": messages + [response], # Append the AIMessage
        "decision": "final_decision" if is_final else "continue",
        "current_step": "llm_decision"
    }

In [27]:
# 3. Tool Execution Node
def execute_tools(state: FraudDetectionState) -> FraudDetectionState:
    """Execute tools requested by LLM"""
    messages = state["messages"]
    last_message = messages[-1]
    context = state["context"]

    # Get content safely
    content = last_message.content if hasattr(last_message, 'content') else str(last_message)
    print(f"content of the message --> {content}")

    tool_calls = []

    # Pattern matching for tool calls
    if "check_blacklist" in content:
        user_id = state["context"]["processed_data"]["user_id"]
        merchant_id = state["context"]["processed_data"]["merchant_id"]
        tool_calls.append({
            "name": "check_blacklist",
            "args": {"user_id": user_id, "merchant_id": merchant_id},
            "id": f"tool_call_{len(tool_calls)}"
        })

    if "get_transaction_history" in content:
        user_id = state["context"]["processed_data"]["user_id"]
        tool_calls.append({
            "name": "get_transaction_history",
            "args": {"user_id": user_id},
            "id": f"tool_call_{len(tool_calls)}"
        })

    if "geolocation_verify" in content:
        ip_address = state["context"]["processed_data"]["ip_address"]
        location = state["context"]["processed_data"]["location"]
        tool_calls.append({
            "name": "geolocation_verify",
            "args": {"ip_address": ip_address, "claimed_location": location},
            "id": f"tool_call_{len(tool_calls)}"
        })

    if "identity_verification" in content:
        user_id = state["context"]["processed_data"]["user_id"]
        document_id = state["context"]["processed_data"]["document_id"]
        tool_calls.append({
            "name": "identity_verification",
            "args": {"user_id": user_id, "document_id": document_id},
            "id": f"tool_call_{len(tool_calls)}"
        })

    if "calculate_risk_score" in content:
        tool_calls.append({
            "name": "calculate_risk_score",
            "args": {"transaction": state["transaction"], "context": context},
            "id": f"tool_call_{len(tool_calls)}"
        })

    # Tool mapping for manual execution
    tool_mapping = {
        "check_blacklist": check_blacklist,
        "get_transaction_history": get_transaction_history,
        "geolocation_verify": geolocation_verify,
        "identity_verification": identity_verification,
        "calculate_risk_score": calculate_risk_score
    }

    # Execute tools

    for tool_call in tool_calls:
        print(f"tool called --> {tool_call}")
        tool_name = tool_call["name"]
        tool_args = tool_call["args"]

        tool_func = tool_mapping.get(tool_name)
        if tool_func is None:
            tool_output = f"Error: Tool {tool_name} not found"
        else:
            try:
                tool_output = tool_func.invoke(tool_args)
            except Exception as e:
                tool_output = f"Error executing tool: {str(e)}"

        # Update context
        if tool_name == "check_blacklist":
            context["blacklist_results"] = tool_output
        elif tool_name == "get_transaction_history":
            context["transaction_history"] = tool_output
        elif tool_name == "geolocation_verify":
            context["geolocation_results"] = tool_output
        elif tool_name == "identity_verification":
            context["identity_results"] = tool_output
        elif tool_name == "calculate_risk_score":
            context["risk_score"] = tool_output

        messages.append(
            ToolMessage(
                content=str(tool_output),
                tool_call_id=tool_call["id"]
            )
        )

    # Create a safe follow-up message
    follow_up_message = HumanMessage(content=f"""
    I've executed the requested tools and updated the context.

    Current context summary:
    - Initial flags: {context.get('initial_flags', [])}
    - Blacklist results: {context.get('blacklist_results', 'Not checked')}
    - Transaction history: {context.get('transaction_history', 'Not retrieved')}
    - Geolocation results: {context.get('geolocation_results', 'Not verified')}
    - Identity verification: {context.get('identity_results', 'Not verified')}
    - Risk score: {context.get('risk_score', 'Not calculated')}

    Based on this information, decide what to do next:
    1. If you need more information, use the appropriate tools.
    2. If you have enough information, make a final decision.

    Your response should include either:
    - A tool call to gather more information
    - A final decision in the format: "Final Decision: [legitimate|suspicious|fraudulent]" with reasoning
    """)

    return {
        **state,
        "context": context,
        "messages": messages + [follow_up_message],
        "current_step": "execute_tools",
        "decision": "continue"
    }

In [23]:
def final_decision(state: FraudDetectionState) -> FraudDetectionState:
    """Extract and record the final decision"""
    messages = state["messages"]
    last_message = messages[-1]

    # --- Safely get content from the last message (could be HumanMessage, AIMessage, ToolMessage) ---
    # Use getattr with a default empty string
    content = getattr(last_message, 'content', '')

    verdict = "suspicious" # Default verdict

    if isinstance(content, str) and "Final Decision:" in content:
        try:
            decision_part = content.split("Final Decision:")[1].strip()
            if "legitimate" in decision_part.lower():
                verdict = "legitimate"
            elif "suspicious" in decision_part.lower():
                verdict = "suspicious"
            elif "fraudulent" in decision_part.lower():
                verdict = "fraudulent"
            # else: keep default 'suspicious'
        except IndexError:
            # Handle potential issues with splitting
            verdict = "suspicious"
    else:
        # Fallback based on risk score if "Final Decision:" marker is missing
        risk_score = state["context"].get("risk_score", 0.5)
        if risk_score > 0.8:
            verdict = "fraudulent"
        elif risk_score > 0.6:
            verdict = "suspicious"
        else:
            verdict = "legitimate"

    return {
        **state,
        "final_verdict": verdict,
        "current_step": "final_decision",
        "decision": "final_decision"
    }

In [24]:
# Add nodes to workflow
workflow.add_node("initial_assessment", initial_assessment)
workflow.add_node("llm_decision", llm_decision)
workflow.add_node("execute_tools", execute_tools)
workflow.add_node("final_decision", final_decision)

# Define workflow edges
workflow.set_entry_point("initial_assessment")
workflow.add_edge("initial_assessment", "llm_decision")

def route_after_llm(state: FraudDetectionState) -> str:
    if state["decision"] == "final_decision":
        return "final_decision"
    return "execute_tools"

workflow.add_conditional_edges(
    "llm_decision",
    route_after_llm,
    {
        "execute_tools": "execute_tools",
        "final_decision": "final_decision"
    }
)

workflow.add_edge("execute_tools", "llm_decision")
workflow.add_edge("final_decision", END)

# Compile the graph
app = workflow.compile()



ValueError: Node `initial_assessment` already present.

In [30]:
# Sample transaction
transaction = {
    "amount": "85",
    "location": "LOCAL",
    "time": "04:30",
    "user_id": "user123",
    "merchant_id": "merchant456",
    "ip_address": "192.168.1.1",
    "document_id": "XYZ",
    "merchant_category": "ecommerse"
}

# Initialize state
initial_state = {
    "transaction": transaction,
    "context": {},
    "messages": [],
    "decision": "continue",
    "final_verdict": "legitimate",
    "current_step": "start"
}

In [31]:
result = app.invoke(initial_state)

# Print results
print("\n=== FRAUD DETECTION RESULTS ===")
print(f"Transaction: {json.dumps(transaction, indent=2)}")
print("\nWorkflow Execution:")
for i, msg in enumerate(result["messages"]):
    # Access attributes directly
    role = msg.type # Use msg.type for the role (e.g., 'human', 'ai', 'tool')
    content = msg.content # Use msg.content for the message content
    print(f"\n--- Step {i+1} ({role.upper()}) ---")
    print(content)

print("\n=== CONTEXT SUMMARY ===")
for key, value in result["context"].items():
    if key != "processed_data":
        print(f"{key}: {json.dumps(value, indent=2)}")

print(f"\n=== FINAL VERDICT ===")
print(f"Decision: {result['final_verdict'].upper()}")
print("=== END OF REPORT ===")


=== FRAUD DETECTION RESULTS ===
Transaction: {
  "amount": "85",
  "location": "LOCAL",
  "time": "04:30",
  "user_id": "user123",
  "merchant_id": "merchant456",
  "ip_address": "192.168.1.1",
  "document_id": "XYZ",
  "merchant_category": "ecommerse"
}

Workflow Execution:

--- Step 1 (HUMAN) ---

    Analyze this transaction for potential fraud:
    Amount: 85.0
    Location: local
    Time: 04:30
    User ID: user123
    Merchant ID: merchant456
    Merchant category: ecommerse
    Initial flags: []
    
    Based on this initial assessment, decide what to do next:
    1. If you need more information, use the appropriate tools.
    2. If you have enough information, make a final decision.
    
    Your response should include either:
    - A tool call to gather more information
    - A final decision in the format: "Final Decision: [legitimate|suspicious|fraudulent]" with reasoning
    

--- Step 2 (AI) ---
**Final Answer**
The transaction is [legitimate|suspicious|fraudulent] bas