# Fraud Detection Workflow
This notebook implements an agentic workflow for fraud detection using LangGraph and open-source models.

In [None]:
# Install required packages
!pip install -q langgraph langchain-core langchain-community langchain-huggingface transformers accelerate bitsandbytes google-colab huggingface_hub

In [None]:
# Import libraries
from langgraph.graph import StateGraph, END
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from huggingface_hub import HfApi, model_info
from typing import TypedDict, List, Literal, Dict, Any
import json
import random
from google.colab import userdata

In [None]:
# Get and verify Hugging Face token.
# The token is read from the Colab secret named HF_TOKEN so it is never
# written into the notebook itself.
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    raise ValueError("Please add HF_TOKEN to your Colab secrets")

# Test token: any failure while asking the Hub who we are is reported as an
# invalid token.
try:
    api = HfApi(token=hf_token)
    user_info = api.whoami()
    print(f"✅ Token valid for user: {user_info['name']}")
except Exception as e:
    # Chain explicitly so the underlying error stays attached as __cause__.
    raise ValueError(f"Invalid Hugging Face token: {e}. Please create a new token at https://huggingface.co/settings/tokens") from e

In [None]:
# Define available models with fallback options (tried in this order)
MODELS = [
    "deepseek-ai/deepseek-coder-6.7b-instruct",  # Primary choice
    "HuggingFaceH4/zephyr-7b-beta",              # Fallback 1
    "mistralai/Mistral-7B-Instruct-v0.2",       # Fallback 2
    "google/flan-t5-large"                      # Fallback 3
]


def _build_endpoint(model_id: str) -> HuggingFaceEndpoint:
    """Create the text-generation endpoint client for ``model_id``.

    Keeping the generation settings in one place guarantees that the endpoint
    that was probed is configured exactly like the endpoint that is used.
    """
    return HuggingFaceEndpoint(
        repo_id=model_id,
        task="text-generation",
        max_new_tokens=512,
        top_k=50,
        temperature=0.1,
        repetition_penalty=1.03,
        huggingfacehub_api_token=hf_token
    )


# Find an accessible model: the first candidate that answers a probe wins.
working_model = None
llm = None
for model_id in MODELS:
    try:
        # Check if model is accessible
        info = model_info(model_id, token=hf_token)

        # Skip gated models we haven't accepted (the deepseek entry is
        # deliberately exempt from this check).
        if info.gated and "deepseek" not in model_id:
            print(f"⚠️  Skipping gated model: {model_id}")
            continue

        # Try to initialize the model and test it with a simple prompt
        candidate = _build_endpoint(model_id)
        test_response = candidate.invoke("Hello")
    except Exception as e:
        print(f"❌ Failed to load {model_id}: {e}")
        continue

    if test_response:
        working_model = model_id
        # Reuse the endpoint that just answered instead of building a second one.
        llm = candidate
        print(f"✅ Using model: {model_id}")
        break

    # An empty reply is treated as a failure; say so instead of skipping silently.
    print(f"⚠️  Empty response from {model_id}, trying next model")

if not working_model:
    raise ValueError("No accessible model found. Please check your token and model permissions.")

# Wrap in ChatHuggingFace for better compatibility
chat_model = ChatHuggingFace(llm=llm)

In [None]:
# Define state schema
class FraudDetectionState(TypedDict):
    """State dictionary handed from node to node in the fraud-detection graph."""

    # Transaction as supplied by the caller.
    transaction: dict
    # Working data accumulated by the nodes (processed fields, flags, tool results).
    context: dict
    # Conversation so far. The nodes store LangChain message objects here
    # (HumanMessage / AI replies / ToolMessage), not plain dicts, so the
    # element type is left open.
    messages: List[Any]
    # Routing signal read after the LLM node.
    decision: Literal["continue", "final_decision"]
    # Outcome recorded by the final-decision node.
    final_verdict: Literal["legitimate", "suspicious", "fraudulent"]
    # Name of the node that produced this state.
    current_step: str

In [None]:
# Define tools
@tool
def check_blacklist(user_id: str, merchant_id: str) -> dict:
    """Check if user or merchant is on fraud blacklist"""
    # Hard-coded stand-ins for a real blacklist lookup.
    flagged_users = ("fraud_user123", "suspicious_user456")
    flagged_merchants = ("bad_merchant789", "risky_merchant101")

    # Key order is kept stable because the result is later rendered as text.
    report = dict(
        user_blacklisted=user_id in flagged_users,
        merchant_blacklisted=merchant_id in flagged_merchants,
        blacklist_source="global_fraud_database",
    )
    return report

@tool
def get_transaction_history(user_id: str, days: int = 30) -> list:
    """Retrieve user's transaction history for pattern analysis"""
    # Mock data source: neither user_id nor days influences what is returned.
    merchant_pool = ["amazon", "walmart", "target", "bestbuy"]
    location_pool = ["US", "CA", "UK"]

    # Three fixed records come first ...
    records = [
        {"amount": 120.50, "merchant": "amazon", "location": "US", "time": "2023-05-01"},
        {"amount": 45.30, "merchant": "starbucks", "location": "US", "time": "2023-05-03"},
        {"amount": 899.99, "merchant": "apple", "location": "US", "time": "2023-05-05"}
    ]

    # ... followed by two to five randomly generated ones.
    extra_count = random.randint(2, 5)
    records.extend(
        {
            "amount": random.uniform(10, 500),
            "merchant": random.choice(merchant_pool),
            "location": random.choice(location_pool),
            "time": f"2023-05-{random.randint(1, 28):02d}",
        }
        for _ in range(extra_count)
    )
    return records

@tool
def geolocation_verify(ip_address: str, claimed_location: str) -> dict:
    """Verify if transaction location matches IP geolocation"""
    # Mock lookup: ip_address is not consulted; a country code is drawn at random.
    ip_location = random.choice(["US", "CA", "UK", "FR", "DE"])

    # Compare case-insensitively. initial_assessment lower-cases the location
    # before it is passed here, so an exact comparison against the upper-case
    # codes above could never match and every transaction scored as a mismatch.
    location_match = ip_location.casefold() == claimed_location.strip().casefold()

    return {
        "ip_location": ip_location,
        # Echo the claim exactly as received.
        "claimed_location": claimed_location,
        "location_match": location_match,
        "risk_score": 0.8 if not location_match else 0.1
    }

@tool
def identity_verification(user_id: str, document_id: str) -> dict:
    """Perform identity verification checks"""
    # Mock check: document_id is ignored and exactly one hard-coded user fails.
    passed = user_id != "unverified_user789"
    score = 0.9 if passed else 0.2

    outcome = {}
    outcome["verification_passed"] = passed
    outcome["document_type"] = "passport"
    outcome["verification_score"] = score
    return outcome

@tool
def calculate_risk_score(transaction: dict, context: dict) -> float:
    """Calculate overall fraud risk score"""
    # Additive heuristic: start from a base score, add a fixed penalty per
    # risk signal, and cap the total at 0.95.

    def _tool_result(key: str) -> dict:
        # A context entry may be something other than a dict (for example an
        # error string recorded for a failed tool call); treat that as
        # "no result" instead of crashing on .get().
        value = context.get(key)
        return value if isinstance(value, dict) else {}

    risk_score = 0.2

    # Amount penalties stack: above 10000 both increments apply.
    amount = float(transaction.get("amount", 0))
    if amount > 5000:
        risk_score += 0.2
    if amount > 10000:
        risk_score += 0.3

    # `or ""` keeps a None field from breaking the string operations below.
    location = str(transaction.get("location") or "")
    if "foreign" in location.lower():
        risk_score += 0.2

    merchant_category = str(transaction.get("merchant_category") or "")
    if merchant_category.lower() in ["gambling", "crypto"]:
        risk_score += 0.3

    blacklist = _tool_result("blacklist_results")
    if blacklist.get("user_blacklisted"):
        risk_score += 0.3
    if blacklist.get("merchant_blacklisted"):
        risk_score += 0.25

    # `is False` on purpose: a check that has not run (missing key -> None)
    # must not count as a failed check.
    if _tool_result("geolocation_results").get("location_match") is False:
        risk_score += 0.2
    if _tool_result("identity_results").get("verification_passed") is False:
        risk_score += 0.35

    return min(0.95, risk_score)

In [None]:
# All tool callables, collected in one list
tools = [
    check_blacklist,
    get_transaction_history,
    geolocation_verify,
    identity_verification,
    calculate_risk_score,
]

# State graph that the node functions are registered on later in the notebook
workflow = StateGraph(FraudDetectionState)

In [None]:
# 1. Initial Assessment Node
def initial_assessment(state: FraudDetectionState) -> FraudDetectionState:
    """Normalize the transaction, raise initial flags and open the conversation.

    Reads ``state["transaction"]`` and returns a copy of the state with:

    * ``context`` holding the normalized fields (``processed_data``) and the
      rule-based ``initial_flags``,
    * ``messages`` reset to a single HumanMessage that asks the model to either
      request tools or give a final decision,
    * ``current_step`` set to ``"initial_assessment"`` and ``decision`` to
      ``"continue"``.

    Raises whatever ``float()`` raises if the amount is not numeric.
    """
    transaction = state["transaction"]

    # Normalize types and casing once so later nodes can rely on them.
    processed = {
        "amount": float(transaction.get("amount", 0)),
        "location": transaction.get("location", "").lower(),
        "time": transaction.get("time", ""),
        "user_id": transaction.get("user_id", ""),
        "merchant_id": transaction.get("merchant_id", ""),
        "ip_address": transaction.get("ip_address", ""),
        "document_id": transaction.get("document_id", ""),
        "merchant_category": transaction.get("merchant_category", "").lower()
    }

    context = {
        "processed_data": processed,
        "initial_flags": []
    }

    if processed["amount"] > 5000:
        context["initial_flags"].append("HIGH_AMOUNT")
    if "foreign" in processed["location"]:
        context["initial_flags"].append("FOREIGN_TRANSACTION")
    if processed["merchant_category"] in ["gambling", "crypto"]:
        context["initial_flags"].append("HIGH_RISK_MERCHANT")

    # The tool-execution node only reacts to exact tool names appearing in the
    # model's reply, so the model has to be told which names exist. The list is
    # built from the notebook-level `tools` list.
    tool_catalog = "\n".join(
        f"    - {available.name}: {available.description}" for available in tools
    )

    # Use json.dumps to safely include JSON in the message
    initial_message = HumanMessage(content=f"""
    Analyze this transaction for potential fraud:
    Transaction: {json.dumps(processed, indent=2)}
    Initial flags: {context['initial_flags']}
    
    Available tools (mention a tool by its exact name to request it):
{tool_catalog}
    
    Based on this initial assessment, decide what to do next:
    1. If you need more information, use the appropriate tools.
    2. If you have enough information, make a final decision.
    
    Your response should include either:
    - A tool call to gather more information
    - A final decision in the format: "Final Decision: [legitimate|suspicious|fraudulent]" with reasoning
    """)

    return {
        **state,
        "context": context,
        "messages": [initial_message],
        "current_step": "initial_assessment",
        "decision": "continue"
    }

In [None]:
# 2. LLM Decision Node
def llm_decision(state: FraudDetectionState) -> FraudDetectionState:
    """Ask the chat model for its next move and record the reply.

    The whole message history is sent to ``chat_model``. The reply is appended
    to a new message list, and ``decision`` becomes ``"final_decision"`` when
    the reply text contains the marker ``Final Decision:``; otherwise it stays
    ``"continue"``.
    """
    history = state["messages"]
    reply = chat_model.invoke(history)

    if "Final Decision:" in reply.content:
        next_decision = "final_decision"
    else:
        next_decision = "continue"

    updated = dict(state)
    updated.update(
        messages=history + [reply],
        decision=next_decision,
        current_step="llm_decision",
    )
    return updated

In [None]:
# 3. Tool Execution Node
def execute_tools(state: FraudDetectionState) -> FraudDetectionState:
    """Run every tool whose name appears in the latest message and report back.

    Tool requests are detected by plain substring match of the tool name in the
    content of the last message. Arguments are taken from
    ``context["processed_data"]`` (and, for the risk score, from the raw
    transaction and the current context). For each executed tool a ToolMessage
    is appended; successful results are also stored in the context. Finally a
    HumanMessage summarising the context asks the model what to do next.

    Returns a copy of the state with updated ``context`` and ``messages``,
    ``current_step`` set to ``"execute_tools"`` and ``decision`` to
    ``"continue"``. The incoming state's list and context dict are not
    modified.
    """
    # Work on copies so the caller's state objects are never mutated in place.
    messages = list(state["messages"])
    context = dict(state["context"])
    content = messages[-1].content

    def _processed() -> dict:
        # Looked up lazily: only needed when a tool was actually requested.
        return state["context"]["processed_data"]

    # (tool name, tool, context key for its result, argument builder), in
    # execution order. The risk score comes last and receives the working
    # context, so it sees the results of the tools that ran before it.
    registry = [
        ("check_blacklist", check_blacklist, "blacklist_results",
         lambda: {"user_id": _processed()["user_id"],
                  "merchant_id": _processed()["merchant_id"]}),
        ("get_transaction_history", get_transaction_history, "transaction_history",
         lambda: {"user_id": _processed()["user_id"]}),
        ("geolocation_verify", geolocation_verify, "geolocation_results",
         lambda: {"ip_address": _processed()["ip_address"],
                  "claimed_location": _processed()["location"]}),
        ("identity_verification", identity_verification, "identity_results",
         lambda: {"user_id": _processed()["user_id"],
                  "document_id": _processed()["document_id"]}),
        ("calculate_risk_score", calculate_risk_score, "risk_score",
         lambda: {"transaction": state["transaction"], "context": context}),
    ]

    # Pattern matching for tool calls; arguments are built before anything runs.
    requested = [
        (tool_name, tool_func, context_key, build_args())
        for tool_name, tool_func, context_key, build_args in registry
        if tool_name in content
    ]

    # Continue numbering after the ToolMessages already in the history so ids
    # stay unique across loop iterations.
    id_offset = sum(1 for earlier in messages if isinstance(earlier, ToolMessage))
    failures = {}

    # Execute tools
    for index, (tool_name, tool_func, context_key, tool_args) in enumerate(requested):
        try:
            tool_output = tool_func.invoke(tool_args)
        except Exception as e:
            # Report the failure, but do not store the error text as if it were
            # a result: later code reads these context entries as dicts/numbers.
            tool_output = f"Error executing tool: {str(e)}"
            failures[tool_name] = tool_output
        else:
            context[context_key] = tool_output

        # NOTE(review): these ToolMessages are not paired with tool_calls on the
        # preceding AI message; some chat templates may reject or ignore the
        # "tool" role - confirm against the selected model.
        messages.append(
            ToolMessage(
                content=str(tool_output),
                tool_call_id=f"tool_call_{id_offset + index}"
            )
        )

    # Only present when something failed, so the usual prompt text is unchanged.
    failure_note = ""
    if failures:
        failure_note = "\n    - Tool errors: " + json.dumps(failures, indent=2)

    # Use json.dumps to safely include context in the message
    follow_up_message = HumanMessage(content=f"""
    I've executed the requested tools and updated the context.
    
    Current context summary:
    - Initial flags: {context.get('initial_flags', [])}
    - Blacklist results: {json.dumps(context.get('blacklist_results', 'Not checked'), indent=2)}
    - Transaction history: {json.dumps(context.get('transaction_history', 'Not retrieved'), indent=2)}
    - Geolocation results: {json.dumps(context.get('geolocation_results', 'Not verified'), indent=2)}
    - Identity verification: {json.dumps(context.get('identity_results', 'Not verified'), indent=2)}
    - Risk score: {json.dumps(context.get('risk_score', 'Not calculated'), indent=2)}{failure_note}
    
    Based on this information, decide what to do next:
    1. If you need more information, use the appropriate tools.
    2. If you have enough information, make a final decision.
    
    Your response should include either:
    - A tool call to gather more information
    - A final decision in the format: "Final Decision: [legitimate|suspicious|fraudulent]" with reasoning
    """)

    return {
        **state,
        "context": context,
        "messages": messages + [follow_up_message],
        "current_step": "execute_tools",
        "decision": "continue"
    }

In [None]:
# 4. Final Decision Node
def final_decision(state: "FraudDetectionState") -> "FraudDetectionState":
    """Extract the verdict from the last message and record it in the state.

    If the last message contains ``Final Decision:``, the verdict is whichever
    of ``legitimate`` / ``suspicious`` / ``fraudulent`` appears first in the
    text that follows the marker, or ``suspicious`` if none of them appears.
    Without the marker the verdict is derived from ``context["risk_score"]``:
    above 0.8 is fraudulent, above 0.6 is suspicious, otherwise legitimate.
    A missing or non-numeric score counts as 0.5.

    Returns a copy of the state with ``final_verdict`` set, and both
    ``current_step`` and ``decision`` set to ``"final_decision"``.
    """
    content = state["messages"][-1].content

    if "Final Decision:" in content:
        decision_part = content.split("Final Decision:")[1].strip().lower()
        # Pick the keyword that occurs first after the marker. Testing the
        # keywords in a fixed order would let a word from the reasoning
        # ("fraudulent ... not legitimate") override the stated verdict.
        positions = {
            label: decision_part.find(label)
            for label in ("legitimate", "suspicious", "fraudulent")
        }
        present = {label: pos for label, pos in positions.items() if pos != -1}
        verdict = min(present, key=present.get) if present else "suspicious"
    else:
        risk_score = state["context"].get("risk_score", 0.5)
        # The stored value may not be a number (e.g. an error string); fall
        # back to the neutral default instead of failing on the comparison.
        if isinstance(risk_score, bool) or not isinstance(risk_score, (int, float)):
            risk_score = 0.5
        if risk_score > 0.8:
            verdict = "fraudulent"
        elif risk_score > 0.6:
            verdict = "suspicious"
        else:
            verdict = "legitimate"

    return {
        **state,
        "final_verdict": verdict,
        "current_step": "final_decision",
        "decision": "final_decision"
    }

In [None]:
# Register each node function under the name the edges refer to
for node_name, node_fn in (
    ("initial_assessment", initial_assessment),
    ("llm_decision", llm_decision),
    ("execute_tools", execute_tools),
    ("final_decision", final_decision),
):
    workflow.add_node(node_name, node_fn)

# The run starts at the initial assessment, which always hands over to the LLM
workflow.set_entry_point("initial_assessment")
workflow.add_edge("initial_assessment", "llm_decision")

# Upper bound on model replies before the graph stops asking for more tools.
MAX_LLM_TURNS = 8


def route_after_llm(state: "FraudDetectionState") -> str:
    """Choose the node that follows the LLM node.

    Returns ``"final_decision"`` when the LLM node flagged a final decision,
    or when the model has already replied ``MAX_LLM_TURNS`` times without
    giving one; otherwise returns ``"execute_tools"``.

    Without the cap, the llm_decision <-> execute_tools cycle ends only when
    the model emits the final-decision marker; a model that never does would
    keep looping until the graph's recursion limit aborts the run. Routing to
    ``final_decision`` instead lets that node fall back to the risk score.
    """
    if state["decision"] == "final_decision":
        return "final_decision"

    # Model replies are identified by their message type ("ai").
    llm_turns = sum(
        1 for message in state["messages"] if getattr(message, "type", None) == "ai"
    )
    if llm_turns >= MAX_LLM_TURNS:
        return "final_decision"

    return "execute_tools"

# After the LLM node, the router's return value selects the node of the same name
llm_routes = {target: target for target in ("execute_tools", "final_decision")}
workflow.add_conditional_edges("llm_decision", route_after_llm, llm_routes)

# Tool results always go back to the LLM; the final decision ends the run
workflow.add_edge("execute_tools", "llm_decision")
workflow.add_edge("final_decision", END)

# Compile the graph into the runnable used by the cells below
app = workflow.compile()

In [None]:
# Example transaction fed through the workflow
transaction = dict(
    amount="8500",
    location="FOREIGN_COUNTRY",
    time="04:30",
    user_id="user123",
    merchant_id="merchant456",
    ip_address="192.168.1.1",
    document_id="DOC789",
    merchant_category="gambling",
)

# Starting state: empty context and history, placeholder decision fields
initial_state = dict(
    transaction=transaction,
    context={},
    messages=[],
    decision="continue",
    final_verdict="legitimate",
    current_step="start",
)

In [None]:
# Run the workflow
result = app.invoke(initial_state)


def _describe_message(msg):
    """Return ``(role, content)`` for a message object or a plain dict.

    The workflow stores LangChain message objects, which expose ``type`` and
    ``content`` attributes and have no ``.get``; plain dicts are still accepted
    and read through their ``role`` / ``content`` keys.
    """
    if isinstance(msg, dict):
        return msg.get("role", "system"), msg.get("content", str(msg))
    return getattr(msg, "type", "system"), getattr(msg, "content", str(msg))


# Print results
print("\n=== FRAUD DETECTION RESULTS ===")
print(f"Transaction: {json.dumps(transaction, indent=2)}")
print("\nWorkflow Execution:")
for i, msg in enumerate(result["messages"]):
    role, content = _describe_message(msg)
    print(f"\n--- Step {i+1} ({role.upper()}) ---")
    print(content)

print("\n=== CONTEXT SUMMARY ===")
for key, value in result["context"].items():
    # processed_data is already shown as part of the first message.
    if key != "processed_data":
        # default=str keeps the report going if a value is not JSON-serializable.
        print(f"{key}: {json.dumps(value, indent=2, default=str)}")

print("\n=== FINAL VERDICT ===")
print(f"Decision: {result['final_verdict'].upper()}")
print("=== END OF REPORT ===")