# Fraud Detection: Complete Anomaly Detection Workflow

## Overview

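End-to-end fraud detection example: ingest transactions, build a temporal knowledge graph, detect suspicious patterns, identify anomalies, and generate alerts. The notebook runs on a small set of sample transactions written to a temporary JSON file.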

## Workflow: Stream Transactions → Build Temporal KG → Detect Patterns → Identify Anomalies → Alert System


In [None]:
from semantica.ingest import StreamIngestor, FileIngestor
from semantica.parse import DocumentParser, StructuredDataParser
from semantica.semantic_extract import NERExtractor, RelationExtractor
from semantica.kg import GraphBuilder, GraphAnalyzer, TemporalPatternDetector
from semantica.reasoning import InferenceEngine
from datetime import datetime, timedelta
import json
import os
import tempfile


## Step 1: Stream Transactions


In [None]:
# Ingestion and parsing components for this step.
# NOTE: stream_ingestor is instantiated here but not called in this cell.
stream_ingestor = StreamIngestor()
file_ingestor = FileIngestor()
structured_parser = StructuredDataParser()

# The sample transactions are persisted to a JSON file in a fresh temp directory.
temp_dir = tempfile.mkdtemp()
transactions_file = os.path.join(temp_dir, "transactions.json")

# Each row: (transaction id, user id, amount, merchant, location, age, device).
# "age" is how long before the current time the transaction happened.
transactions_data = [
    {
        "transaction_id": txn_id,
        "user_id": user_id,
        "amount": amount,
        "merchant": merchant,
        "location": location,
        "timestamp": (datetime.now() - age).isoformat(),
        "device": device,
    }
    for txn_id, user_id, amount, merchant, location, age, device in (
        ("txn_001", "user_123", 150.00, "Online Store", "New York", timedelta(hours=1), "mobile"),
        ("txn_002", "user_123", 2500.00, "Luxury Store", "Paris", timedelta(minutes=30), "web"),
        ("txn_003", "user_456", 50.00, "Grocery Store", "San Francisco", timedelta(minutes=15), "mobile"),
        ("txn_004", "user_123", 5000.00, "Electronics Store", "Tokyo", timedelta(minutes=5), "mobile"),
    )
]

with open(transactions_file, 'w') as json_out:
    json.dump(transactions_data, json_out)

file_objects = file_ingestor.ingest_file(transactions_file, read_content=True)
parsed_data = structured_parser.parse_json(transactions_file)

# Normalise the parsed records: keep only dict records and turn ISO-8601
# timestamp strings into datetime objects. If the parsed result has no
# "data" key, the in-memory sample list is used instead.
transaction_stream = []
for record in parsed_data.get("data", transactions_data):
    if not isinstance(record, dict):
        continue
    normalized = record.copy()
    raw_timestamp = normalized.get("timestamp")
    if isinstance(raw_timestamp, str):
        normalized["timestamp"] = datetime.fromisoformat(raw_timestamp)
    transaction_stream.append(normalized)

print(f"Ingested {len(file_objects)} transaction files")
print(f"Parsed {len(transaction_stream)} transactions from structured data")


## Step 2: Build Temporal Knowledge Graph


In [None]:
builder = GraphBuilder()

transaction_entities = []
relationships = []

# (type, id) pairs already emitted as entities. Users, merchants and locations
# recur across transactions; without this guard each recurrence would add a
# duplicate node and inflate the entity count reported below.
seen_entities = set()

for txn in transaction_stream:
    txn_id = txn["transaction_id"]
    user_id = txn["user_id"]
    merchant = txn["merchant"]
    location = txn["location"]
    iso_timestamp = txn["timestamp"].isoformat()

    # Candidate nodes for this transaction, in the order they are emitted.
    candidates = (
        (
            txn_id,
            "Transaction",
            {
                "amount": txn["amount"],
                "timestamp": iso_timestamp,
                "device": txn["device"],
            },
        ),
        (user_id, "User", {}),
        (merchant, "Merchant", {}),
        (location, "Location", {}),
    )
    for entity_id, entity_type, properties in candidates:
        # Key on (type, id) so that, e.g., a merchant and a location that
        # happen to share a name are still kept as separate nodes.
        entity_key = (entity_type, entity_id)
        if entity_key in seen_entities:
            continue
        seen_entities.add(entity_key)
        transaction_entities.append({
            "id": entity_id,
            "type": entity_type,
            "properties": properties,
        })

    # Edges are always emitted: every transaction has its own
    # user -> transaction -> merchant / location links.
    relationships.append({
        "source": user_id,
        "target": txn_id,
        "type": "performed",
        "properties": {"timestamp": iso_timestamp}
    })

    relationships.append({
        "source": txn_id,
        "target": merchant,
        "type": "at_merchant",
        "properties": {}
    })

    relationships.append({
        "source": txn_id,
        "target": location,
        "type": "in_location",
        "properties": {}
    })

transaction_kg = builder.build(transaction_entities, relationships)

print(f"Built temporal knowledge graph with {len(transaction_entities)} entities and {len(relationships)} relationships")


## Step 3: Detect Patterns


In [None]:
# Analysis components for this step.
# NOTE: inference_engine is instantiated here but not called in this cell.
inference_engine = InferenceEngine()
pattern_detector = TemporalPatternDetector()
graph_analyzer = GraphAnalyzer()

temporal_patterns = pattern_detector.detect_temporal_patterns(
    transaction_kg, pattern_type="sequence", min_frequency=2
)
connectivity_analysis = graph_analyzer.analyze_connectivity(transaction_kg)

# Group the transaction stream by user, preserving stream order.
user_transactions = {}
for txn in transaction_stream:
    user_transactions.setdefault(txn["user_id"], []).append(txn)

# Rule-based checks. Only users with more than one transaction are evaluated.
fraud_patterns = []
for user_id, history in user_transactions.items():
    if len(history) <= 1:
        continue

    spent = [entry["amount"] for entry in history]
    visited = [entry["location"] for entry in history]
    seen_at = [entry["timestamp"] for entry in history]

    # Rule 1: any single amount above 1000.
    peak_amount = max(spent)
    if peak_amount > 1000:
        fraud_patterns.append({
            "type": "high_value_transaction",
            "user_id": user_id,
            "amount": peak_amount,
            "severity": "medium"
        })

    # Rule 2: more than two distinct locations, with the user's first and
    # last transactions less than 3600 seconds apart.
    distinct_locations = set(visited)
    if len(distinct_locations) > 2:
        elapsed = max(seen_at) - min(seen_at)
        if elapsed.total_seconds() < 3600:
            fraud_patterns.append({
                "type": "rapid_location_change",
                "user_id": user_id,
                "locations": list(distinct_locations),
                "severity": "high"
            })

print(f"Detected {len(fraud_patterns)} fraud patterns")
print(f"Temporal patterns: {len(temporal_patterns)}")
print(f"Connectivity analysis: {connectivity_analysis.get('is_connected', False)}")
for pattern in fraud_patterns:
    print(f"  Pattern: {pattern['type']} - User: {pattern['user_id']} - Severity: {pattern['severity']}")


## Step 4: Identify Anomalies


In [None]:
anomaly_patterns = pattern_detector.detect_temporal_patterns(
    transaction_kg,
    pattern_type="anomaly",
    min_frequency=1
)

# Group the stream by user once, instead of rescanning the whole stream (and
# re-sorting the user's history) for every single transaction.
transactions_by_user = {}
for txn in transaction_stream:
    transactions_by_user.setdefault(txn["user_id"], []).append(txn)

# Users whose (up to) three most recent transactions span more than two
# distinct locations within less than 3600 seconds. The verdict depends only
# on the user's history, so it is computed once per user.
users_with_rapid_location_changes = set()
for uid, user_txns in transactions_by_user.items():
    if len(user_txns) <= 1:
        continue
    recent_txns = sorted(user_txns, key=lambda x: x["timestamp"], reverse=True)[:3]
    if len({t["location"] for t in recent_txns}) <= 2:
        continue
    # recent_txns is sorted newest-first, so this is newest minus oldest.
    time_span = recent_txns[0]["timestamp"] - recent_txns[-1]["timestamp"]
    if time_span.total_seconds() < 3600:
        users_with_rapid_location_changes.add(uid)

# Score every transaction; rule weights add up and each rule that fires
# records a human-readable reason.
anomalies = []
for txn in transaction_stream:
    score = 0
    reasons = []

    if txn["amount"] > 2000:
        score += 3
        reasons.append("High transaction amount")

    if txn["amount"] > 1000 and txn["device"] == "mobile":
        score += 2
        reasons.append("High amount on mobile device")

    # Applies to every transaction of a flagged user, not only the ones
    # that took part in the location change.
    if txn["user_id"] in users_with_rapid_location_changes:
        score += 4
        reasons.append("Rapid location changes")

    # Transactions scoring below 3 are not reported; 5 and above is "high".
    if score >= 3:
        anomalies.append({
            "transaction_id": txn["transaction_id"],
            "user_id": txn["user_id"],
            "severity": "high" if score >= 5 else "medium",
            "score": score,
            "reasons": reasons,
            "timestamp": txn["timestamp"]
        })

print(f"Detected {len(anomalies)} anomalies")
for anomaly in anomalies:
    print(f"  Transaction: {anomaly['transaction_id']} - Severity: {anomaly['severity']} - Score: {anomaly['score']}")
    print(f"    Reasons: {', '.join(anomaly['reasons'])}")


## Step 5: Alert System


In [None]:
def send_alert(anomaly):
    """Build the alert record for a detected anomaly.

    Despite the name, nothing is delivered anywhere: the function only
    assembles and returns the alert dictionary.

    Args:
        anomaly: Mapping with the keys "transaction_id", "user_id",
            "severity" and "reasons".

    Returns:
        A dict with the keys "alert_id" (the transaction id prefixed with
        "alert_"), "transaction_id", "user_id", "severity", "timestamp"
        (ISO-8601 string of the current time from ``datetime.now()``) and
        "reasons".

    Raises:
        KeyError: If ``anomaly`` lacks one of the required keys.
    """
    alert = {
        "alert_id": f"alert_{anomaly['transaction_id']}",
        "transaction_id": anomaly["transaction_id"],
        "user_id": anomaly["user_id"],
        "severity": anomaly["severity"],
        "timestamp": datetime.now().isoformat(),
        # Copy so later edits to the alert do not silently change the
        # anomaly record it was built from (and vice versa).
        "reasons": list(anomaly["reasons"])
    }
    return alert

def log_fraud_event(anomaly):
    """Build the fraud-event record for a detected anomaly.

    The record is only assembled and returned; this function does not write
    it anywhere.

    Args:
        anomaly: Mapping with the keys "transaction_id", "user_id",
            "severity" and "score".

    Returns:
        A dict with "event_type" set to "fraud_detected", the four fields
        copied from ``anomaly``, and "timestamp" holding the ISO-8601 string
        of the current time from ``datetime.now()``.
    """
    event = {"event_type": "fraud_detected"}
    # Copy the identifying fields across in the order they appear in the event.
    for field in ("transaction_id", "user_id", "severity", "score"):
        event[field] = anomaly[field]
    event["timestamp"] = datetime.now().isoformat()
    return event

# Minimum anomaly score that produces an alert and a logged fraud event.
threshold = 3
alerts = []
fraud_events = []

for anomaly in anomalies:
    # Skip anything scoring under the alerting threshold.
    if anomaly["score"] < threshold:
        continue
    alerts.append(send_alert(anomaly))
    fraud_events.append(log_fraud_event(anomaly))

print(f"Generated {len(alerts)} alerts")
for alert in alerts:
    print(f"  Alert: {alert['alert_id']} - Severity: {alert['severity']} - Transaction: {alert['transaction_id']}")

print(f"\nLogged {len(fraud_events)} fraud events")

# Report how many entities the built knowledge graph exposes under "entities".
entities_count = len(transaction_kg.get("entities", []))
print(f"\nMonitoring {entities_count} transaction entities")


## Summary

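This notebook demonstrated a complete fraud detection workflow:
- Sample transactions ingested from a JSON file and parsed into a transaction stream
- Temporal knowledge graph built from transactions, users, merchants, and locations
- Fraud patterns detected (high-value transactions and rapid location changes)
- Anomalies identified by scoring each transaction
- Alerts and fraud events generated for anomalies at or above the score threshold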
