In [11]:
# Install required packages (if not already installed)
!pip install sqlalchemy networkx scikit-learn --quiet

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.ensemble import IsolationForest
from sqlalchemy import create_engine
from datetime import datetime, timedelta
import random

# Step 1: Generate synthetic transaction dataset with 5000 entries
def generate_transactions(n=5000):
    customers = [f"Customer_{i}" for i in range(1, 501)]  # 500 customers
    merchants = [f"Merchant_{i}" for i in range(1, 101)]  # 100 merchants

    data = []
    for i in range(n):
        cust = random.choice(customers)
        merch = random.choice(merchants)
        amt = np.random.exponential(scale=150)  # average amount ~150
        ts = datetime.now() - timedelta(days=random.randint(0, 30))
        data.append([i, cust, merch, round(amt, 2), ts])
    return pd.DataFrame(data, columns=["transaction_id", "customer", "merchant", "amount", "timestamp"])

print("Generating 5000 transactions with 500 customers and 100 merchants...")
df = generate_transactions()
print("Sample transactions:")
print(df.head())

# Step 2: Build transaction graph
G = nx.Graph()
for _, row in df.iterrows():
    G.add_edge(row['customer'], row['merchant'], weight=row['amount'])

print(f"\nGraph nodes: {G.number_of_nodes()}, edges: {G.number_of_edges()}")

# Step 3: Extract graph features
deg_centrality = nx.degree_centrality(G)
weighted_degree = {}
for node in G.nodes():
    weighted_degree[node] = sum(attr['weight'] for _, _, attr in G.edges(node, data=True))

features = pd.DataFrame({
    'entity': list(G.nodes()),
    'degree_centrality': [deg_centrality[n] for n in G.nodes()],
    'weighted_degree': [weighted_degree[n] for n in G.nodes()]
})

print("\nSample graph features:")
print(features.head())

# Step 4: Anomaly detection using Isolation Forest
clf = IsolationForest(contamination=0.05, random_state=42)
features['anomaly'] = clf.fit_predict(features[['degree_centrality', 'weighted_degree']])

print("\nAnomaly detection results sample:")
print(features.head())

# Step 5: Store results in SQLite
engine = create_engine('sqlite:///banking_fraud.db')
features.to_sql('anomaly_results', con=engine, if_exists='replace', index=False)
print("\nAnomaly results stored in SQLite database 'banking_fraud.db'")

# Step 6: Show detected anomalies
anomalies = features[features['anomaly'] == -1]
print(f"\nTotal anomalies detected: {len(anomalies)}")
print(anomalies.head(10))


Generating 5000 transactions with 500 customers and 100 merchants...
Sample transactions:
   transaction_id      customer     merchant  amount  \
0               0   Customer_63  Merchant_37   23.48   
1               1  Customer_491  Merchant_44  215.88   
2               2  Customer_201  Merchant_79   11.76   
3               3   Customer_51  Merchant_62  423.52   
4               4   Customer_73   Merchant_8   32.27   

                   timestamp  
0 2025-07-18 18:27:11.537294  
1 2025-07-10 18:27:11.537318  
2 2025-07-11 18:27:11.537324  
3 2025-07-16 18:27:11.537328  
4 2025-07-30 18:27:11.537331  

Graph nodes: 600, edges: 4761

Sample graph features:
         entity  degree_centrality  weighted_degree
0   Customer_63           0.020033          2081.22
1   Merchant_37           0.071786          7070.17
2  Customer_491           0.021703          1721.29
3   Merchant_44           0.065109          5560.40
4  Customer_201           0.023372          1599.18

Anomaly detection r