# How to get started in Money Laundering

In [1]:
from collections import defaultdict
from datetime import datetime, timedelta
import itertools

from icecream import ic
from yfiles_jupyter_graphs_for_kuzu import KuzuGraphWidget
import kuzu
import networkx as nx
import polars as pl
import watermark

from aml import Simulation

In [2]:
# Load the watermark extension so the run environment is recorded with the notebook.
%load_ext watermark
# Report the timestamp, Python/IPython versions, and hardware details.
%watermark
# Report the versions of the imported packages.
%watermark --iversions

Last updated: 2025-08-10T19:11:16.034606-07:00

Python implementation: CPython
Python version       : 3.13.3
IPython version      : 9.1.0

Compiler    : Clang 16.0.0 (clang-1600.0.26.6)
OS          : Darwin
Release     : 24.5.0
Machine     : arm64
Processor   : arm
CPU cores   : 14
Architecture: 64bit

networkx                      : 3.4.2
yfiles_jupyter_graphs_for_kuzu: 0.0.4
kuzu                          : 0.9.0
watermark                     : 2.5.0
polars                        : 1.29.0



## Extract one fraud network

Be sure to run the `create_graph.ipynb` notebook at least once first.

Then be sure to **shut down** the `create_graph.ipynb` notebook after running it.

In [3]:
# Location of the Kuzu database.
# NOTE(review): presumably this is the database written by `create_graph.ipynb` -- confirm.
DB_PATH = "./db"

# Open the database and create the connection that the widget and queries below reuse.
db = kuzu.Database(DB_PATH)
conn = kuzu.Connection(db)

Create a yFiles graph widget so we can explore our graph interactively

In [4]:
# Wrap the open Kuzu connection in a yFiles widget for interactive graph exploration.
g = KuzuGraphWidget(conn)

In [5]:
# Visualize paths of 1 to 3 hops leading out of any `Entity` whose description
# contains "Abassin", capped at 100 results and drawn with a radial layout.
g.show_cypher(
    """
    MATCH (a:Entity)-[b *1..3]->(c)
    WHERE a.descrip CONTAINS "Abassin"
    RETURN * LIMIT 100;
    """,
    layout="radial"
)

GraphWidget(layout=Layout(height='650px', width='100%'))

Now let's extract the shell companies in this particular fraud network

In [6]:
# Same traversal as the visualization, restricted to organizations reached within
# 1 to 3 hops; each organization name comes back with its distinct addresses.
res = conn.execute(
    """
    MATCH (a:Entity)-[b *1..3]->(c)
    WHERE a.descrip CONTAINS "Abassin"
      AND c.kind = "ORGANIZATION"
    RETURN c.name, COLLECT(DISTINCT c.addr)
    LIMIT 100;
    """
)

# Map each shell company name to its list of addresses.
shells: dict = {
    name: addrs
    for name, addrs in res.get_as_pl().iter_rows()
}

ic(shells);

ic| shells: {'BARLLOWS SERVICES LTD': ['31 Quernmore Close, Bromley, Kent, United Kingdom, '
                                       'BR1 4EL',
                                       '3 Market Parade, 41 East Street, Bromley, BR1 1QN'],
             'LMAR (GB) LTD': ['31 Quernmore Close, Bromley, Kent, United Kingdom, BR1 '
                               '4EL'],
             'WELLHANCIA HEALTH CARE LTD': ['31 Quernmore Close, Bromley, BR1 4EL']}


## Generate synthetic data for bank transactions

In [7]:
# Create the simulation and register the shell companies (name -> addresses) with it.
# NOTE(review): no random seed is set here; if `Simulation` is stochastic, the
# outputs below will differ from run to run -- confirm.
sim: Simulation = Simulation()
sim.gen_shell_corps(shells)

Simulate a "layering" phase a few times, followed by "deals" which drain the accounts to cash out.

In [8]:
# Number of "layering" rounds to run before cashing out.
PRESS_YOUR_LUCK: int = 3

# Repeat the layering phase the configured number of times.
for _ in range(PRESS_YOUR_LUCK):
    sim.layer_rmf()

# Finish with the "deals" that drain the accounts.
sim.drain_into_deals()

Export the synthetic data as a DataFrame

In [9]:
# A negative row limit tells Polars to print every row of a table instead of truncating.
pl.Config.set_tbl_rows(-1)

# Collect the simulated transactions into a DataFrame and preview the first rows.
df: pl.DataFrame = sim.get_xact_df()
df.head()

date,amount,remitter,receiver,descript
str,f64,str,str,str
"""2025-08-11T19:11:15.394674""",41000.0,"""BCCI""","""BARLLOWS SERVICES LTD""","""local deposit"""
"""2025-08-13T19:11:15.394674""",52000.0,"""BCCI""","""BARLLOWS SERVICES LTD""","""local deposit"""
"""2025-08-13T19:11:15.394674""",44000.0,"""BCCI""","""BARLLOWS SERVICES LTD""","""local deposit"""
"""2025-08-14T19:11:15.394674""",43000.0,"""Ranchlander National Bank""","""LMAR (GB) LTD""","""local deposit"""
"""2025-08-14T19:11:15.394674""",45000.0,"""BCCI""","""BARLLOWS SERVICES LTD""","""local deposit"""


## Use graph analytics to detect patterns of tradecraft

First, we'll construct a graph of the transactions between companies.

In [10]:
# Directed graph of money flows: one node per company, one edge per
# (remitter, receiver) direction that has at least one transaction.
graph: nx.DiGraph = nx.DiGraph()

# `node_names[node_id]` is the company name; `node_ids[name]` is the reverse
# lookup, so registering a company costs O(1) instead of a list scan per row.
node_names: list = []
node_ids: dict = {}

# Every transaction observed for a directed `(src_id, dst_id)` pair.
edge_xact: dict = defaultdict(list)

for row in df.rows(named = True):
    src_label: str = row["remitter"]
    dst_label: str = row["receiver"]

    # Assign integer IDs in order of first appearance, for either role.
    for label in (src_label, dst_label):
        if label not in node_ids:
            node_ids[label] = len(node_names)
            node_names.append(label)

            graph.add_node(
                node_ids[label],
                name = label,
            )

    src_id: int = node_ids[src_label]
    dst_id: int = node_ids[dst_label]

    # `DiGraph` keeps a single edge per direction, so repeated transfers
    # collapse into one edge while the details accumulate in `edge_xact`.
    graph.add_edge(
        src_id,
        dst_id,
    )

    edge_xact[ (src_id, dst_id) ].append({
        "amount": row["amount"],
        "date": datetime.fromisoformat(str(row["date"])),
        "descript": row["descript"],
    })

In [11]:
# Annotate every node with its degree (in-degree plus out-degree for a `DiGraph`).
# Each node simply receives its own value, so no sorting is required.
for node_id, degree in nx.degree(graph):
    graph.nodes[node_id]["degree"] = degree

Let's take a look at pairs of companies involved in money transfers. 
One would expect that the *remitters* and *receivers* do not switch roles much; in other words, the "suppliers" and "customers" of any given firm are two distinct populations.
Are there counterexamples?

In [27]:
def total_pair (
    pair: tuple,
    ) -> float:
    """
Tally the total transacted amounts in one direction between a pair of companies.

Args:
    pair: `(remitter_id, receiver_id)` node IDs, matching the keys of `edge_xact`.

Returns:
    Sum of the `amount` values recorded for that direction, or 0 when no
    transactions were recorded for the pair.
    """
    # `.get()` rather than indexing: `edge_xact` is a `defaultdict`, so indexing
    # an unseen pair would silently insert an empty entry for it.
    return sum(
        item["amount"]
        for item in edge_xact.get(pair, ())
    )


# Report every pair of companies with money flowing in both directions,
# together with the total transferred each way.
for pair in itertools.combinations(graph.nodes, 2):
    if not graph.has_edge(*pair):
        continue

    rev_pair: tuple = pair[::-1]

    if not graph.has_edge(*rev_pair):
        continue

    ic(pair, total_pair(pair), total_pair(rev_pair))

ic| pair: (1, 3)
    total_pair(pair): 104059.3
    total_pair(rev_pair): 32805.32
ic| pair: (1, 5)
    total_pair(pair): 28934.8
    total_pair(rev_pair): 98666.78
ic| pair: (3, 5)
    total_pair(pair): 98837.6
    total_pair(rev_pair): 39230.0


In fact, three firms stand out -- the same ones used for "layering":

In [30]:
# Node IDs 1, 3 and 5 are the ones reported by the reciprocal-pair check.
# IDs follow the order in which companies first appear in the transactions,
# so they may change if the transaction data changes.
ic(graph.nodes[1])
ic(graph.nodes[3])
ic(graph.nodes[5]);

ic| graph.nodes[1]: {'degree': 12, 'name': 'BARLLOWS SERVICES LTD'}
ic| graph.nodes[3]: {'degree': 8, 'name': 'LMAR (GB) LTD'}
ic| graph.nodes[5]: {'degree': 12, 'name': 'WELLHANCIA HEALTH CARE LTD'}


''