# This project is a sample proof of concept (PoC) for trade surveillance
(with a single rule: spoofing detection)

### read sample trade data

In [83]:
# Inline sample of order events in CSV form (8 data rows, traders TR123 and TR999).
# NOTE: the name `trade_data` holds raw text here; a later cell rebinds it to the
# DataFrame returned by pd.read_csv.
trade_data = """timestamp,order_id,trader_id,symbol,side,price,quantity,status
2024-06-03 10:00:00,1,TR123,AAPL,buy,185.0,100,placed
2024-06-03 10:00:01,2,TR123,AAPL,buy,185.0,100,canceled
2024-06-03 10:00:02,3,TR123,AAPL,buy,185.1,200,placed
2024-06-03 10:00:03,4,TR123,AAPL,buy,185.1,200,canceled
2024-06-03 10:00:04,5,TR123,AAPL,buy,185.2,300,placed
2024-06-03 10:00:05,6,TR123,AAPL,buy,185.2,300,canceled
2024-06-03 10:00:06,7,TR999,GOOG,sell,2500.0,50,placed
2024-06-03 10:00:07,8,TR999,GOOG,sell,2500.0,50,executed"""

In [84]:
# Persist the inline CSV text to disk so it can be loaded back through pandas.
with open('trade_data.csv', 'w') as csv_file:
    csv_file.write(trade_data)

In [85]:
import numpy as np
import pandas as pd

In [86]:
# Load the CSV from disk; from here on `trade_data` is a DataFrame, not the raw text.
# parse_dates is not passed, so the timestamp column is kept as plain strings.
trade_data = pd.read_csv('trade_data.csv')

In [87]:
# Display the whole frame; the sample has only 8 rows, so no .head() is needed.
trade_data

Unnamed: 0,timestamp,order_id,trader_id,symbol,side,price,quantity,status
0,2024-06-03 10:00:00,1,TR123,AAPL,buy,185.0,100,placed
1,2024-06-03 10:00:01,2,TR123,AAPL,buy,185.0,100,canceled
2,2024-06-03 10:00:02,3,TR123,AAPL,buy,185.1,200,placed
3,2024-06-03 10:00:03,4,TR123,AAPL,buy,185.1,200,canceled
4,2024-06-03 10:00:04,5,TR123,AAPL,buy,185.2,300,placed
5,2024-06-03 10:00:05,6,TR123,AAPL,buy,185.2,300,canceled
6,2024-06-03 10:00:06,7,TR999,GOOG,sell,2500.0,50,placed
7,2024-06-03 10:00:07,8,TR999,GOOG,sell,2500.0,50,executed


In [88]:
# Check the column types pandas inferred while reading the CSV.
trade_data.dtypes

timestamp     object
order_id       int64
trader_id     object
symbol        object
side          object
price        float64
quantity       int64
status        object
dtype: object

### logic to identify spoofing

Possible spoofing is flagged when a particular trader places and cancels too many orders in a short time.<br>
In the table above, trader TR123 placed and canceled multiple orders within just a few seconds,<br>
which makes these transactions concerning, so a spoofing alert is triggered.<br>
Note: the rule implemented below only checks each trader's order count and cancel ratio; it does not yet use the timestamps.

In [89]:
# One group per trader, so the rule can be evaluated trader by trader.
grouped_df = trade_data.groupby('trader_id')

In [90]:
# Parameters of the spoofing rule used by the detection loop.
min_orders = 5  # a trader needs at least this many rows to be evaluated
cancel_threshold = 0.4  # flag when canceled / total is strictly greater than this

In [91]:
# Accumulator for alert records (one dict per flagged trader).
alerts = []

In [92]:
# Start from an empty list every time this cell runs, so re-running it does not
# append duplicate alerts to a list left over from a previous execution.
alerts = []
for trader_id, group in grouped_df:
    total_orders = len(group)
    canceled_orders = len(group[group["status"] == "canceled"])
    # Traders with too little activity are not evaluated.
    if total_orders < min_orders:
        continue
    # Compute the ratio once and reuse it for both the check and the report.
    cancel_ratio = canceled_orders / total_orders
    if cancel_ratio > cancel_threshold:
        alerts.append({
            "trader_id": trader_id,
            "alert_type": "spoofing detected",
            "total_orders": total_orders,
            "canceled_orders": canceled_orders,
            "cancel_ratio": round(cancel_ratio, 2)
        })

In [93]:
# Show the alert records collected by the detection loop.
alerts

[{'trader_id': 'TR123',
  'alert_type': 'spoofing detected',
  'total_orders': 6,
  'canceled_orders': 3,
  'cancel_ratio': 0.5}]

### Package everything inside a function

In [94]:
def detect_spoofing(df, min_orders=5, cancel_threshold=0.4):
    """Flag traders whose share of canceled rows exceeds a threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Order events. Only the ``trader_id`` and ``status`` columns are read.
    min_orders : int, default 5
        A trader is evaluated only if they have at least this many rows.
    cancel_threshold : float, default 0.4
        A trader is flagged when ``canceled / total`` is strictly greater
        than this value.

    Returns
    -------
    list of dict
        One record per flagged trader with the keys ``trader_id``,
        ``alert_type``, ``total_orders``, ``canceled_orders`` and
        ``cancel_ratio`` (rounded to 2 decimals). Empty if nobody is flagged.
    """
    alerts = []
    # Group the frame that was passed in. Relying on a notebook-level
    # `grouped_df` would ignore `df` and fail on a fresh kernel.
    for trader_id, group in df.groupby("trader_id"):
        total_orders = len(group)
        # Skip groups with too little activity; the zero check also protects
        # the division below when min_orders <= 0 and a group is empty.
        if total_orders == 0 or total_orders < min_orders:
            continue
        canceled_orders = int((group["status"] == "canceled").sum())
        cancel_ratio = canceled_orders / total_orders
        if cancel_ratio > cancel_threshold:
            alerts.append({
                "trader_id": trader_id,
                "alert_type": "spoofing detected",
                "total_orders": total_orders,
                "canceled_orders": canceled_orders,
                "cancel_ratio": round(cancel_ratio, 2)
            })
    return alerts

In [95]:
# Run the packaged rule on the loaded frame with the default parameters.
detect_spoofing(trade_data)

[{'trader_id': 'TR123',
  'alert_type': 'spoofing detected',
  'total_orders': 6,
  'canceled_orders': 3,
  'cancel_ratio': 0.5}]

In [96]:
def run_surveillance(csv_path):
    """Load order events from a CSV file and run the spoofing rule on them.

    The ``timestamp`` column is parsed as datetimes while reading. The
    resulting frame is handed to ``detect_spoofing`` with its default
    parameters, and the list it returns is passed back unchanged.
    """
    orders = pd.read_csv(csv_path, parse_dates=["timestamp"])
    return detect_spoofing(orders)

In [97]:
# End-to-end run: read the CSV from disk and apply the spoofing rule.
run_surveillance('./trade_data.csv')

[{'trader_id': 'TR123',
  'alert_type': 'spoofing detected',
  'total_orders': 6,
  'canceled_orders': 3,
  'cancel_ratio': 0.5}]

----