In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_score, recall_score, roc_auc_score

# --- Notebook-wide display settings ---
plt.style.use("default")
pd.set_option("display.max_columns", None)  # show every column of wide frames

# --- Load the two raw extracts (paths are relative to the working directory) ---
trades = pd.read_csv("trades.csv")
activity = pd.read_csv("user_activity.csv")

# Both extracts get the same treatment: interpret `timestamp` as epoch seconds,
# then derive a calendar `date` column used for the daily groupings.
for frame in (trades, activity):
    parsed = pd.to_datetime(frame['timestamp'], unit='s')
    frame['timestamp'] = parsed
    frame['date'] = parsed.dt.date

# Divisor used to express `amount` in USD.
# NOTE(review): presumably an NGN-per-USD rate — confirm the source and date of
# the figure. The same divisor is applied to every pair; the recommendation text
# in this notebook mentions KES pairs (e.g. ETHKES), whose USD volume would be
# misstated if their amounts are not quoted in the same currency.
AMOUNT_PER_USD = 1500
trades['usd_volume'] = trades['amount'] / AMOUNT_PER_USD
# Total USD volume per pair, largest first, keeping the three biggest.
volume_by_pair = trades.groupby('pair')['usd_volume'].sum()
top3 = volume_by_pair.sort_values(ascending=False).head(3)

print(top3)

# Draw the ranking on an explicit Axes rather than through the pyplot state machine.
fig, ax = plt.subplots()
top3.plot(kind='bar', ax=ax)
ax.set_title("Top 3 Trading Pairs by USD Volume")
ax.set_ylabel("Total USD Volume")
ax.set_xlabel("Trading Pair")
plt.show()

# --- BTCNGN: daily mean trade price and 7-day rolling volatility of daily returns ---
btc = trades[trades['pair'] == 'BTCNGN'].copy()

# Per-trade price is amount divided by volume.
# NOTE(review): assumes `amount` is the quote-currency value and `volume` the
# base-asset quantity — confirm against the data dictionary.
# A zero volume would yield an infinite price and poison that day's mean, so
# such rows are treated as missing (mean() skips NaN).
btc['price'] = btc['amount'] / btc['volume'].replace(0, np.nan)

# groupby sorts by date, so pct_change compares each day with the previous
# day that has trades.
daily = btc.groupby('date')['price'].mean().reset_index()
daily['return'] = daily['price'].pct_change()
# Standard deviation of the last 7 daily returns; the first rows are NaN until
# the window is full.
daily['volatility_7d'] = daily['return'].rolling(7).std()

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(daily['date'], daily['volatility_7d'])
# Title previously contained the mis-decoded bytes "â€”" instead of an em dash.
ax.set_title("7-Day Rolling Volatility — BTCNGN")
ax.set_xlabel("Date")
ax.set_ylabel("Std. dev. of daily returns (7-day window)")
ax.tick_params(axis='x', labelrotation=45)
plt.show()

# --- When do deposits happen? Counts by hour of day and by day of week ---
deposits = activity[activity['activity_type'] == 'deposit'].copy()

# NOTE(review): hour/day are taken from `timestamp` as parsed (epoch seconds,
# no timezone applied) — presumably UTC; convert first if local-time patterns
# are what matters.
deposits['hour'] = deposits['timestamp'].dt.hour
deposits['day'] = deposits['timestamp'].dt.day_name()

# Reindex to all 24 hours so an hour without deposits shows as a zero bar
# instead of silently disappearing from the axis.
deposits_by_hour = deposits.groupby('hour').size().reindex(range(24), fill_value=0)

fig, ax = plt.subplots()
deposits_by_hour.plot(kind='bar', ax=ax)
ax.set_title("Deposits by Hour")
ax.set_xlabel("Hour of day")
ax.set_ylabel("Number of deposits")
plt.show()

# groupby sorts day names alphabetically (Friday, Monday, ...); reindex into
# calendar order so the weekly pattern is readable. day_name() returns English
# names by default, matching this list.
WEEKDAY_ORDER = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
deposits_by_day = deposits.groupby('day').size().reindex(WEEKDAY_ORDER, fill_value=0)

fig, ax = plt.subplots()
deposits_by_day.plot(kind='bar', ax=ax)
ax.set_title("Deposits by Day of Week")
ax.set_xlabel("Day of week")
ax.set_ylabel("Number of deposits")
ax.tick_params(axis='x', labelrotation=45)
plt.show()

# One row per user, one column per activity type, each cell holding the number
# of events of that type (0 where the user has none).
activity_counts = activity.pivot_table(
    values='timestamp',
    index='user_id',
    columns='activity_type',
    aggfunc='count',
    fill_value=0,
)
activity_summary = activity_counts.reset_index()

# Withdrawals relative to deposits; the +1 keeps the denominator non-zero for
# users who never deposited.
deposits_plus_one = activity_summary['deposit'] + 1
activity_summary['withdraw_deposit_ratio'] = (
    activity_summary['withdrawal'].div(deposits_plus_one)
)
# Per-user trading profile: total USD traded and number of distinct pairs.
trade_summary = (
    trades.groupby('user_id')
    .agg(
        total_traded_usd=('usd_volume', 'sum'),
        unique_pairs=('pair', 'nunique'),
    )
    .reset_index()
)

# Left join keeps every user present in the activity summary; users with no
# trades get 0 for the trading columns via fillna.
users = (
    activity_summary
    .merge(trade_summary, on='user_id', how='left')
    .fillna(0)
)
# Rule-based label: flag users who withdraw almost as often as they deposit
# while trading far less than the typical user.
# NOTE(review): the median is taken over all rows of `users`, including those
# whose trading volume was filled with 0; if at least half of the users never
# traded, the cutoff becomes 0 and the strict `<` can never be satisfied.
withdraws_heavily = users['withdraw_deposit_ratio'] > 0.9
low_volume_cutoff = users['total_traded_usd'].median() * 0.1
trades_very_little = users['total_traded_usd'] < low_volume_cutoff

users['suspicious'] = np.where(withdraws_heavily & trades_very_little, 1, 0)
# --- Train and evaluate a classifier on the rule-based `suspicious` label ---
# Features: every per-user column except the identifier and the label itself.
# NOTE(review): `suspicious` is computed by a fixed rule from
# `withdraw_deposit_ratio` and `total_traded_usd`, and both columns are still
# part of X. The forest can therefore re-learn the rule, so the scores printed
# below measure agreement with the heuristic, not fraud-detection performance
# on independently labelled cases.
X = users.drop(columns=['user_id', 'suspicious'])
y = users['suspicious']

# A stratified split needs at least two examples of each class, and with a
# single class the positive-class probability column does not exist. Fail
# early with a readable message instead of an opaque error further down.
label_counts = y.value_counts()
if label_counts.size < 2 or label_counts.min() < 2:
    raise ValueError(
        "The 'suspicious' rule produced too few examples of one class for a "
        f"stratified split (class counts: {label_counts.to_dict()}); "
        "adjust the labelling thresholds."
    )

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# class_weight='balanced' compensates for the flagged class being rare.
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

preds = model.predict(X_test)
print(classification_report(y_test, preds))

print("Precision:", precision_score(y_test, preds))
print("Recall:", recall_score(y_test, preds))

# ROC-AUC is undefined when the test split holds only one class (possible when
# the flagged class is very small), so report that instead of raising.
if y_test.nunique() < 2:
    print("ROC-AUC: undefined (test split contains a single class)")
else:
    # Locate the probability column of label 1 via classes_ rather than
    # assuming it is column index 1.
    positive_col = list(model.classes_).index(1)
    positive_scores = model.predict_proba(X_test)[:, positive_col]
    print("ROC-AUC:", roc_auc_score(y_test, positive_scores))

SyntaxError: invalid syntax (ipython-input-2143278144.py, line 20)

Using KES pairs (e.g., ETHKES), define low-volume traders as:

- Users with < $500 total trading volume
- Users trading only 1 asset
- Users with fewer than 5 trades per month

These users are:

- Early-stage traders
- Likely price-sensitive
- Responsive to incentives

Suggested Campaign:

- Reduced trading fees
- Cashback for first 10 trades
- Educational trading materials

# Crypto Trading Analysis & Fraud Detection

- Part 0: Setup
- Part 1: Market Analysis
- Part 2: Fraud Detection
- Part 3: Strategic Recommendation

