In [1]:
import json
import pandas as pd
from collections import defaultdict
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt


In [3]:
with open("user-wallet-transactions.json", "r") as f:
    data = json.load(f)

wallets = defaultdict(lambda: {
    "deposit_usd": 0,
    "borrow_usd": 0,
    "repay_usd": 0,
    "redeem_usd": 0,
    "liquidations": 0,
    "deposit_count": 0,
    "borrow_count": 0,
    "repay_count": 0
})

for entry in data:
    wallet = entry["userWallet"]
    action = entry.get("action")
    act_data = entry.get("actionData", {})

    try:
        amount = float(act_data.get("amount", 0))
        price = float(act_data.get("assetPriceUSD", 0))
        symbol = act_data.get("assetSymbol", "UNKNOWN")
        decimals = 1e6 if symbol == "USDC" else 1e18
        usd_value = (amount / decimals) * price
    except:
        usd_value = 0

    if action == "deposit":
        wallets[wallet]["deposit_usd"] += usd_value
        wallets[wallet]["deposit_count"] += 1
    elif action == "borrow":
        wallets[wallet]["borrow_usd"] += usd_value
        wallets[wallet]["borrow_count"] += 1
    elif action == "repay":
        wallets[wallet]["repay_usd"] += usd_value
        wallets[wallet]["repay_count"] += 1
    elif action == "redeemunderlying":
        wallets[wallet]["redeem_usd"] += usd_value
    elif action == "liquidationcall":
        wallets[wallet]["liquidations"] += 1

df = pd.DataFrame.from_dict(wallets, orient="index").reset_index()
df.rename(columns={"index": "wallet"}, inplace=True)
df["repay_ratio"] = df["repay_usd"] / df["borrow_usd"].replace(0, 1)


In [4]:
# Rule-of-thumb to simulate good (1) vs bad (0)
df["label"] = ((df["repay_ratio"] > 0.9) & (df["liquidations"] == 0) & (df["borrow_usd"] > 0)).astype(int)

df["label"].value_counts()


label
0    2806
1     691
Name: count, dtype: int64

In [5]:
features = [
    "deposit_usd", "borrow_usd", "repay_usd", "redeem_usd",
    "liquidations", "repay_ratio", "deposit_count", "borrow_count"
]

X = df[features]
y = df["label"]

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [6]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       568
           1       1.00      1.00      1.00       132

    accuracy                           1.00       700
   macro avg       1.00      1.00      1.00       700
weighted avg       1.00      1.00      1.00       700



In [13]:
proba = model.predict_proba(X_scaled)[:, 1]  # probability of being a 'good' user
df["ml_credit_score"] = (proba * 1000).astype(int)

df.head(50)


Unnamed: 0,wallet,deposit_usd,borrow_usd,repay_usd,redeem_usd,liquidations,deposit_count,borrow_count,repay_count,repay_ratio,label,ml_credit_score
0,0x00000000001accfa9cef68cf5371a23025b6d4b6,1987.664,0.0,0.0,0.0,0,1,0,0,0.0,0,0
1,0x000000000051d07a4fb3bd10121a343d85818da6,285.6945,0.0,0.0,0.0,0,1,0,0,0.0,0,0
2,0x000000000096026fb41fc39f9875d164bd82e2dc,0.005152311,0.0,0.0,0.0,0,2,0,0,0.0,0,0
3,0x0000000000e189dd664b9ab08a33c4839953852c,0.0,0.0,0.0,989.3606,0,0,0,0,0.0,0,0
4,0x0000000002032370b971dabd36d72f3e5a7bf1ee,703042.2,60297.62,7.015026e-08,601474.6,0,250,15,4,1.1634e-12,0,20
5,0x000000000a38444e0a6e37d3b630d7e855a7cb13,18390.67,4005.316,4020.402,9355.621,0,7,8,5,1.003767,1,980
6,0x000000003853fcedcd0355fec98ca3192833f00b,0.09826816,0.0,0.0,0.0,0,8,0,0,0.0,0,0
7,0x000000003ce0cf2c037493b1dc087204bd7f713e,645672.9,134818.4,2738.696,480324.3,0,309,30,16,0.02031396,0,0
8,0x000000007858e6f2668e1e06111cfa24403a5466,0.1421607,0.0,0.0,0.0,0,1,0,0,0.0,0,0
9,0x00000001a0f57e850c9db68b4a9bc34677437c5c,1.766653,0.0,0.0,0.0,0,1,0,0,0.0,0,0


In [8]:
df.to_csv("ml_wallet_credit_scores.csv", index=False)