# 📒 train_risk_model.ipynb (Notebook Template - Beraborrow Risk Model w/ Blockscout)


## 📌 Objective
Train a risk classification model (0 = safe, 1 = risky) for Beraborrow vaults using on-chain activity data from Blockscout.


In [1]:
## 📦 Setup
!pip install pandas requests scikit-learn joblib

Collecting pandas
  Using cached pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting joblib
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl.metadata (62 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dateutil>=2.8.2 (from pandas)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting charset-normalizer<4,>=2

In [5]:

## 🔧 Configuration

import requests
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

BLOCKSCOUT_API = "https://rootstock.blockscout.com/api/v2"
ROOTSTOOCK_CONTRACT = "0x2bEE6167f91d10Db23252e03dE039dA6B9047D49"



In [17]:

## 📊 Fetch Transaction Data
def fetch_txns(contract):
    url = f"{BLOCKSCOUT_API}/addresses/{contract}/transactions"
    r = requests.get(url)
    r.raise_for_status()
    return pd.DataFrame(r.json()["items"])

transactions = fetch_txns(ROOTSTOOCK_CONTRACT)
transactions['timestamp'] = pd.to_datetime(transactions['timestamp'])
transactions['value_eth'] = pd.to_numeric(transactions['value'], errors='coerce') / 1e18

print(transactions['timestamp'], "test timepstampt")
print(transactions['value_eth'], "Value Eth")


transactions['method'] = transactions['method'].fillna("unknown")
method_counts = transactions['method'].value_counts().to_dict()

print(transactions['method'], "test the transaction method")
print(method_counts, "method_counts tests")

transactions['from'] = transactions['from'].apply(lambda x: x['hash'] if isinstance(x, dict) else x)

features = {
    "total_txns": len(transactions),
    "unique_users": transactions['from'].nunique(),
    "avg_value": transactions['value_eth'].mean(),

}

features.update({
    "deposits": method_counts.get("deposit", 0),
    "withdraws": method_counts.get("withdraw", 0),
    "approvals": method_counts.get("approve", 0),
    "unique_methods": transactions['method'].nunique(),
    "failed_txns": len(transactions[transactions['status'] == 'failed']),
})

features["label"] = 1 if features["avg_value"] < 0.001 else 0


pd.DataFrame([features]).to_csv("rootstock_features.csv", index=False)

print("✅ Feature set saved to ml-risk/rootstock_features.csv")

0    2025-05-31 20:25:39+00:00
1    2025-05-31 19:02:47+00:00
2    2025-05-31 16:06:13+00:00
3    2025-05-31 16:00:39+00:00
4    2025-05-31 15:46:35+00:00
5    2025-05-31 15:40:01+00:00
6    2025-05-31 12:25:54+00:00
7    2025-05-31 12:24:28+00:00
8    2025-05-31 12:22:55+00:00
9    2025-05-31 11:12:00+00:00
10   2025-05-31 11:09:22+00:00
11   2025-05-31 10:15:22+00:00
12   2025-05-31 10:11:16+00:00
13   2025-05-31 09:29:34+00:00
14   2025-05-31 09:07:46+00:00
15   2025-05-31 09:04:36+00:00
16   2025-05-31 08:48:35+00:00
17   2025-05-31 07:03:41+00:00
18   2025-05-31 07:00:00+00:00
19   2025-05-31 06:48:40+00:00
20   2025-05-31 06:46:19+00:00
21   2025-05-31 02:39:48+00:00
22   2025-05-31 02:34:39+00:00
23   2025-05-31 02:20:46+00:00
24   2025-05-31 02:09:58+00:00
25   2025-05-30 23:44:17+00:00
26   2025-05-30 23:41:41+00:00
27   2025-05-30 23:39:03+00:00
28   2025-05-30 23:36:45+00:00
29   2025-05-30 21:11:05+00:00
30   2025-05-30 18:17:00+00:00
31   2025-05-30 18:13:26+00:00
32   202

  transactions['timestamp'] = pd.to_datetime(transactions['timestamp'])


In [None]:
transactions = transactions.dropna(subset=["from", "timestamp", "value"])
token_transfers = token_transfers.dropna(subset=["from", "to", "value"])
internal_txns = internal_txns.fillna({'gas_used': 0})

print(transactions, "check transactions")
print(token_transfers, "check token_transfers")
print(internal_txns, "check internal_txns")



In [None]:
## 🧠 Feature Engineering



features["txns_per_user"] = features["total_txns"] / features["unique_users"] if features["unique_users"] > 0 else 0
features["net_token_flow"] = features["token_inflow"] - features["token_outflow"]
features["activity_score"] = (
    features["total_txns"] + features["internal_txn_count"] + features["token_inflow"]
) / (1 + features["last_activity_days"])



In [None]:
## 🏷️ Labeling (Manual or Rule-Based for MVP)

features["label"] = 1 if features["last_activity_days"] > 14 else 0  # Example logic


In [None]:
## 🧪 Build Dataset

df = pd.DataFrame([features])  # Append for multiple vaults if available

