In [59]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import TensorDataset, DataLoader

In [60]:
df = pd.read_csv('defillama_stablecoin_pools.csv')

# for `APY Reward`, `APY Base`, `APY`, and `APY Mean 30d` column, fill na with 0
df['APY Reward'] = df['APY Reward'].fillna(0)
df['APY Base'] = df['APY Base'].fillna(0)
df['APY'] = df['APY'].fillna(0)
df['APY Mean 30d'] = df['APY Mean 30d'].fillna(0)

# for `Confidence` column, fill na with 1
df['Confidence'] = df['Confidence'].fillna(1)

# for `Outlook` column, fill na with "Down"
df['Outlook'] = df['Outlook'].fillna('Down')

In [61]:
# Check na
print(df.isna().sum())

Pool            0
Project         0
Category        0
Chain           0
TVL             0
APY             0
APY Base        0
APY Mean 30d    0
APY Reward      0
Outlook         0
Confidence      0
dtype: int64


In [72]:
categorical_cols = df.select_dtypes(include=['object']).columns

# One-hot encode categorical columns
df = pd.get_dummies(df, columns=categorical_cols)
print(df.columns)

Index(['TVL', 'APY', 'APY Base', 'APY Mean 30d', 'APY Reward', 'Confidence',
       'Pool_9SUSDC11CORE', 'Pool_9SUSDCCORE', 'Pool_ADAI-AUSDC-AUSDT',
       'Pool_ALUSD-FRAX-USDC',
       ...
       'Chain_Sui', 'Chain_Taiko', 'Chain_Tezos', 'Chain_Ton', 'Chain_Tron',
       'Chain_Unit0', 'Chain_Venom', 'Chain_zkSync Era', 'Outlook_Down',
       'Outlook_Up'],
      dtype='object', length=623)


In [63]:
one_hot_columns_map = {}
for col in categorical_cols:
    prefix = col + "_"
    matched_cols = df.columns[df.columns.str.startswith(prefix)]
    one_hot_columns_map[col] = matched_cols.tolist()

# Create a cleaned version of the dictionary where the prefix is removed
cleaned_map = {}
for col, col_list in one_hot_columns_map.items():
    prefix = col + "_"
    cleaned_map[col] = [name.replace(prefix, "") for name in col_list]

import json
print(json.dumps(cleaned_map, indent=2))

{
  "Pool": [
    "9SUSDC11CORE",
    "9SUSDCCORE",
    "ADAI-AUSDC-AUSDT",
    "ALUSD-FRAX-USDC",
    "ALUSD-USDC",
    "AMDAI-AMUSDC-AMUSDT",
    "APRUSDC",
    "APUSDC-JWLAPUSDC",
    "APUSDC-USDC",
    "AUSD-USDC",
    "AUSD-USDT",
    "AUSDC",
    "AUSDT",
    "AVAI-USDC.E",
    "AVAXAI-USDC",
    "AVDAI-AVUSDC-AVUSDT",
    "AXLUSDC-USDC",
    "AXLUSDC-USDT",
    "AXLUSDC.E-USDC",
    "AXLUSDT-USDT",
    "BASEDAI-USDC",
    "BBQUSDC",
    "BBQUSDT",
    "BBUSDC",
    "BBUSDT",
    "BDAI-BUSDC-BUSDT",
    "BEEFYUSDC.E-SCUSD",
    "BOB-USDC.E",
    "BOLD-USDC",
    "BUSD-USDT",
    "BUSD-USDT-USDC-DAI",
    "CDAI-CUSDC",
    "CDAI-CUSDC-USDT",
    "CSUSDC",
    "CUSD-DAI-USDC",
    "CUSD-USDC",
    "CUSDO-USDC",
    "DAI-USDC",
    "DAI-USDC-USDT",
    "DAI-USDC-USDT-SUSD",
    "DAI-USDC.E",
    "DAI-USDC.E-USDT",
    "DAI-USDT",
    "DAI-USDT-USDC",
    "DAI.E-USDC",
    "DAI.E-USDT",
    "DEUSD-USDC",
    "DEUSD-USDT",
    "DOLA-FRAX-USDC",
    "DOLA-USDC",
    "DOLA-USDCE",
    "

In [64]:
X = df.drop('Confidence', axis=1)
y = df['Confidence'] - 1

# Normalize features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(X.shape, y.shape)

(1160, 622) (1160,)


In [65]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor(y_train.values)  # assuming y is integer class labels
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.LongTensor(y_test.values)

# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [66]:
from ann import ANN

# Set dimensions based on your data
input_dim = X_train_tensor.shape[1]
hidden_dim = 64  # You can adjust this as needed
output_dim = len(np.unique(y))  # Assumes 'target' contains class labels

model = ANN(input_dim, hidden_dim, output_dim)
print(model)

ANN(
  (fc1): Linear(in_features=622, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=3, bias=True)
)


In [67]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [68]:
def train(model, train_loader, criterion, optimizer, num_epochs):
    model.train()  # Set model to training mode
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()         # Zero the parameter gradients
            outputs = model(inputs)       # Forward pass
            loss = criterion(outputs, labels)
            loss.backward()               # Backpropagation
            optimizer.step()              # Update parameters
            running_loss += loss.item() * inputs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

def evaluate(model, test_loader):
    model.eval()  # Set model to evaluation mode
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Compute accuracy
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    accuracy = np.mean(all_preds == all_labels)
    
    print(f"Accuracy: {accuracy:.4f}")
    return all_labels, all_preds


In [69]:
num_epochs = 100  # Adjust the number of epochs as needed
train(model, train_loader, criterion, optimizer, num_epochs)

Epoch 1/100, Loss: 1.0759
Epoch 2/100, Loss: 0.8823
Epoch 3/100, Loss: 0.7172
Epoch 4/100, Loss: 0.5797
Epoch 5/100, Loss: 0.4901
Epoch 6/100, Loss: 0.4347
Epoch 7/100, Loss: 0.3956
Epoch 8/100, Loss: 0.3699
Epoch 9/100, Loss: 0.3500
Epoch 10/100, Loss: 0.3328
Epoch 11/100, Loss: 0.3219
Epoch 12/100, Loss: 0.3117
Epoch 13/100, Loss: 0.2940
Epoch 14/100, Loss: 0.2881
Epoch 15/100, Loss: 0.2746
Epoch 16/100, Loss: 0.2670
Epoch 17/100, Loss: 0.2578
Epoch 18/100, Loss: 0.2516
Epoch 19/100, Loss: 0.2399
Epoch 20/100, Loss: 0.2358
Epoch 21/100, Loss: 0.2291
Epoch 22/100, Loss: 0.2199
Epoch 23/100, Loss: 0.2130
Epoch 24/100, Loss: 0.2088
Epoch 25/100, Loss: 0.2017
Epoch 26/100, Loss: 0.1978
Epoch 27/100, Loss: 0.1906
Epoch 28/100, Loss: 0.1873
Epoch 29/100, Loss: 0.1827
Epoch 30/100, Loss: 0.1752
Epoch 31/100, Loss: 0.1728
Epoch 32/100, Loss: 0.1715
Epoch 33/100, Loss: 0.1640
Epoch 34/100, Loss: 0.1612
Epoch 35/100, Loss: 0.1593
Epoch 36/100, Loss: 0.1593
Epoch 37/100, Loss: 0.1551
Epoch 38/1

In [70]:
true_labels, predictions = evaluate(model, test_loader)

# Print the classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predictions))

print("Confusion Matrix:")
print(confusion_matrix(true_labels, predictions))

Accuracy: 0.4741
Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.69      0.59        88
           1       0.43      0.35      0.39        83
           2       0.44      0.33      0.38        61

    accuracy                           0.47       232
   macro avg       0.46      0.46      0.45       232
weighted avg       0.46      0.47      0.46       232

Confusion Matrix:
[[61 19  8]
 [37 29 17]
 [22 19 20]]


In [71]:
# Export the model
torch.save(model.state_dict(), '../model/risk-assess-model.pth')