In [1]:
import torch
import pandas as pd

# This notebook only runs on a CUDA device: fail fast with an error instead of
# falling back to CPU.
if not torch.cuda.is_available():
    raise ValueError("not on cuda")
device = "cuda"

In [3]:
# Load the full training table from the parquet export.
df = pd.read_parquet(
    "data/0xFDC1BE05aD924e6Fc4Ab2c6443279fF7C0AB5544_training_data.parquet"
)

# Hold out the final 10,000 rows as the test frame; every row before them is
# used for training. The split is by row position, with no shuffling here.
train_df, test_df = df[:-10_000], df[-10_000:]

In [7]:
# Name of the label column. Despite the plural name this is a single string,
# so indexing a DataFrame with it yields a Series.
target_cols = "target"

# Feature columns fed to the model, in the order they appear in the input
# matrix. Group comments below only reflect the column-name prefixes.
training_cols = [
    # wallet age and transaction counts
    "wallet_age",
    "incoming_tx_count",
    "outgoing_tx_count",
    "net_incoming_tx_count",
    # gas columns
    "total_gas_paid_eth",
    "avg_gas_paid_per_tx_eth",
    # risky_* columns
    "risky_tx_count",
    "risky_unique_contract_count",
    "risky_first_tx_timestamp",
    "risky_last_tx_timestamp",
    "risky_first_last_tx_timestamp_diff",
    "risky_sum_outgoing_amount_eth",
    # incoming / outgoing ETH sums and averages
    "outgoing_tx_sum_eth",
    "incoming_tx_sum_eth",
    "outgoing_tx_avg_eth",
    "incoming_tx_avg_eth",
    # balance columns
    "max_eth_ever",
    "min_eth_ever",
    "total_balance_eth",
    # risk_factor column
    "risk_factor",
]

In [8]:
from sklearn.model_selection import train_test_split

# Split `train_df` 80/20 into a fitting subset and a validation subset. Note
# that X_test / y_test here come from `train_df`, not from the held-out
# `test_df`. The fixed random_state makes the shuffle reproducible.
# The label column is taken from `target_cols` so that the feature and label
# column choices are both defined in one place.
X_train, X_test, y_train, y_test = train_test_split(
    train_df[training_cols].to_numpy(),
    train_df[target_cols].to_numpy(),
    test_size=0.2,
    random_state=24354325,
)

In [9]:
from sklearn.preprocessing import StandardScaler

# Standardise each feature column: subtract the mean and divide by the standard
# deviation (both options are the StandardScaler defaults, spelled out here).
sc = StandardScaler(with_mean=True, with_std=True)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no statistics are taken from X_test.
sc.fit(X_train)
X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

In [11]:
import torch.nn as nn


class PredictLiquidationsV1(nn.Module):
    """
    Fully connected network with two hidden ReLU layers and dropout.

    The final layer is a sigmoid, so the forward pass returns values in
    (0, 1) that are read as the probability of liquidation.

    Args:
        input_features: width of each input row.
        output_features: width of each output row.
        hidden_units: width of both hidden layers.
    """

    def __init__(self, input_features, output_features, hidden_units):
        super().__init__()
        # Layers are listed in forward order; their position in the list is
        # also their index inside the Sequential container.
        layers = [
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_units, output_features),
            nn.Sigmoid(),
        ]
        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the sigmoid outputs."""
        probabilities = self.linear_layer_stack(x)
        return probabilities


# Build the network with one input per feature column and a single output,
# then move its parameters to the selected device.
model_1 = PredictLiquidationsV1(
    input_features=X_train.shape[1],
    hidden_units=82,
    output_features=1,
)
model_1 = model_1.to(device)

In [None]:
import torch.optim as optim

# Binary cross-entropy on probabilities: the model already applies a sigmoid,
# so its outputs can be passed to BCELoss directly.
loss_fn = nn.BCELoss()

# Adam over every model parameter, with a weight-decay (L2) term.
optimizer = optim.Adam(
    model_1.parameters(),
    lr=1e-3,
    weight_decay=1e-2,
)

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

# NOTE(review): this cell rebinds X_train, X_test, y_train and y_test, which an
# earlier cell assigned from the liquidation training frame. Any cell executed
# after this one sees the synthetic circles data under those names — confirm
# that this is intended.

# Create a synthetic two-class dataset with 10,000 samples.
X, y = make_circles(n_samples=10000, noise=0.05, random_state=26)

# Keep 33% of the samples for testing; the seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=26
)

# Visualize the data: training split on the left, test split on the right,
# with shared axes so the two panels are directly comparable.
fig, (train_ax, test_ax) = plt.subplots(
    ncols=2, sharex=True, sharey=True, figsize=(10, 5)
)
train_ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Spectral)
train_ax.set_title("Training Data")
train_ax.set_xlabel("Feature #0")
train_ax.set_ylabel("Feature #1")

# Use the same colormap as the training panel so each class keeps the same
# colour in both panels.
test_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Spectral)
test_ax.set_xlabel("Feature #0")
test_ax.set_title("Testing data")
plt.show()