## How efficient is online learning with PyTorch?

* PyTorch enables arbitrary network architectures and optimization procedures
* How much overhead does PyTorch introduce in online learning?

Test: Benchmark of softmax regression with `deep-river` and base `River` 

In [None]:
# Imports 
from river import compose, preprocessing, metrics, datasets, linear_model, optim
from tqdm import tqdm 
from deep_river.classification import Classifier
from time import time
from torch import nn
from utils import seed_everything
import torch


In [None]:
def run_test_train(model, data):
    """Evaluate ``model`` on ``data`` with a predict-then-learn loop.

    For every ``(x, y)`` pair the model first predicts, then learns from the
    same instance, and the prediction is scored with an accuracy metric.
    The elapsed wall-clock time and the final accuracy are printed; nothing
    is returned.

    Parameters
    ----------
    model
        Object exposing ``predict_one(x)`` and ``learn_one(x, y)``.
    data
        Iterable of ``(x, y)`` pairs.
    """
    accuracy = metrics.Accuracy()
    started_at = time()

    for features, label in tqdm(data):
        # Predict before learning so the instance is unseen when it is scored
        prediction = model.predict_one(features)
        model.learn_one(features, label)
        accuracy.update(label, prediction)

    elapsed = time() - started_at
    print("\nRuntime: {:.2f} Seconds".format(elapsed))
    print("Accuracy: {:.2%}".format(accuracy.get()))

In [None]:
# Materialise the first 10,000 Insects samples as a list so they can be
# iterated more than once
data = list(datasets.Insects().take(10_000))

# Baseline: softmax regression from base River, optimised with SGD on a
# cross-entropy loss
model = linear_model.SoftmaxRegression(
    optimizer=optim.SGD(lr=0.25),
    loss=optim.losses.CrossEntropy(),
)

run_test_train(model, data)

In [None]:
# Define softmax regression PyTorch module
class SoftmaxNet(nn.Module):
    """Softmax regression expressed as a single linear layer.

    ``forward`` returns the raw output of the linear layer (logits); no
    softmax is applied inside the module.

    Parameters
    ----------
    n_features
        Number of input features.
    n_classes
        Number of output units, one per class. Defaults to 6, the value
        that was previously hard-coded.
    """

    def __init__(self, n_features: int, n_classes: int = 6) -> None:
        super().__init__()
        self.linear = nn.Linear(n_features, n_classes)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.linear(x)

# Wrap the PyTorch module in the deep-river classifier; the module emits
# logits, hence output_is_logit=True
model = Classifier(
    module=SoftmaxNet,
    lr=2,
    output_is_logit=True,
)

run_test_train(model, data)

* Softmax regression in `deep-river` runs slower than in base `River`
* Reasons 
  + dictionary &rarr; tensor conversions required for river compatibility
  + overhead inherent to PyTorch 

In [None]:
from torch.optim import SGD
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder

# Pure-PyTorch version of the predict-then-learn loop, operating directly on
# tensors instead of per-sample dictionaries.
data = list(datasets.Insects().take(10_000))
x = torch.tensor([list(sample.values()) for sample, _ in data])

model = SoftmaxNet(n_features=x.shape[-1])
optimizer = SGD(model.parameters(), lr=1)

# Encode string labels as integers, then one-hot them. The width is pinned to
# the model's output size: by default F.one_hot infers it from the largest
# label present, which would not match the logits if a class were missing
# from the sample.
y = LabelEncoder().fit_transform([label for _, label in data])
y = F.one_hot(torch.tensor(y), num_classes=model.linear.out_features).type(
    torch.float
)

metric = metrics.Accuracy()
start = time()
for xi, yi in tqdm(zip(x, y), total=len(x)):
    # Predict first, without tracking gradients
    with torch.inference_mode():
        pred = model(xi).argmax()

    metric.update(yi.argmax().item(), pred.item())

    # Then take one SGD step on the same instance
    logits = model(xi)
    # NOTE(review): this is an element-wise sigmoid loss on one-hot targets,
    # not a softmax cross-entropy -- presumably chosen to match the
    # deep-river classifier's loss; confirm.
    loss = F.binary_cross_entropy_with_logits(logits, yi)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

print("\nRuntime: {:.2f} Seconds".format(time() - start))
print("Accuracy: {:.2%}".format(metric.get()))