# Mini Batches
Iterate over a data stream in mini batches

In [1]:
import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics

In [2]:
dataset = datasets.Phishing()

In [3]:
class MyModule(nn.Module):
    def __init__(self, n_features):
        super(MyModule, self).__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 2)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.nonlin(self.dense1(X))
        X = self.softmax(X)
        return X


def batcher(iterable, batch_size):
    iterator = iter(iterable)
    while batch := list(islice(iterator, batch_size)):
        yield batch

In [4]:
model = compose.Pipeline(
    preprocessing.StandardScaler(),
    classification.Classifier(
        module=MyModule(10), loss_fn="binary_cross_entropy", optimizer_fn="sgd"
    ),
)
model

In [5]:
y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
    x, y = zip(*batch)
    x = pd.DataFrame(x)
    y_trues.extend(y)
    y = pd.Series(y)
    y_preds.extend(model.predict_many(x))
    model.learn_many(x, y)  # make the model learn

In [6]:
metrics.accuracy_score(
    y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)

0.4144