<!-- TABS -->
# Build and train classifier

In [None]:
# <testing: >
import numpy as np
import torch
from sklearn.datasets import make_blobs
from pinnacledb import pinnacle

# Connect to a mongomock-backed datastore for this test fixture
db = pinnacle('mongomock://temp')

# Generate 100 data points with 16 features, grouped around 3 centers
X, y = make_blobs(n_samples=100, n_features=16, centers=3, random_state=42)


X_torch = torch.from_numpy(X).float()
y_torch = torch.from_numpy(y).long()

# Declare a float32 tensor datatype of shape (16,) and add it to the database
from pinnacledb.ext.torch import tensor
t = tensor(torch.float32, (16,))
db.add(t)

from pinnacledb.backends.mongodb import Collection
from pinnacledb import Document

# The target is stored under 'label' because the trainers in the tabs below
# read key=(input_key, 'label'). It is stored as an integer class index rather
# than a float: cross_entropy treats floating-point targets as class
# probabilities, not as class indices.
data = [
    Document({'X': t(features), 'label': int(target)})
    for features, target in zip(X_torch, y_torch)
]
db.execute(Collection('clt').insert_many(data))

# Names consumed by the model definitions in the tabs below
input_key = 'X'
select = Collection('clt').find()

In [None]:
# <tab: Scikit-Learn>
from pinnacledb.ext.sklearn import Estimator, SklearnTrainer
from sklearn.svm import SVC

# Wrap a scikit-learn SVC together with a trainer.
# `key` names the input field and the target field read from each document;
# `select` is the query that supplies the training documents.
model = Estimator(
    identifier="my-model",
    object=SVC(),
    trainer=SklearnTrainer(
        "my-trainer",
        key=(input_key, "label"),
        select=select,
    ),
)

In [None]:
# <tab: Torch>
import torch
from torch import nn
from pinnacledb.ext.torch.model import TorchModel
from pinnacledb.ext.torch.training import TorchTrainer
from torch.nn.functional import cross_entropy


class SimpleModel(nn.Module):
    """Two-layer feed-forward network: linear -> ReLU -> linear.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output scores produced per input.
    """

    def __init__(self, input_size=16, hidden_size=32, num_classes=3):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one unnormalised score per class for the input ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

def preprocess(x):
    """Convert a raw input ``x`` into a ``torch.Tensor``."""
    return torch.tensor(x)

# Turn a model output into a predicted class index
def postprocess(x):
    """Return the position of the largest entry of ``x`` as a Python ``int``."""
    _, top_index = x.topk(1)
    return int(top_index.item())

def data_transform(features, label):
    """Return ``features`` converted to a tensor, paired with ``label`` unchanged."""
    feature_tensor = torch.tensor(features)
    return feature_tensor, label

# Dimensions of the classification problem; adjust these to match your data.
feature_size = 16  # number of input features per sample
num_classes = 3  # number of distinct target classes

# Build the feed-forward classifier and wrap it, together with its trainer,
# in a TorchModel. `preprocess`/`postprocess` are applied around the network
# at prediction time; `transform` prepares each (features, label) pair for training.
model = TorchModel(
    identifier='my-model',
    object=SimpleModel(feature_size, num_classes=num_classes),
    preprocess=preprocess,
    postprocess=postprocess,
    trainer=TorchTrainer(
        key=(input_key, 'label'),
        identifier='my_trainer',
        objective=cross_entropy,
        loader_kwargs={'batch_size': 10},
        max_iterations=1000,
        validation_interval=100,
        select=select,
        transform=data_transform,
    ),
)

In [None]:
# <testing: >
# Add the model defined in the selected tab to the database.
# NOTE(review): presumably this also runs the attached trainer — confirm.
db.apply(model)

In [None]:
# <testing: >
# Reload the stored model by identifier, then print its prediction for the
# first sample followed by that sample's true class.
trained_model = db.load('model', 'my-model')
print(trained_model.predict(X_torch[0]))
print(y_torch[0])