# Transfer learning

<!-- TABS -->
## Connect to pinnacle

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and on-disk edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
# APPLY switches the notebook between two modes: True uses concrete values and
# applies components to the database; False leaves '<var:...>' placeholder
# strings in place (presumably filled in when the exported template is used —
# TODO confirm).
APPLY = True
# Name of the table/collection the sample data is inserted into.
COLLECTION_NAME = '<var:table_name>' if not APPLY else 'transfer_learning'
# NOTE(review): ID_FIELD is not referenced anywhere else in the visible notebook.
ID_FIELD = '<var:id_field>' if not APPLY else '_id'
# Selects both the sample dataset ('text' vs. anything else -> images) and the
# feature-extractor model used by the router further down.
MODALITY = 'text'

In [None]:
from pinnacle import pinnacle, CFG
# NOTE(review): presumably makes db.apply proceed without asking for
# confirmation — confirm against the pinnacle configuration docs.
CFG.force_apply = True

# Connect to the datalayer; the 'mongomock' scheme presumably gives an
# in-memory mock MongoDB, so nothing persists between kernels — TODO confirm.
db = pinnacle('mongomock:///test_db')

<!-- TABS -->
## Get useful sample data

In [None]:
# <tab: Text-Classification>
num_classes = 2
if MODALITY == 'text:
    !curl -O https://pinnacledb-public-demo.s3.amazonaws.com/text_classification.json
    import json
    
    with open("text_classification.json", "r") as f:
        data = json.load(f)
else:
    !curl -O https://pinnacledb-public-demo.s3.amazonaws.com/images_classification.zip && unzip images_classification.zip
    import json
    from PIL import Image
    
    with open('images/images.json', 'r') as f:
        data = json.load(f)
        
    data = [{'x': Image.open(d['image_path']), 'y': d['label']} for d in data]

After obtaining the data, we reshape it into records with `data` and `label` fields, ready to be inserted into the database.

In [None]:
# Rename the raw 'x'/'y' fields to the 'data'/'label' field names that the
# listener and trainers below refer to.
datas = [dict(data=record['x'], label=record['y']) for record in data]

<!-- TABS -->
## Insert simple data

After turning on auto_schema, we can insert data directly: pinnacle automatically analyzes the data types and constructs the matching table and datatypes.

In [None]:
from pinnacle import Document

# Handle for the target table/collection, looked up by name on the datalayer.
table_or_collection = db[COLLECTION_NAME]

# Wrap every record in a Document and insert them all with one query.
documents = [Document(record) for record in datas]
insert_query = table_or_collection.insert(documents)
ids = db.execute(insert_query)

# Base query over the whole table; later cells build on it.
select = table_or_collection.select()

<!-- TABS -->
## Compute features

In [None]:
# <tab: Text>
import sentence_transformers
from pinnacle import vector, Listener
from pinnacle_sentence_transformers import SentenceTransformer


# Wrap the pretrained all-MiniLM-L6-v2 sentence encoder as a pinnacle model
# with the identifier "embedding".
# postprocess calls .tolist() on each output — presumably turning the embedding
# array into a plain Python list for storage; TODO confirm.
pinnaclemodel_text = SentenceTransformer(
    identifier="embedding",
    object=sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2"),
    postprocess=lambda x: x.tolist(),
)

In [None]:
# <tab: Image>
import torchvision.models as models
from torchvision import transforms
from pinnacle_torch import TorchModel
from pinnacle import Listener
from PIL import Image

class TorchVisionEmbedding:
    """Bundle a pretrained ResNet-18 with the image preprocessing applied to its inputs."""

    def __init__(self):
        # Load the pre-trained ResNet-18 model.
        # NOTE(review): recent torchvision releases prefer the `weights=`
        # argument over `pretrained=True`; kept unchanged so older installs
        # keep working — confirm the installed version.
        self.resnet = models.resnet18(pretrained=True)

        # Set the model to evaluation mode
        self.resnet.eval()

        # Build the preprocessing pipeline once rather than on every call:
        # resize, centre-crop to 224x224, convert to a tensor, then normalise
        # each of the three channels with the given mean/std.
        self._transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def preprocess(self, image):
        """Convert one image into the normalised tensor fed to the ResNet.

        Args:
            image: The image to transform (a PIL image in this notebook).

        Returns:
            The tensor produced by the resize/crop/to-tensor/normalise pipeline.
        """
        # Normalize uses three per-channel statistics, so grayscale, palette
        # or RGBA PIL images are converted to RGB first.
        if isinstance(image, Image.Image) and image.mode != 'RGB':
            image = image.convert('RGB')
        return self._transform(image)
        
# Instantiate the helper and wrap its ResNet as a pinnacle TorchModel, reusing
# the helper's preprocess method; postprocess calls .numpy().tolist() on each
# output.
# NOTE(review): the name `model` is rebound by later cells to unrelated objects.
model = TorchVisionEmbedding()
pinnaclemodel_image = TorchModel(identifier='my-vision-model-torch', object=model.resnet, preprocess=model.preprocess, postprocess=lambda x: x.numpy().tolist())


In [None]:
from pinnacle.components.model import ModelRouter
# Router holding both embedding models; the `model` argument names which entry
# of `models` is used. With APPLY it is MODALITY, otherwise the
# '<var:embedding_model>' template placeholder.
feature_extractor = ModelRouter(
    'feature_extractor',
    models={
        'text': pinnaclemodel_text,
        'image': pinnaclemodel_image,
    },
    model='<var:embedding_model>' if not APPLY else MODALITY,
)

In [None]:
# Listener tying the feature extractor to the 'data' field of the rows matched
# by `select` (presumably computing features for them once applied — confirm).
feature_extractor_listener = Listener(
    identifier="features",
    key='data',
    select=select,
    model=feature_extractor,
)

# Only register the listener with the database when running for real; keep
# whatever db.apply returns as the listener handle from here on.
if APPLY:
    feature_extractor_listener = db.apply(feature_extractor_listener)

## Choose features key from feature listener

In [None]:
# Fetch one row together with the listener's outputs and list its keys, so the
# name of the computed-features field can be read off. The row is queried here
# directly so the cell works on a fresh kernel instead of relying on a
# variable that is only defined further down.
sample_with_features = next(
    select.outputs(feature_extractor_listener.predict_id).execute()
)
sample_with_features.unpack().keys()

<!-- TABS -->
## Build and train classifier

In [None]:
# Field name under which the listener's outputs appear (printed below).
input_key = feature_extractor_listener.outputs
# Query that extends the base select with the listener's outputs, identified
# by the listener's predict_id.
training_select = select.outputs(feature_extractor_listener.predict_id)
print(input_key)
# Pull a single row from the training query for inspection.
x = next(training_select.execute())

In [None]:
# <tab: Scikit-Learn>
from pinnacle_sklearn import Estimator, SklearnTrainer
from sklearn.svm import SVC

# Support-vector classifier wrapped as a pinnacle Estimator. The attached
# trainer reads (features, label) pairs — the listener output field and the
# 'label' field — from `training_select`.
scikit_model = Estimator(
    identifier="my-model-scikit",
    object=SVC(),
    trainer=SklearnTrainer(
        "my-trainer",
        key=(input_key, "label"),
        select=training_select,
    ),
)

In [None]:
# <tab: Torch>
import torch
from torch import nn
from pinnacle_torch.model import TorchModel
from pinnacle_torch.training import TorchTrainer
from torch.nn.functional import cross_entropy


class SimpleModel(nn.Module):
    """Two-layer perceptron whose input width is inferred from the first batch.

    Args:
        input_size: Accepted for backward compatibility only; it is ignored
            because the input width is inferred from the data.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=16, hidden_size=32, num_classes=3):
        super().__init__()
        self.hidden_size = hidden_size
        # LazyLinear registers its parameters at construction time and sizes
        # them on the first forward pass. Creating a plain nn.Linear inside
        # forward() instead would leave the model without first-layer
        # parameters until it is called (so an optimizer built beforehand
        # would miss them) and would always create the layer on the CPU.
        self.fc1 = nn.LazyLinear(hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of feature vectors.

        Args:
            x: Tensor whose last dimension holds the features.

        Returns:
            Tensor of logits with ``num_classes`` entries in the last dimension.
        """
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Convert one stored feature value into a tensor before it reaches the model
# (presumably a list of floats, given the extractors' .tolist() postprocessing
# — TODO confirm).
preprocess = lambda x: torch.tensor(x)

# Turn the model's raw output into a predicted class index
def postprocess(x):
    """Return the position of the largest entry of ``x`` as a plain int."""
    _, best_index = x.topk(1)
    return int(best_index.item())

def data_transform(features, label):
    """Convert ``features`` to a tensor and pass ``label`` through unchanged."""
    feature_tensor = torch.tensor(features)
    return feature_tensor, label

# Build a small feed-forward classifier. Its input width is inferred from the
# first batch of features, so only the number of classes is passed here.
model = SimpleModel(num_classes=num_classes)
torch_model = TorchModel(
    identifier='my-model-torch',
    object=model,
    preprocess=preprocess,
    postprocess=postprocess,
    trainer=TorchTrainer(
        key=(input_key, 'label'),
        identifier='my_trainer',
        objective=cross_entropy,
        loader_kwargs={'batch_size': 10},
        max_iterations=1000,
        validation_interval=100,
        # Train on the query that includes the listener outputs: `input_key`
        # names a listener output field, which the bare `select` does not
        # carry. This also matches the scikit trainer and the validation set.
        select=training_select,
        transform=data_transform,
    ),
)

Define a validation to evaluate the model's performance after training.

In [None]:
from pinnacle import Dataset, Metric, Validation


def acc(x, y):
    """Fraction of positions at which ``x`` and ``y`` agree, relative to ``len(x)``."""
    matches = 0
    for predicted, expected in zip(x, y):
        matches += predicted == expected
    return matches / len(x)


# Wrap the accuracy function as a pinnacle Metric.
accuracy = Metric(identifier="acc", object=acc)
# Validation over the 'valid' fold of the training query, reading the same
# (features, label) key pair that the trainers use.
validation = Validation(
    "transfer_learning_performance",
    key=(input_key, "label"),
    datasets=[
        Dataset(identifier="my-valid", select=training_select.add_fold('valid'))
    ],
    metrics=[accuracy],
)
# Attach the same validation to both candidate classifiers.
scikit_model.validation = validation
torch_model.validation = validation

When we execute the apply function, the model is added to the database and, because the model has a Trainer, its training task is run.

In [None]:
# Router holding both trainable classifiers; the `model` argument names which
# entry of `models` is used. With APPLY it is 'torch', otherwise the
# '<var:trainer>' template placeholder.
trainer = ModelRouter(
    'trainer',
    models={
        'scikit': scikit_model,
        'torch': torch_model,
    },
    model='<var:trainer>' if not APPLY else 'torch',
)

In [None]:
# Register the trainer router with the database, but only when running for
# real rather than building the template.
if APPLY:
    db.apply(trainer)

In [None]:
# Display whatever encode() returns for the trainer router (the cell's last
# expression is rendered as its output).
trainer.encode()

Get the training metrics

In [None]:
# Display the result of db.show for the 'model' component type.
db.show('model')

In [None]:
# Load a stored model by identifier and display its metric_values attribute.
# NOTE(review): this loads 'my-model-scikit', while the trainer router selects
# 'torch' when APPLY is True — confirm that the scikit model is actually
# applied and trained in that configuration, or load 'my-model-torch' instead.
model = db.load('model', 'my-model-scikit')
model.metric_values

In [None]:
from pinnacle import Application

# Group the feature listener and the trainer router into one Application
# component named 'transfer-learning'.
application = Application(identifier='transfer-learning', components=[feature_extractor_listener, trainer])

In [None]:
from pinnacle import Template

# Package the application as a template, passing the raw sample data along and
# declaring the '<var:...>' placeholders it exposes.
t = Template(
    'transfer-learner',
    data=data,
    template=application,
    template_variables=['trainer', 'embedding_model', 'table_name'],
)

In [None]:
# Export the template, passing the current working directory as the target.
t.export('.')