# Sentiment Analysis using TextEmbeddingProcessor

In this notebook, we will demonstrate how to use the `TextEmbeddingProcessor` class for sentiment analysis. We will process text input and generate embeddings using a pre-trained transformer model.

In [1]:
from river import datasets
from river import linear_model
from river import metrics
from collections import defaultdict
from deep_river.preprocessing.embedding import EmbeddingTransformer
import torch.nn as nn

In [2]:
# Load the Sentiment140 dataset (already provided by River)
dataset = datasets.SMSSpam()
# Metric for evaluation
metric = metrics.Accuracy()

for x, y in dataset:
    print(x)
    print(f"Number of available bikes: {y}")
    break

{'body': 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n'}
Number of available bikes: False


In [3]:

# Initialize the EmbeddingTransformer



# Define the model
class WordEmbeddingModel(nn.Module):
    def __init__(self, n_features=2,  embedding_dim=50):
        super(WordEmbeddingModel, self).__init__()
        self.embedding = nn.Embedding(n_features, embedding_dim)
        self.fc = nn.Linear(embedding_dim, 1)  # Example output for binary classification

    def forward(self, x):
        embedded = self.embedding(x)
        return embedded.mean(dim=1)  # Mean pooling


# Logistic Regression for sentiment prediction
model = EmbeddingTransformer(
    module=WordEmbeddingModel,
    loss_fn="binary_cross_entropy_with_logits",
    optimizer_fn="adam_w",
    tokenizer="basic_english",
    lr=0.01
)
model |= linear_model.LogisticRegression()






In [4]:


# Training loop
for i, (x, y) in enumerate(dataset.take(100)):  # Limit to 100 instances for the example
    # Train the model
    y_pred = model.predict_one(x)
    model.learn_one(x, y)

    # Update and print metric
    metric.update(y, y_pred)

    # Print progress every 10 iterations
    if i % 10 == 0:
        print(f"Iteration {i}, Accuracy: {metric.get():.2%}")

# Final accuracy
print("Final Accuracy:", metric.get())

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)