In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

In [None]:
# Sample data (replace with your dataset).
# A small, balanced toy corpus so the notebook runs end-to-end on a fresh
# kernel; `sentences[i]` is labelled by `labels[i]`.
sentences = [
    "This is a positive sentence.",
    "This is a negative sentence.",
    "Another positive example.",
    "Another negative example.",
    "What a wonderful and positive day.",
    "What a terrible and negative day.",
    "I really enjoyed this great movie.",
    "I really disliked this awful movie.",
    "The food was delicious and the service was great.",
    "The food was bland and the service was awful.",
]
labels = [
    "positive",
    "negative",
    "positive",
    "negative",
    "positive",
    "negative",
    "positive",
    "negative",
    "positive",
    "negative",
]

# Every sentence needs exactly one label before the data is split.
assert len(sentences) == len(labels), "sentences and labels must be the same length"

In [None]:
# Hold out 20% of the examples for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Bag-of-words features: learn the vocabulary from the training sentences
# only, then map both splits onto that same vocabulary.
vectorizer = CountVectorizer()
vectorizer.fit(X_train)

X_train_vec = vectorizer.transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
# Map string labels to integer class ids. The mapping is learned from the
# training labels and then applied unchanged to the test labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)

y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [None]:
# Convert text data to PyTorch tensors.
# Build int64 tensors directly from the dense count matrices instead of going
# through the legacy float32 `torch.Tensor(...)` constructor and casting back.
# NOTE(review): these rows are per-document term counts produced by
# CountVectorizer, not sequences of token ids, yet the model below passes them
# through nn.Embedding as indices — confirm this is the intended input.
X_train_tensor = torch.tensor(X_train_vec.toarray(), dtype=torch.long)
X_test_tensor = torch.tensor(X_test_vec.toarray(), dtype=torch.long)

# Convert labels to PyTorch tensors (int64 class ids, as CrossEntropyLoss
# expects for class-index targets).
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)


In [None]:
# Create custom Dataset class
class TextDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        data: indexable, sized collection of feature rows.
        labels: indexable, sized collection of labels; ``labels[i]`` belongs
            to ``data[i]``.

    Raises:
        ValueError: if ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels):
        # Fail early: a mismatch would otherwise only show up as an
        # IndexError (or silently unused labels) while iterating.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

In [None]:
# Wrap the tensors in datasets, then batch them with DataLoaders.
batch_size = 64

train_dataset = TextDataset(X_train_tensor, y_train_tensor)
test_dataset = TextDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Define the RNN model
class RNN(nn.Module):
    """Embedding -> single-layer RNN -> linear classifier.

    Args:
        input_dim: number of rows in the embedding table.
        embedding_dim: size of each embedding vector.
        hidden_dim: size of the RNN hidden state.
        output_dim: number of values produced per example.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        # batch_first=True: the RNN consumes and emits (batch, seq, feature).
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        """Return the classifier output computed from the last time step."""
        embedded = self.embedding(text)
        # The final hidden state is returned too, but only the per-step
        # outputs are used here.
        per_step_output, _ = self.rnn(embedded)
        last_step = per_step_output[:, -1, :]
        return self.fc(last_step)

#### output contains the hidden states for each time step in the sequence. Because the RNN above is created with batch_first=True, it has the shape (batch_size, sequence_length, hidden_dim). Each time step's hidden state contains information about the input sequence up to that point.

#### hidden is the final hidden state after processing the entire sequence. It represents a summary of the entire input sequence and is typically used for making predictions or classification. It has the shape (num_layers, batch_size, hidden_dim); batch_first does not change the layout of hidden.

In [None]:
# Hyperparameters used to build the model in the next cell.
embedding_dim = 100  # Embedding dimension
hidden_dim = 256  # RNN hidden layer size

# One output per distinct class known to the label encoder.
output_dim = len(label_encoder.classes_)

# Number of feature columns in the training tensor; passed to the model as
# its input_dim.
input_dim = X_train_tensor.size(1)

In [None]:
# Build the network, then the loss function and the optimizer that updates
# the network's parameters.
model = RNN(
    input_dim=input_dim,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Training loop
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator`` and report the mean batch loss.

    Args:
        model: module called as ``model(batch_text)``; put into training mode.
        iterator: iterable yielding ``(batch_text, batch_labels)`` pairs.
        optimizer: optimizer whose gradients are cleared before, and whose
            ``step()`` is called after, each backward pass.
        criterion: callable ``criterion(predictions, batch_labels)`` returning
            a scalar loss tensor.

    Returns:
        float: mean of the per-batch loss values seen during the pass, or
        ``nan`` when ``iterator`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for batch_text, batch_labels in iterator:
        optimizer.zero_grad()
        predictions = model(batch_text)
        loss = criterion(predictions, batch_labels)
        loss.backward()
        optimizer.step()

        # Track the loss so callers can monitor training without a second pass.
        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        return float("nan")
    return running_loss / num_batches

In [None]:
# Evaluation function
def evaluate(model, iterator, criterion):
    """Compute mean batch loss and accuracy of ``model`` over ``iterator``.

    Args:
        model: module called as ``model(batch_text)``; put into eval mode.
        iterator: iterable yielding ``(batch_text, batch_labels)`` pairs. It
            does not need to support ``len()``.
        criterion: callable ``criterion(predictions, batch_labels)`` returning
            a scalar loss tensor.

    Returns:
        tuple[float, float]: ``(mean_batch_loss, accuracy)``. The loss is the
        mean of the per-batch loss values (each batch counts equally), and the
        accuracy is correct predictions divided by examples seen. Both are
        ``nan`` when ``iterator`` yields no examples.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_text, batch_labels in iterator:
            predictions = model(batch_text)
            loss = criterion(predictions, batch_labels)
            total_loss += loss.item()
            # Count batches here rather than calling len(iterator), so plain
            # generators and other unsized iterables work too.
            num_batches += 1

            # Predicted class = index of the largest score along dim 1.
            predicted = predictions.argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    if num_batches == 0 or total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return float("nan"), float("nan")
    return total_loss / num_batches, correct / total

In [None]:
# Train the model: one optimisation pass per epoch, followed by an evaluation
# of the training split and then the test split.
num_epochs = 10

for epoch in range(num_epochs):
    train(model, train_loader, optimizer, criterion)

    # Evaluated in order: training loader first, test loader second.
    (train_loss, train_acc), (test_loss, test_acc) = (
        evaluate(model, loader, criterion)
        for loader in (train_loader, test_loader)
    )

    print(f'Epoch: {epoch + 1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
    print(f'\tTest Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')