### Project 1: Sentiment Analysis with Natural Language Processing

This project builds a sentiment analysis model using natural language processing.


In [None]:
!pip install torchtext

In [None]:
## Import package
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext

# The Field / LabelField / BucketIterator pipeline and the IMDB dataset class
# with `.splits` live under `torchtext.legacy` in torchtext 0.9-0.11 and under
# `torchtext.data` / `torchtext.datasets` in earlier releases.
try:
    from torchtext.legacy.data import BucketIterator, Field, LabelField
    from torchtext.legacy.datasets import IMDB
except ImportError:
    from torchtext.data import BucketIterator, Field, LabelField
    from torchtext.datasets import IMDB






In [None]:
# Define the sentiment analysis model
class SentimentClassifier(nn.Module):
    """Bidirectional LSTM classifier over sequences of token indices.

    Tokens are embedded, fed through a bidirectional LSTM, and the final
    forward and backward hidden states are concatenated and projected to
    ``output_dim`` logits. Dropout (p=0.5) is applied to the embeddings and
    to the concatenated hidden state.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each token embedding.
        hidden_dim: Hidden size of the LSTM in each direction.
        output_dim: Number of logits produced per example.
    """

    def __init__(
        self,
        vocab_size: int,
        embedding_dim: int,
        hidden_dim: int,
        output_dim: int,
    ) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        # Two directions are concatenated, hence the doubled input width.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, text: torch.Tensor) -> torch.Tensor:
        """Compute raw logits for a batch of token-index sequences.

        Args:
            text: Integer token indices laid out as ``(seq_len, batch)``,
                the layout ``nn.LSTM`` expects when ``batch_first`` is left
                at its default.

        Returns:
            Tensor of shape ``(batch, output_dim)`` holding unnormalised
            logits.
        """
        embedded = self.dropout(self.embedding(text))
        _, (hidden, _) = self.rnn(embedded)
        # The last two slices of the final hidden state are the forward and
        # backward directions; join them along the feature axis.
        hidden = self.dropout(torch.cat((hidden[-2], hidden[-1]), dim=1))
        # No squeeze here: squeezing dim 0 would silently drop the batch axis
        # whenever the batch holds a single example.
        return self.fc(hidden)

In [None]:
# Preprocessing fields that describe how each column of the dataset is handled.
# Review text: tokenized with the 'spacy' tokenizer and lower-cased.
TEXT = Field(lower=True, tokenize='spacy')
# Labels: produced as floats, which is what the BCE-with-logits loss in the
# training loop is fed.
LABEL = LabelField(dtype=torch.float)




In [None]:
# Load the IMDB train and test splits, processed through the two fields.
train_data, test_data = IMDB.splits(text_field=TEXT, label_field=LABEL)

In [None]:
# Build both vocabularies from the training split only.
# The text vocabulary is capped via max_size and is asked to load the
# "glove.6B.100d" pretrained vectors alongside it.
TEXT.build_vocab(
    train_data,
    vectors="glove.6B.100d",
    max_size=25000,
)
LABEL.build_vocab(train_data)

In [None]:
# Pick the compute device: CUDA when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Batch iterators over both splits; batches are created on `device`.
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    device=device,
    batch_size=64,
)

In [None]:

# Initialize the model and optimizer
vocab_size = len(TEXT.vocab)
embedding_dim = 100  # matches the width of the "glove.6B.100d" vectors
hidden_dim = 256
output_dim = 1  # one logit per review
model = SentimentClassifier(vocab_size, embedding_dim, hidden_dim, output_dim)

# Seed the embedding table with the pretrained vectors attached to the text
# vocabulary; without this copy they are loaded but never used and the
# embeddings start from random values.
pretrained_embeddings = TEXT.vocab.vectors
if pretrained_embeddings is not None:
    with torch.no_grad():
        model.embedding.weight.copy_(pretrained_embeddings)

optimizer = optim.Adam(model.parameters())


In [None]:
# Place the model's parameters and buffers on the selected device.
model = model.to(device=device)


In [None]:
# Train the model
num_epochs = 10
# Build the loss module once: it is identical for every batch, so constructing
# it inside the inner loop only adds per-batch overhead.
criterion = nn.BCEWithLogitsLoss()

for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct = 0
    model.train()  # training mode, so the dropout layers are active
    for batch in train_iterator:
        optimizer.zero_grad()
        text = batch.text
        label = batch.label
        # Drop the trailing logit dimension so predictions line up with the
        # per-example labels.
        output = model(text).squeeze(1)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # sigmoid turns logits into probabilities; rounding gives 0/1 predictions.
        train_correct += (torch.round(torch.sigmoid(output)) == label).sum().item()

    # Mean loss per batch, and accuracy over every training example.
    train_loss /= len(train_iterator)
    train_acc = train_correct / len(train_data)

    print(f'Epoch: {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}%')

