In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.vocab import GloVe
import pandas as pd
import ast
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Step 1: Load and preprocess the data
# Both splits are read from CSV files in the current working directory.
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')

# Every 'tweet' cell is parsed as a Python literal and materialised as a
# plain list of its elements, giving one token list per row.
train_tweets = [list(ast.literal_eval(raw_tweet)) for raw_tweet in train_data['tweet']]
val_tweets = [list(ast.literal_eval(raw_tweet)) for raw_tweet in val_data['tweet']]

# The 'sentiment' column becomes one label tensor per split.
train_sentiments = train_data['sentiment']
val_sentiments = val_data['sentiment']
train_labels = torch.tensor(train_sentiments.tolist())
val_labels = torch.tensor(val_sentiments.tolist())

In [None]:
# Builds index-encoded inputs, label tensors, TensorDatasets and DataLoaders
# for the train and validation splits.
#
# NOTE(review): `TEXT` and `COMPANY` are not defined in any cell shown before
# this one; they are accessed as objects exposing `.vocab.stoi`. Confirm they
# are created somewhere before this cell is run on a fresh kernel.

# Load the train and validation data
# (re-reads the same CSV files as the Step 1 cell and rebinds train_data/val_data)
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')

# Convert the tweet tokens to numerical indices
# NOTE(review): `tweet` is the raw CSV cell value here. The Step 1 cell parses
# this same column with ast.literal_eval, which suggests it is a stringified
# list; if so, `for token in tweet` walks single characters, not tokens.
# TODO confirm.
train_tweets = [[TEXT.vocab.stoi[token] for token in tweet] for tweet in train_data['tweet']]
val_tweets = [[TEXT.vocab.stoi[token] for token in tweet] for tweet in val_data['tweet']]

# Convert the sentiment labels to a tensor
train_labels = torch.tensor(train_data['sentiment'].tolist())
val_labels = torch.tensor(val_data['sentiment'].tolist())

# Convert the company names to numerical indices
train_companies = [COMPANY.vocab.stoi[company] for company in train_data['company']]
val_companies = [COMPANY.vocab.stoi[company] for company in val_data['company']]

# Combine tweet indices and company indices
# NOTE(review): each zipped element is a (list_of_indices, company_index)
# pair, which is not a rectangular structure; torch.tensor is expected to
# reject it. Verify, and decide how the company index should be fed to the model.
train_inputs = torch.tensor(list(zip(train_tweets, train_companies)))
val_inputs = torch.tensor(list(zip(val_tweets, val_companies)))

# Create TensorDatasets
train_dataset = TensorDataset(train_inputs, train_labels)
val_dataset = TensorDataset(val_inputs, val_labels)

# Create DataLoaders for batching
# Only the training loader shuffles; both use batches of 32.
# NOTE(review): train_inputs, the datasets and the loaders are all rebound
# again by the later "Step 3" cell.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [None]:
# Step 2: Define the Model
class TweetRNN(nn.Module):
    """Single-layer RNN classifier on top of pretrained GloVe (6B, 50-d) embeddings.

    Args:
        input_size: Per-time-step feature size expected by the RNN. The RNN is
            fed the embedding output, so this must equal the GloVe dimension (50).
        hidden_size: Number of features in the RNN hidden state.
        num_class: Number of output classes (length of the returned logit vector).
    """

    def __init__(self, input_size, hidden_size, num_class):
        super().__init__()
        # from_pretrained needs the 2-D weight tensor, not the GloVe wrapper
        # object, so hand it the `.vectors` matrix. The embedding weights stay
        # frozen (from_pretrained's default).
        glove = GloVe(name='6B', dim=50)
        self.emb = nn.Embedding.from_pretrained(glove.vectors)
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Return class logits for a batch of token-index sequences.

        Args:
            x: Integer tensor of shape (batch, seq_len) with indices into the
                embedding table.

        Returns:
            Tensor of shape (batch, num_class), computed from the RNN output
            at the last time step.
        """
        x = self.emb(x)
        # Create the initial hidden state with the same device and dtype as
        # the embedded input so the model keeps working after `.to(device)`.
        h0 = x.new_zeros(1, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, h0)
        # Only the final time step feeds the classifier.
        return self.fc(out[:, -1, :])

In [None]:
# Step 3: Create TensorDatasets and DataLoaders
# Tweets generally differ in length, and torch.tensor(...) over ragged nested
# lists raises. Pad each split to its longest tweet instead, so every split
# becomes one rectangular LongTensor (identical to the unpadded tensor when
# all tweets already share a length).
# Assumes train_tweets / val_tweets hold integer token indices - TODO confirm.
# NOTE(review): padding uses index 0 and is appended at the end, while
# TweetRNN.forward reads the last time step, so padded positions are seen by
# the RNN. Consider a dedicated padding index or packed sequences.
train_inputs = nn.utils.rnn.pad_sequence(
    [torch.tensor(tweet, dtype=torch.long) for tweet in train_tweets],
    batch_first=True,
    padding_value=0,
)
val_inputs = nn.utils.rnn.pad_sequence(
    [torch.tensor(tweet, dtype=torch.long) for tweet in val_tweets],
    batch_first=True,
    padding_value=0,
)

# Pair each padded sequence with its sentiment label.
train_dataset = TensorDataset(train_inputs, train_labels)
val_dataset = TensorDataset(val_inputs, val_labels)

# Shuffle only the training split; both splits use batches of 32.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [None]:
def train(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module called as ``model(inputs)``. Its output is reduced with
            ``argmax(dim=1)``, so it should be per-class scores of shape
            (batch, num_classes).
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called once
            per batch.
        criterion: Loss callable invoked as ``criterion(predictions, labels)``.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``: the average of the per-batch
        loss values and the fraction of samples classified correctly.
        Returns ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    total_loss = 0.0
    total_correct = 0
    num_batches = 0
    num_samples = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        predictions = model(inputs)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_correct += (predictions.argmax(dim=1) == labels).sum().item()
        num_batches += 1
        num_samples += labels.size(0)
    # Denominators come from the batches actually consumed, not from a
    # notebook-level dataset variable, so the result is right for whichever
    # loader is passed in.
    if num_batches == 0 or num_samples == 0:
        return 0.0, 0.0
    return total_loss / num_batches, total_correct / num_samples

def evaluate(model, val_loader, criterion):
    """Score ``model`` on ``val_loader`` without updating its parameters.

    Args:
        model: Module called as ``model(inputs)``. Its output is reduced with
            ``argmax(dim=1)``, so it should be per-class scores of shape
            (batch, num_classes).
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(predictions, labels)``.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``: the average of the per-batch
        loss values and the fraction of samples classified correctly.
        Returns ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_correct = 0
    num_batches = 0
    num_samples = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in val_loader:
            predictions = model(inputs)
            loss = criterion(predictions, labels)
            total_loss += loss.item()
            total_correct += (predictions.argmax(dim=1) == labels).sum().item()
            num_batches += 1
            num_samples += labels.size(0)
    # Denominators come from the batches actually consumed, not from a
    # notebook-level dataset variable, so the result is right for whichever
    # loader is passed in.
    if num_batches == 0 or num_samples == 0:
        return 0.0, 0.0
    return total_loss / num_batches, total_correct / num_samples

# Alternate one training pass and one validation pass per epoch and report
# the loss/accuracy of both.
# NOTE(review): `model`, `optimizer` and `criterion` are not created in any
# cell shown above - confirm they are defined before this loop runs.
num_epochs = 5
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    # One print call, three lines: same text as three separate prints.
    print(
        f"Epoch: {epoch+1}/{num_epochs}",
        f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}",
        sep="\n",
    )