In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification

In [None]:
# Load the review dataset from the CSV file next to the notebook.
df = pd.read_csv('df.csv')

In [None]:
# Show the column labels of the loaded frame; later cells read the
# 'reviews_processed' and 'true_sentiment' columns.
df.columns

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['reviews_processed'],
    df['true_sentiment'],
    test_size=0.2,
    random_state=42,
)


In [None]:

# The tokenizer and the classifier are both loaded from the same pretrained
# checkpoint; the classification head gets one output per distinct value found
# in the 'true_sentiment' column.
checkpoint = 'bert-base-uncased'
num_classes = len(df['true_sentiment'].unique())

tokenizer = BertTokenizer.from_pretrained(checkpoint)
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=num_classes)


In [None]:
# Tokenize both splits with the same settings: truncation and padding are
# enabled and sequences are capped at 256 tokens.
tokenizer_options = dict(truncation=True, padding=True, max_length=256)

train_encodings = tokenizer(train_texts.tolist(), **tokenizer_options)
test_encodings = tokenizer(test_texts.tolist(), **tokenizer_options)


In [None]:
from torch.utils.data import DataLoader, TensorDataset

BATCH_SIZE = 8


def _build_dataset(encodings, labels):
    """Pack token ids, attention masks and labels into a TensorDataset.

    Args:
        encodings: tokenizer output exposing 'input_ids' and 'attention_mask'.
        labels: pandas Series of class labels, row-aligned with `encodings`.

    Returns:
        A TensorDataset yielding (input_ids, attention_mask, label) triples.
    """
    return TensorDataset(
        # The tokenizer was called without `return_tensors`, so these fields
        # are Python lists, which TensorDataset rejects. `as_tensor` converts
        # them and also works unchanged if they are already tensors.
        torch.as_tensor(encodings['input_ids']),
        torch.as_tensor(encodings['attention_mask']),
        # The cross-entropy loss needs int64 class indices.
        # Assumes labels are integer ids in [0, num_labels) -- TODO confirm
        # against the contents of df['true_sentiment'].
        torch.tensor(labels.values, dtype=torch.long),
    )


train_dataset = _build_dataset(train_encodings, train_labels)
test_dataset = _build_dataset(test_encodings, test_labels)

# Only the training batches are shuffled; evaluation order is irrelevant.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)


In [None]:
# transformers.AdamW is deprecated in favour of torch.optim.AdamW (and removed
# from newer transformers releases), so the PyTorch optimizer is used instead.
from torch.optim import AdamW

NUM_EPOCHS = 3
LEARNING_RATE = 1e-5

# eps and weight_decay are pinned to the defaults of the old transformers
# implementation so the optimisation behaviour stays the same as before.
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, eps=1e-6, weight_decay=0.0)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

model.train()
for epoch in range(NUM_EPOCHS):
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids, attention_mask, labels = batch
        inputs = {
            "input_ids": input_ids.to(device),
            "attention_mask": attention_mask.to(device),
            # Passing labels makes the model compute the loss itself.
            "labels": labels.to(device),
        }
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()


In [None]:
# Score the fine-tuned model on the held-out split.
model.eval()
correct_predictions = 0

# Gradients are not needed for inference, so tracking is disabled for the
# whole evaluation pass.
with torch.no_grad():
    for input_ids, attention_mask, labels in test_loader:
        outputs = model(input_ids.to(device), attention_mask=attention_mask.to(device))
        logits = outputs.logits
        # The predicted class is the index of the largest logit in each row.
        predicted = logits.argmax(dim=1)
        matches = predicted == labels.to(device)
        correct_predictions += matches.sum().item()

accuracy = correct_predictions / len(test_labels)
print(f"Accuracy: {accuracy:.4f}")
