In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, Dataset # Import Dataset class
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Fixed seed so the train/validation split is the same on every Restart & Run All.
SEED = 42

# Integer ids for the sentiment classes. The `class_names` list inside
# `predict_sentiment` must list the classes in this same id order.
LABEL_TO_ID = {'positive': 0,
               'neutral' : 1,
               'negative': 2}

# NOTE(review): assumes the CSV has exactly two columns (review text, sentiment)
# and a header row that read_csv consumes — confirm against the file.
dataset = pd.read_csv('./student_reviews.csv')
dataset.columns = ['text', 'label']

# Series.map turns every value missing from LABEL_TO_ID into NaN. Fail here
# with a clear message instead of letting NaN labels reach training.
encoded_labels = dataset['label'].map(LABEL_TO_ID)
unmapped = encoded_labels.isna()
if unmapped.any():
    bad_values = sorted(dataset.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(
        f"Unexpected label values in student_reviews.csv: {bad_values}; "
        f"expected one of {sorted(LABEL_TO_ID)}"
    )
dataset['label'] = encoded_labels.astype('int64')

# Split the dataset into train and validation sets (90% / 10%), reproducibly.
train_dataset, val_dataset = train_test_split(dataset, test_size=0.1, random_state=SEED)

# Convert to Hugging Face Dataset objects. preserve_index=False keeps the
# shuffled pandas index from being stored as an extra column.
train_dataset = Dataset.from_pandas(train_dataset, preserve_index=False)
val_dataset = Dataset.from_pandas(val_dataset, preserve_index=False)

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def preprocess_function(examples):
    """Tokenize a batch of examples for use with ``Dataset.map(batched=True)``.

    Args:
        examples: A batch mapping whose ``'text'`` entry holds the raw strings.

    Returns:
        The tokenizer output for the batch, with over-long sequences truncated.

    Padding is intentionally not applied here. With ``padding=True`` every row
    would be padded to the longest sequence in its whole map batch. The Trainer
    in this notebook is given the tokenizer, so padding happens per mini-batch
    at collation time instead.
    """
    return tokenizer(examples['text'], truncation=True)

# Tokenize the train split first, then the validation split, in batched mode.
train_dataset, val_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, val_dataset)
)

# Expose only the model inputs and the label, as PyTorch tensors.
# set_format changes each dataset in place, so no reassignment is needed.
torch_columns = ['input_ids', 'attention_mask', 'label']
for tokenized_split in (train_dataset, val_dataset):
    tokenized_split.set_format(type='torch', columns=torch_columns)


In [None]:
# Sanity check: show the tokenized training split as this cell's output.
train_dataset

In [None]:
# Pretrained BERT encoder with a sequence-classification head sized for the
# three sentiment classes (positive / neutral / negative).
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
)

In [None]:
# Hyperparameters and output locations, gathered in one mapping so they are
# easy to scan and tweak before the arguments object is built.
training_config = dict(
    output_dir='./results',            # output directory
    num_train_epochs=10,               # number of passes over the training split
    per_device_train_batch_size=8,     # training batch size per device
    per_device_eval_batch_size=8,      # evaluation batch size per device
    # NOTE(review): 500 warmup steps may be a large share of all optimizer
    # steps if the dataset is small — confirm against the dataset size.
    warmup_steps=500,
    weight_decay=0.01,                 # strength of weight decay
    logging_dir='./logs',              # directory for storing logs
    logging_steps=10,                  # log every 10 steps
)
training_args = TrainingArguments(**training_config)

In [None]:
# Wire together the model, the training arguments, both data splits and the
# tokenizer.
trainer_inputs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)
trainer = Trainer(**trainer_inputs)

# Fine-tune the model. As the last expression in the cell, the value returned
# by train() is shown as the cell output.
trainer.train()


In [None]:
# Evaluate the fine-tuned model with the Trainer (which was given val_dataset
# as its eval_dataset) and print whatever evaluate() returns.
results = trainer.evaluate()
print(results)


In [None]:
def predict_sentiment(text):
    """Classify one piece of text as 'positive', 'neutral' or 'negative'.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        text: The string to classify. The result is read with ``.item()``,
            so exactly one sequence is expected.

    Returns:
        The predicted class name. The order of ``class_names`` matches the
        integer encoding applied to the labels when the CSV was loaded
        (positive=0, neutral=1, negative=2).
    """
    class_names = ['positive', 'neutral', 'negative']

    # Tokenize the input text, then move the tensors to the device the model
    # lives on. After training the model may sit on an accelerator, and
    # feeding it CPU tensors would raise a device-mismatch error.
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    inputs = inputs.to(model.device)

    # Inference mode: disable dropout explicitly rather than relying on a
    # previous evaluate() call having left the model in eval mode.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)

    # Index of the highest-scoring class (0, 1 or 2).
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()

    # Map the predicted class index to its sentiment label.
    return class_names[predicted_class]


In [None]:
# Example text for sentiment prediction.
# NOTE(review): "collage" looks like a typo for "college" — confirm before
# changing, since it alters the input the model sees.
text = "collage is evil"

# Run the fine-tuned model on the example and print the predicted class name.
sentiment = predict_sentiment(text)
print(f"Sentiment: {sentiment}")