In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax

# Define emotion labels
emotions = ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise']

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(emotions))

# Function to predict emotion
def predict_emotion(text):
    # Tokenize input text
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    
    # Get model output
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Apply softmax to get probabilities
    probs = softmax(outputs.logits, dim=1)
    
    # Get the predicted class (emotion)
    predicted_class = torch.argmax(probs, dim=1).item()
    
    # Get the confidence score
    confidence = probs[0][predicted_class].item()
    
    return emotions[predicted_class], confidence

# Example usage
text = "I'm so excited about my upcoming vacation!"
emotion, confidence = predict_emotion(text)
print(f"Text: {text}")
print(f"Predicted emotion: {emotion}")
print(f"Confidence: {confidence:.2f}")

# Note: In a real-world scenario, you would need to fine-tune this model on an emotion-labeled dataset
# The following code shows how you might approach fine-tuning:

'''
from transformers import Trainer, TrainingArguments
from torch.utils.data import Dataset

class EmotionDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        item = tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=512)
        item['labels'] = self.labels[idx]
        return item

# Assuming you have your data in 'texts' and 'labels' lists
train_dataset = EmotionDataset(texts, labels)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

trainer.train()
'''

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Text: I'm so excited about my upcoming vacation!
Predicted emotion: anger
Confidence: 0.22


"\nfrom transformers import Trainer, TrainingArguments\nfrom torch.utils.data import Dataset\n\nclass EmotionDataset(Dataset):\n    def __init__(self, texts, labels):\n        self.texts = texts\n        self.labels = labels\n\n    def __len__(self):\n        return len(self.texts)\n\n    def __getitem__(self, idx):\n        item = tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=512)\n        item['labels'] = self.labels[idx]\n        return item\n\n# Assuming you have your data in 'texts' and 'labels' lists\ntrain_dataset = EmotionDataset(texts, labels)\n\ntraining_args = TrainingArguments(\n    output_dir='./results',\n    num_train_epochs=3,\n    per_device_train_batch_size=16,\n    per_device_eval_batch_size=64,\n    warmup_steps=500,\n    weight_decay=0.01,\n    logging_dir='./logs',\n)\n\ntrainer = Trainer(\n    model=model,\n    args=training_args,\n    train_dataset=train_dataset,\n)\n\ntrainer.train()\n"

In [31]:
import pandas as pd
from transformers import BertForSequenceClassification, BertTokenizer
import torch

# Load the dataset
data = pd.read_csv('text.csv')

# Preprocess the data
emotions = data['label'].unique()
emotion_to_id = {emotion: i for i, emotion in enumerate(emotions)}
data['emotion_id'] = data['label'].map(emotion_to_id)

# Split the data into training and testing sets
train_data = data.sample(frac=0.8, random_state=42)
test_data = data[~data.index.isin(train_data.index)]

# Initialize the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(emotions))

# Tokenize the text data
train_input_ids = []
train_attention_masks = []
for text in train_data['text']:
    encoding = tokenizer(text, padding='max_length', truncation=True, return_tensors='pt', max_length=512)
    train_input_ids.append(encoding['input_ids'])
    train_attention_masks.append(encoding['attention_mask'])

test_input_ids = []
test_attention_masks = []
for text in test_data['text']:
    encoding = tokenizer(text, padding='max_length', truncation=True, return_tensors='pt', max_length=512)
    test_input_ids.append(encoding['input_ids'])
    test_attention_masks.append(encoding['attention_mask'])

# Convert lists to tensors
train_input_ids = torch.cat(train_input_ids)
train_attention_masks = torch.cat(train_attention_masks)
train_labels = torch.tensor(train_data['emotion_id'].tolist())

test_input_ids = torch.cat(test_input_ids)
test_attention_masks = torch.cat(test_attention_masks)
test_labels = torch.tensor(test_data['emotion_id'].tolist())

# Fine-tune the BERT model
model.train()
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

for epoch in range(3):
    optimizer.zero_grad()
    outputs = model(train_input_ids.cuda(), attention_mask=train_attention_masks.cuda(), labels=train_labels.cuda())
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    outputs = model(test_input_ids.cuda(), attention_mask=test_attention_masks.cuda())
    logits = outputs.logits
    predicted_labels = torch.argmax(logits, dim=1).cpu().numpy()
    accuracy = (predicted_labels == test_labels.numpy()).mean()
    print(f"Test Accuracy: {accuracy:.2f}")

# Example prediction
example_text = "I am feeling happy today!"
encoding = tokenizer(example_text, padding='max_length', truncation=True, return_tensors='pt', max_length=512)
model.eval()
with torch.no_grad():
    output = model(encoding['input_ids'].cuda(), attention_mask=encoding['attention_mask'].cuda())
    predicted_emotion_id = torch.argmax(output.logits).item()
    predicted_emotion = list(emotion_to_id.keys())[list(emotion_to_id.values()).index(predicted_emotion_id)]
    print(f"Predicted Emotion: {predicted_emotion}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


AssertionError: Torch not compiled with CUDA enabled