In [None]:
import torch
from transformers import pipeline
import pandas as pd
from tqdm.notebook import tqdm  # For Jupyter Notebooks

# Load CSV files
train_data = pd.read_csv(r'D:\Downloads\dataset\X_train_cleaned.csv')
test_data = pd.read_csv(r'D:\Downloads\dataset\X_test_cleaned.csv')

# Extract features
X_train_cleaned = train_data['review_body'].tolist()  # Text data
X_test_cleaned = test_data['review_body'].tolist()

# Ensure the text data is in string format
X_train_cleaned = [str(item) for item in X_train_cleaned]
X_test_cleaned = [str(item) for item in X_test_cleaned]

# Load the emotion classification pipeline with truncation enabled
classifier = pipeline(
    "text-classification",
    model='bhadresh-savani/distilbert-base-uncased-emotion',
    return_all_scores=True,
    truncation=True,
    max_length=512
)

# Function to calculate emotion scores for a given sample
def emotion_scores(samples):
    scores = []
    for sample in tqdm(samples, desc="Calculating Emotion Scores"):
        try:
            emotion = classifier(sample)
            scores.append(emotion[0])  # Append the score list for this sample
        except Exception as e:
            print(f"Error processing sample: {sample[:50]}... - {str(e)}")
            scores.append(None)  # Handle exceptions by appending None
    return scores

# Calculate emotion scores for train and test datasets
train_emotion_scores = emotion_scores(X_train_cleaned)
test_emotion_scores = emotion_scores(X_test_cleaned)

# Save results to DataFrame for better visualization and analysis
train_emotion_df = pd.DataFrame(train_emotion_scores)
test_emotion_df = pd.DataFrame(test_emotion_scores)

# Save results to CSV for future use
train_emotion_df.to_csv(r'D:\Downloads\dataset\train_emotion_scores.csv', index=False)
test_emotion_df.to_csv(r'D:\Downloads\dataset\test_emotion_scores.csv', index=False)

print("Emotion scores calculated and saved to CSV.")




Calculating Emotion Scores:   0%|          | 0/343256 [00:00<?, ?it/s]

In [4]:
train_emotion_df.iloc[0]

NameError: name 'train_emotion_df' is not defined