In [2]:
import os
import pandas as pd
import numpy as np
from transformers import pipeline
from tqdm import tqdm

# Switch the process working directory to the BookNavigator folder
project_dir = r"C:\Users\MSI\Desktop\BookNavigator"
os.chdir(project_dir)
print("Current working directory:", os.getcwd())

# Read the books dataset from its CSV file
books_csv_path = r"C:\Users\MSI\Desktop\Book Sphere\data\books_with_categories_new.csv"
books = pd.read_csv(books_csv_path)

# --- 1️⃣ Average Rating Classification ---
def rating_category(rating):
    """Map a numeric average rating to a descriptive quality label.

    Bands are checked from highest to lowest and the first lower bound the
    rating reaches wins: >= 4.5 "Excellent", >= 4.0 "Very Good",
    >= 3.0 "Good", >= 2.0 "Fair", >= 0 "Poor".

    A value that satisfies none of the comparisons (a negative number or
    NaN) yields "Unknown".
    """
    bands = (
        (4.5, "Excellent"),
        (4.0, "Very Good"),
        (3.0, "Good"),
        (2.0, "Fair"),
        (0, "Poor"),
    )
    for lower_bound, label in bands:
        if rating >= lower_bound:
            return label
    # Reached only when every comparison above was false
    return "Unknown"

# Label every book with the band its average rating falls into
books["rating_category"] = books["average_rating"].map(rating_category)

# --- 2️⃣ Emotion Analysis ---
# Build the text-classification pipeline used to score emotions
classifier_options = {
    "model": "j-hartmann/emotion-english-distilroberta-base",
    "top_k": None,
    "device": -1,  # CPU
}
classifier = pipeline("text-classification", **classifier_options)

# Emotion names used as score keys and as output column names
emotion_labels = [
    "anger",
    "disgust",
    "fear",
    "happiness",
    "sadness",
    "surprise",
    "neutral",
]

# Function to calculate max emotion scores per book
def calculate_max_emotion_scores(predictions):
    """Return the highest score seen for each emotion across predictions.

    Args:
        predictions: one entry per classified sentence; each entry is an
            iterable of ``{"label": ..., "score": ...}`` dicts.

    Returns:
        Dict keyed by every name in the module-level ``emotion_labels``,
        holding the maximum score (via ``np.max``) for that emotion.

    Raises:
        KeyError: a prediction has no entry for one of the emotions.
        ValueError: ``predictions`` is empty (``np.max`` of no values).
    """
    # The output uses "happiness" while the classifier's model card names
    # the same emotion "joy"; fall back to the model's name when needed.
    model_label_aliases = {"happiness": "joy"}

    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        # Look scores up by label name. Pairing alphabetically sorted
        # predictions with emotion_labels by position mislabels scores,
        # because emotion_labels is not in alphabetical order.
        score_by_label = {item["label"]: item["score"] for item in prediction}
        for label in emotion_labels:
            if label in score_by_label:
                model_label = label
            else:
                model_label = model_label_aliases.get(label, label)
            per_emotion_scores[label].append(score_by_label[model_label])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

# Compute emotion scores: for every book, classify each "."-separated piece
# of its description and keep the highest score per emotion.
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# Iterate over the column values directly instead of `books[col][i]`:
# an integer label lookup raises KeyError whenever the frame's index is not
# the default 0..n-1 range (for example after rows have been filtered out).
book_rows = zip(books["isbn13"], books["description"])
for book_isbn, description in tqdm(
    book_rows, total=len(books), desc="Processing books for emotions"
):
    isbn.append(book_isbn)
    sentences = str(description).split(".")  # Ensure it's string
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

# Create emotion dataframe (one row per book, in the same order as `books`)
emotion_df = pd.DataFrame(emotion_scores)
emotion_df["isbn13"] = isbn

# Attach the emotion columns to the books by row position. The rows of
# emotion_df already line up one-to-one with books, whereas a key merge on
# isbn13 would multiply rows if an ISBN occurred more than once.
books_final = pd.concat(
    [books.reset_index(drop=True), emotion_df.drop(columns="isbn13")],
    axis=1,
)

# Save final CSV to the BookNavigator folder
output_file = r"C:\Users\MSI\Desktop\BookNavigator\books_with_emotions_and_ratings.csv"
books_final.to_csv(output_file, index=False)

print(f"✅ Combined CSV saved at: {output_file}")


Current working directory: C:\Users\MSI\Desktop\BookNavigator


Device set to use cpu
Processing books for emotions: 100%|██████████| 5197/5197 [07:14<00:00, 11.96it/s]

✅ Combined CSV saved at: C:\Users\MSI\Desktop\BookNavigator\books_with_emotions_and_ratings.csv



