In [None]:
import pandas as pd
from transformers import pipeline
import numpy as np
from tqdm import tqdm

# Load the book table; later cells read its "description" and "isbn13" columns.
books = pd.read_csv("books_with_categories.csv")

In [None]:
# Emotion classifier used for every prediction in this notebook.
# top_k=None is meant to make the pipeline return a score for every label
# instead of only the best one (see the transformers text-classification
# pipeline documentation); the scoring function below relies on getting a
# list of {"label", "score"} entries per input.
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k = None)

In [None]:
# Quick check of the classifier on a short example sentence.
classifier("I love this")

In [None]:
# Same check with a contrasting example sentence.
classifier("I hate this")

In [None]:
# Classify the first book description piece by piece, splitting it on ".".
classifier(books["description"][0].split("."))

In [None]:
# Emotion names tracked by this notebook; their order here is the key order
# of the emotion_scores dictionary built below.
emotion_labels = [
    "anger",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "neutral",
]

# Accumulators: one independent score list per emotion, plus the list of
# ISBNs collected alongside them.
emotion_scores = dict((name, list()) for name in emotion_labels)
isbn = list()

def calculate_max_emotion_score(predictions, labels=None):
    """Return, for each emotion label, the highest score across all predictions.

    Parameters
    ----------
    predictions : iterable of list of dict
        One item per classified text. Each item is a list of
        ``{"label": ..., "score": ...}`` entries, one per emotion.
    labels : sequence of str, optional
        Labels to report. Defaults to the notebook-level ``emotion_labels``.

    Returns
    -------
    dict
        ``{label: maximum score}``, keyed in the order of ``labels``.

    Raises
    ------
    KeyError
        If a prediction has no entry for one of the requested labels.
    ValueError
        If ``predictions`` is empty (``np.max`` of an empty list).
    """
    if labels is None:
        labels = emotion_labels
    per_emotion_scores = {label: [] for label in labels}
    for prediction in predictions:
        # Look every score up by its label name. Sorting the entries
        # alphabetically and indexing them by position is wrong here:
        # alphabetical order puts "neutral" before "sadness" and "surprise",
        # while the label list has "neutral" last, so those three columns
        # would receive each other's scores.
        score_by_label = {entry["label"]: entry["score"] for entry in prediction}
        for label in labels:
            per_emotion_scores[label].append(score_by_label[label])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [None]:
# Dry run on a small sample before scoring the whole table.
# Start from empty accumulators so that re-running this cell does not append
# the same books a second time.
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# Cap the sample at the table length so a short file does not raise.
for i in range(min(10, len(books))):
    # .iloc selects by position, independent of the frame's index labels.
    isbn.append(books["isbn13"].iloc[i])
    # Each "."-separated piece of the description is classified on its own.
    sentences = books["description"].iloc[i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_score(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [None]:
# Inspect the scores accumulated so far (one list per emotion label).
emotion_scores

In [None]:
# Score every book. The accumulators are rebuilt first, so this cell starts
# from empty lists regardless of what earlier cells appended.
emotion_labels = [
    "anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral",
]
emotion_scores = dict((emotion, []) for emotion in emotion_labels)
isbn = []

for row in tqdm(range(len(books))):
    isbn.append(books["isbn13"][row])
    # Each "."-separated piece of the description is classified on its own;
    # only the per-emotion maximum over those pieces is kept for the book.
    book_predictions = classifier(books["description"][row].split("."))
    book_max_scores = calculate_max_emotion_score(book_predictions)
    for emotion in emotion_labels:
        emotion_scores[emotion].append(book_max_scores[emotion])

In [None]:
# One row per scored book: a column per emotion, followed by the ISBN column
# that serves as the join key.
emotions_df = pd.DataFrame(emotion_scores).assign(isbn13=isbn)

In [None]:
# Display the per-book emotion table.
emotions_df

In [None]:
# Attach the emotion columns to the book table, matching rows on isbn13.
books = books.merge(emotions_df, on="isbn13")

In [None]:
# Display the book table as it stands at this point in the notebook.
books

In [None]:
# Write the book table to disk; index=False leaves the row index out of the CSV.
books.to_csv("books_with_emotions.csv", index=False)