In [None]:
import numpy as np
import pandas as pd
import torch

from tqdm import tqdm
from transformers import pipeline

In [None]:
# Load the book table; later cells read its "description" and "isbn13" columns.
books = pd.read_csv("books_with_categories.csv")

In [None]:
# Build the emotion classifier. top_k=None is passed so that every label's score
# is returned for an input (later cells read one score per emotion label).
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k=None,
                      device="cuda" if torch.cuda.is_available() else "cpu")

# Quick smoke test on a short sentence.
classifier("I lover This!")

In [None]:
# Show the description text stored at row label 0.
books["description"][0]

In [None]:
# Classify the whole description as one single input.
classifier(books["description"][0])

In [None]:
# Classify the same description piece by piece, using "." as a rough sentence boundary.
classifier(books["description"][0].split("."))

In [None]:
# Keep the "."-separated pieces and the classifier output for them so the
# following cells can inspect both.
sentences = books["description"][0].split(".")
predictions = classifier(sentences)

In [None]:
# First "."-separated piece of the description.
sentences[0]

In [None]:
# Classifier output at position 0 (presumably the one for sentences[0] — confirm ordering).
predictions[0]

In [None]:
# Fourth "."-separated piece of the description.
sentences[3]

In [None]:
# Classifier output at position 3 (presumably the one for sentences[3] — confirm ordering).
predictions[3]

In [None]:
# Full classifier output for the split description.
predictions

In [None]:
# Order the entries of the first prediction alphabetically by their "label" value.
sorted(predictions[0], key=lambda entry: entry["label"])

In [None]:
# Emotion labels whose scores are collected for every book.
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
# Accumulators filled by the processing loops: one isbn13 value and one score
# per label for each processed book.
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
    """Return, for each emotion label, the highest score across all predictions.

    Parameters
    ----------
    predictions : iterable of list of dict
        One list per classified text; each dict carries a "label" and a
        "score" entry.

    Returns
    -------
    dict
        Maps every label in ``emotion_labels`` to the maximum score seen for
        that label.

    Raises
    ------
    KeyError
        If a prediction has no entry for one of ``emotion_labels``.
    ValueError
        If ``predictions`` is empty (``np.max`` of an empty list).
    """
    per_emotion_score = {label: [] for label in emotion_labels}
    for prediction in predictions:
        # Look scores up by label name instead of by position in a sorted list,
        # so an extra or reordered label can never shift scores onto the wrong
        # emotion.
        score_by_label = {entry["label"]: entry["score"] for entry in prediction}
        for label in emotion_labels:
            per_emotion_score[label].append(score_by_label[label])
    return {label: np.max(scores) for label, scores in per_emotion_score.items()}

In [None]:
# Trial run on the first 10 books before processing the whole table.
# Reset the accumulators here so re-running this cell does not append the same
# books a second time.
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# head(10) also copes with tables shorter than 10 rows; iterating the column
# values avoids depending on the row labels being 0..n-1.
first_books = books.head(10)
for book_isbn, description in zip(first_books["isbn13"], first_books["description"]):
    isbn.append(book_isbn)
    sentences = description.split(".")
    predictions = classifier(sentences)
    max_score = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_score[label])

In [None]:
# Inspect the per-label score lists collected so far.
emotion_scores

In [None]:
# Start from empty accumulators so this full run does not include rows left
# over from earlier cells.
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# Walk the column values directly instead of looking each row up by integer
# label, so the loop does not depend on the row labels being 0..n-1.
# total= lets tqdm show a proper progress bar for the zip iterator.
for book_isbn, description in tqdm(zip(books["isbn13"], books["description"]),
                                   total=len(books)):
    isbn.append(book_isbn)
    # Score each "."-separated piece, then keep the highest score per emotion.
    sentences = description.split(".")
    predictions = classifier(sentences)
    max_score = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_score[label])

In [None]:
# One row per processed book: a score column per emotion label, followed by
# the matching "isbn13" column.
emotions_df = pd.DataFrame({**emotion_scores, "isbn13": isbn})

In [None]:
# Display the emotion score table together with its isbn13 column.
emotions_df

In [None]:
# Attach the emotion score columns to the book table by joining on the shared
# "isbn13" key.
books = books.merge(emotions_df, on="isbn13")

In [None]:
# Display the book table after the emotion columns have been merged in.
books

In [None]:
# Save `books` to CSV; index=False leaves the row index out of the file.
books.to_csv("books_with_emotions.csv", index=False)