In [22]:
import pandas as pd

# Location of the input CSV inside the Kaggle dataset mount.
books_csv_path = "/kaggle/input/books-with-categories/books_with_categories.csv"

# Load the book table; later cells read its "isbn13" and "description" columns.
books = pd.read_csv(books_csv_path)

In [6]:
from transformers import pipeline
import torch

# Use the first CUDA GPU when one is visible; otherwise fall back to CPU (-1)
# so the notebook still runs on a machine without a GPU instead of failing here.
pipeline_device = 0 if torch.cuda.is_available() else -1

# Emotion classifier. top_k=1 keeps only the single highest-scoring label per
# input text, so every other label is absent from that input's result.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=1,
    device=pipeline_device,
)

# Smoke test: the last expression of the cell is displayed as its output.
classifier("I love kaggle!")

Device set to use cuda:0


[[{'label': 'joy', 'score': 0.960947573184967}]]

In [13]:
def calculate_max_emotion_scores(predictions, labels=None):
    """Return, for each emotion label, the highest score seen across sentences.

    Args:
        predictions: Iterable with one entry per sentence. Each entry is an
            iterable of dicts carrying a "label" and a "score" key; empty
            entries are skipped.
        labels: Labels to report. When omitted, the notebook-level
            ``emotion_labels`` list is used, which keeps existing one-argument
            calls working.

    Returns:
        Dict mapping every label to its maximum score. A label that never
        appears in a sentence counts as 0 for that sentence, and every label
        maps to 0 when no sentence produced a prediction.
    """
    if labels is None:
        # Resolved at call time, so the global only has to exist before the
        # first call, not before this definition.
        labels = emotion_labels
    labels = list(labels)

    per_emotion_scores = {label: [] for label in labels}
    for prediction in predictions:
        if not prediction:  # No predictions returned for a sentence
            continue
        # Map labels to scores for this sentence
        label_to_score = {p["label"]: p["score"] for p in prediction}
        # Record 0 for labels this sentence did not return
        for label in labels:
            per_emotion_scores[label].append(label_to_score.get(label, 0))
    # Take max for each label, or 0 if no data
    return {
        label: max(scores) if scores else 0
        for label, scores in per_emotion_scores.items()
    }


from tqdm import tqdm

# Labels reported per book; each becomes one list in emotion_scores.
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# Iterate over the column values directly. books[col][i] looks rows up by index
# label, which only works while the index happens to be exactly 0..n-1.
book_rows = zip(books["isbn13"], books["description"])
for book_isbn, desc in tqdm(book_rows, total=len(books)):
    isbn.append(book_isbn)

    # Naive sentence split on "."; blank fragments are dropped. A non-string
    # description (e.g. NaN) yields no sentences at all.
    if isinstance(desc, str):
        sentences = [s.strip() for s in desc.split(".") if s.strip()]
    else:
        sentences = []

    if not sentences:
        # Missing, empty, or punctuation-only description: nothing to classify,
        # so record 0 for every label and keep the lists aligned with isbn.
        for label in emotion_labels:
            emotion_scores[label].append(0)
        continue

    # truncation=True clips fragments that exceed the model's maximum input
    # length instead of letting one over-long fragment abort the whole run.
    predictions = classifier(sentences, truncation=True)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])


  0%|          | 3/5197 [00:00<03:08, 27.55it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 5197/5197 [01:43<00:00, 50.17it/s] 


In [20]:
# One column per emotion label, followed by the isbn13 key column.
emotions_df = pd.DataFrame({**emotion_scores, "isbn13": isbn})

In [21]:
# Display the per-book emotion table: one column per emotion label plus isbn13.
emotions_df

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.000000,0.000000,0.983973,0.949027,0.956065,0.729603,0.697846,9780002005883
1,0.594468,0.461991,0.935215,0.704422,0.000000,0.000000,0.891109,9780002261982
2,0.000000,0.000000,0.973285,0.767237,0.000000,0.000000,0.000000,9780006178736
3,0.000000,0.000000,0.436339,0.000000,0.000000,0.000000,0.732685,9780006280897
4,0.000000,0.000000,0.000000,0.000000,0.475881,0.000000,0.890048,9780006280934
...,...,...,...,...,...,...,...,...
5192,0.000000,0.000000,0.923713,0.000000,0.974265,0.000000,0.807058,9788172235222
5193,0.000000,0.000000,0.000000,0.400263,0.000000,0.000000,0.891073,9788173031014
5194,0.000000,0.000000,0.000000,0.942169,0.000000,0.000000,0.344738,9788179921623
5195,0.000000,0.000000,0.409871,0.776052,0.000000,0.000000,0.950763,9788185300535


In [23]:
# Columns contributed by emotions_df (everything except the join key).
emotion_columns = [col for col in emotions_df.columns if col != "isbn13"]

# Drop any emotion columns left over from a previous run of this cell before
# merging. Without this, re-running the cell merges onto an already-merged
# frame and pandas emits suffixed duplicates (anger_x / anger_y, ...).
# On a first run the drop is a no-op, so the result is unchanged.
books = pd.merge(
    books.drop(columns=emotion_columns, errors="ignore"),
    emotions_df,
    on="isbn13",
)
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categories,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead:,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.000000,0.000000,0.983973,0.949027,0.956065,0.729603,0.697846
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.594468,0.461991,0.935215,0.704422,0.000000,0.000000,0.891109
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels:,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.000000,0.000000,0.973285,0.767237,0.000000,0.000000,0.000000
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves:,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.000000,0.000000,0.436339,0.000000,0.000000,0.000000,0.732685
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain:,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.000000,0.000000,0.000000,0.000000,0.475881,0.000000,0.890048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity:,9788172235222 On A Train Journey Home To North...,Fiction,0.000000,0.000000,0.923713,0.000000,0.974265,0.000000,0.807058
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East:,9788173031014 This book tells the tale of a ma...,Nonfiction,0.000000,0.000000,0.000000,0.400263,0.000000,0.000000,0.891073
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.000000,0.000000,0.000000,0.942169,0.000000,0.000000,0.344738
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.000000,0.000000,0.409871,0.776052,0.000000,0.000000,0.950763


In [24]:
# Write the merged table to the working directory; index=False leaves the
# row index out of the file.
output_csv_path = "books_with_emotions.csv"
books.to_csv(output_csv_path, index=False)