In [10]:
import pandas as pd
import numpy as np
from tqdm import tqdm
books = pd.read_csv("books_with_categories.csv")

In [11]:
from transformers import pipeline
classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k = None)
classifier("I love this!")

Device set to use cpu


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528673090040684},
  {'label': 'neutral', 'score': 0.005764589179307222},
  {'label': 'anger', 'score': 0.004419779404997826},
  {'label': 'sadness', 'score': 0.002092391485348344},
  {'label': 'disgust', 'score': 0.0016119909705594182},
  {'label': 'fear', 'score': 0.00041385178337804973}]]

In [12]:
classifier(books["description"][0])

[[{'label': 'fear', 'score': 0.6548418998718262},
  {'label': 'neutral', 'score': 0.16985182464122772},
  {'label': 'sadness', 'score': 0.11640854179859161},
  {'label': 'surprise', 'score': 0.020700592547655106},
  {'label': 'disgust', 'score': 0.01910073682665825},
  {'label': 'joy', 'score': 0.015161239542067051},
  {'label': 'anger', 'score': 0.00393515033647418}]]

In [None]:
classifier(books["description"][0].split("."))

[[{'label': 'surprise', 'score': 0.7296027541160583},
  {'label': 'neutral', 'score': 0.14038576185703278},
  {'label': 'fear', 'score': 0.06816209107637405},
  {'label': 'joy', 'score': 0.04794240742921829},
  {'label': 'anger', 'score': 0.00915635284036398},
  {'label': 'disgust', 'score': 0.0026284719351679087},
  {'label': 'sadness', 'score': 0.0021221607457846403}],
 [{'label': 'neutral', 'score': 0.449370414018631},
  {'label': 'disgust', 'score': 0.27359241247177124},
  {'label': 'joy', 'score': 0.10908260941505432},
  {'label': 'sadness', 'score': 0.09362703561782837},
  {'label': 'anger', 'score': 0.040478307753801346},
  {'label': 'surprise', 'score': 0.02697017230093479},
  {'label': 'fear', 'score': 0.006879065651446581}],
 [{'label': 'neutral', 'score': 0.6462168097496033},
  {'label': 'sadness', 'score': 0.2427326887845993},
  {'label': 'disgust', 'score': 0.04342268034815788},
  {'label': 'surprise', 'score': 0.028300466015934944},
  {'label': 'joy', 'score': 0.014211482

In [14]:
sentences = books["description"][0].split(".")
predictions = classifier(sentences)

In [15]:
sentences[0]

'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'

In [16]:

predictions[0]

[{'label': 'surprise', 'score': 0.7296027541160583},
 {'label': 'neutral', 'score': 0.14038576185703278},
 {'label': 'fear', 'score': 0.06816209107637405},
 {'label': 'joy', 'score': 0.04794240742921829},
 {'label': 'anger', 'score': 0.00915635284036398},
 {'label': 'disgust', 'score': 0.0026284719351679087},
 {'label': 'sadness', 'score': 0.0021221607457846403}]

In [17]:

sorted(predictions[0], key=lambda x: x["label"])

[{'label': 'anger', 'score': 0.00915635284036398},
 {'label': 'disgust', 'score': 0.0026284719351679087},
 {'label': 'fear', 'score': 0.06816209107637405},
 {'label': 'joy', 'score': 0.04794240742921829},
 {'label': 'neutral', 'score': 0.14038576185703278},
 {'label': 'sadness', 'score': 0.0021221607457846403},
 {'label': 'surprise', 'score': 0.7296027541160583}]

In [18]:
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbns = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_pred = sorted(prediction, key=lambda x: x["label"])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label] = sorted_pred[index]["score"]
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}           
    

In [19]:
for i in range(10):
    isbns.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [20]:
emotion_scores

{'anger': [0.06413363665342331,
  0.005810616537928581,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331,
  0.06413363665342331],
 'disgust': [0.10400672256946564,
  0.0029196084942668676,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564,
  0.10400672256946564],
 'fear': [0.05136279761791229,
  0.001382041140459478,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229,
  0.05136279761791229],
 'joy': [0.040564414113759995,
  0.012705693021416664,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995,
  0.040564414113759995],
 'sadness': [0.549476742744

In [21]:
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbns = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbns.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [14:28<00:00,  5.99it/s] 


In [22]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbns

In [23]:
books = pd.merge(books, emotions_df, on="isbn13", )

In [24]:
books.to_csv("books_with_emotions.csv", index = False)

In [28]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categories,anger,disgust,fear,joy,sadness,surprise,neutral
4464,9781406934854,1406934852,A Commentary Upon the Gospel According to S. Luke,Saint Cyril (patriarch of Alexandria),History,http://books.google.com/books/content?id=Mclsy...,This is a reproduction of the original artefac...,2019.0,3.59,362.0,...,A Commentary Upon the Gospel According to S. Luke,9781406934854 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4462,9781406917895,1406917893,"Israel's Shepherd; Or, Thoughts on the Love of...",Hardpress,History,http://books.google.com/books/content?id=1q9_y...,This is a reproduction of the original artefac...,2019.0,3.72,46.0,...,"Israel's Shepherd; Or, Thoughts on the Love of...",9781406917895 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4461,9781406904833,140690483X,Nöddebo Parsonage,Bentley,,http://books.google.com/books/content?id=OFJyy...,This is a reproduction of the original artefac...,2019.0,4.32,224.0,...,Nöddebo Parsonage:A Story of Country Life in D...,9781406904833 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4466,9781406957242,1406957240,The Story of the Life of Lafayette,Mrs John Farrar,History,http://books.google.com/books/content?id=oVd-y...,This is a reproduction of the original artefac...,2019.0,3.93,368.0,...,The Story of the Life of Lafayette:As Told by ...,9781406957242 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4463,9781406922905,1406922900,"The Pronouncing Reading Book for Children, Wit...",William L Robinson,History,http://books.google.com/books/content?id=gyiJy...,This is a reproduction of the original artefac...,2019.0,3.66,92.0,...,"The Pronouncing Reading Book for Children, Wit...",9781406922905 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4533,9781421504612,1421504618,Fullmetal Alchemist,Hiromu Arakawa,Comics & Graphic Novels,,"Barry the Chopper, the psychopathic killer who...",2006.0,4.60,200.0,...,Fullmetal Alchemist,"9781421504612 Barry the Chopper, the psychopat...",Fiction,0.006604,0.017999,0.961003,0.000765,0.001755,0.009981,0.001892
3089,9780679735250,0679735259,Dispatches,Michael Herr,History,http://books.google.com/books/content?id=5Trkp...,A documentation of the day-to-day realities of...,1991.0,4.23,260.0,...,Dispatches,9780679735250 A documentation of the day-to-da...,Nonfiction,0.003905,0.005123,0.978725,0.000765,0.006079,0.003923,0.001480
3014,9780671742515,0671742515,Long Dark Tea-Time of the Soul,Douglas Adams,Fiction,http://books.google.com/books/content?id=h7pjd...,When a passenger check-in desk shoots through ...,1990.0,4.06,307.0,...,Long Dark Tea-Time of the Soul,9780671742515 When a passenger check-in desk s...,Fiction,0.895688,0.030328,0.055733,0.000562,0.005823,0.003501,0.008365
3070,9780679722946,0679722947,Running Dog,Don DeLillo,Fiction,http://books.google.com/books/content?id=YqtSn...,A woman reporter and a staff assistant to an A...,1989.0,3.42,256.0,...,Running Dog,9780679722946 A woman reporter and a staff ass...,Fiction,0.005203,0.985241,0.005723,0.000556,0.000981,0.001518,0.000779


In [29]:
books.sort_values(by="joy", ascending=False, inplace=True)

In [30]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categories,anger,disgust,fear,joy,sadness,surprise,neutral
4464,9781406934854,1406934852,A Commentary Upon the Gospel According to S. Luke,Saint Cyril (patriarch of Alexandria),History,http://books.google.com/books/content?id=Mclsy...,This is a reproduction of the original artefac...,2019.0,3.59,362.0,...,A Commentary Upon the Gospel According to S. Luke,9781406934854 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4462,9781406917895,1406917893,"Israel's Shepherd; Or, Thoughts on the Love of...",Hardpress,History,http://books.google.com/books/content?id=1q9_y...,This is a reproduction of the original artefac...,2019.0,3.72,46.0,...,"Israel's Shepherd; Or, Thoughts on the Love of...",9781406917895 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4461,9781406904833,140690483X,Nöddebo Parsonage,Bentley,,http://books.google.com/books/content?id=OFJyy...,This is a reproduction of the original artefac...,2019.0,4.32,224.0,...,Nöddebo Parsonage:A Story of Country Life in D...,9781406904833 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4466,9781406957242,1406957240,The Story of the Life of Lafayette,Mrs John Farrar,History,http://books.google.com/books/content?id=oVd-y...,This is a reproduction of the original artefac...,2019.0,3.93,368.0,...,The Story of the Life of Lafayette:As Told by ...,9781406957242 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
4463,9781406922905,1406922900,"The Pronouncing Reading Book for Children, Wit...",William L Robinson,History,http://books.google.com/books/content?id=gyiJy...,This is a reproduction of the original artefac...,2019.0,3.66,92.0,...,"The Pronouncing Reading Book for Children, Wit...",9781406922905 This is a reproduction of the or...,Nonfiction,0.001282,0.000651,0.000306,0.983100,0.006407,0.001728,0.006526
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4533,9781421504612,1421504618,Fullmetal Alchemist,Hiromu Arakawa,Comics & Graphic Novels,,"Barry the Chopper, the psychopathic killer who...",2006.0,4.60,200.0,...,Fullmetal Alchemist,"9781421504612 Barry the Chopper, the psychopat...",Fiction,0.006604,0.017999,0.961003,0.000765,0.001755,0.009981,0.001892
3089,9780679735250,0679735259,Dispatches,Michael Herr,History,http://books.google.com/books/content?id=5Trkp...,A documentation of the day-to-day realities of...,1991.0,4.23,260.0,...,Dispatches,9780679735250 A documentation of the day-to-da...,Nonfiction,0.003905,0.005123,0.978725,0.000765,0.006079,0.003923,0.001480
3014,9780671742515,0671742515,Long Dark Tea-Time of the Soul,Douglas Adams,Fiction,http://books.google.com/books/content?id=h7pjd...,When a passenger check-in desk shoots through ...,1990.0,4.06,307.0,...,Long Dark Tea-Time of the Soul,9780671742515 When a passenger check-in desk s...,Fiction,0.895688,0.030328,0.055733,0.000562,0.005823,0.003501,0.008365
3070,9780679722946,0679722947,Running Dog,Don DeLillo,Fiction,http://books.google.com/books/content?id=YqtSn...,A woman reporter and a staff assistant to an A...,1989.0,3.42,256.0,...,Running Dog,9780679722946 A woman reporter and a staff ass...,Fiction,0.005203,0.985241,0.005723,0.000556,0.000981,0.001518,0.000779
