In [12]:
import string

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# cleaning() calls word_tokenize() and stopwords.words(), which need the
# 'punkt' and 'stopwords' NLTK resources on disk. Fetch them up front so a
# fresh "Restart & Run All" does not fail on first use; nltk.download() is a
# no-op when a resource is already up to date.
for resource in ('punkt', 'stopwords'):
    nltk.download(resource, quiet=True)

In [3]:
# Load the book metadata and blurbs; the path is resolved relative to the
# kernel's working directory.
blurbs = pd.read_csv('../raw_data/books_with_blurbs.csv')

In [4]:
# Preview the first rows of the blurbs frame.
blurbs.head()

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Blurb
0,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,"Here, for the first time in paperback, is an o..."
1,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,"The fascinating, true story of the world's dea..."
2,399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,Winnie and Helen have kept each others worst s...
3,425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,Historians and inquisitive laymen alike love t...
4,1881320189,Goodbye to the Buttermilk Sky,Julia Oliver,1994,River City Pub,This highly praised first novel by fiction wri...


In [16]:
# Column dtypes and non-null counts, to check for missing blurbs.
blurbs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57510 entries, 0 to 57509
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   ISBN       57510 non-null  object
 1   Title      57510 non-null  object
 2   Author     57510 non-null  object
 3   Year       57510 non-null  int64 
 4   Publisher  57510 non-null  object
 5   Blurb      57510 non-null  object
dtypes: int64(1), object(5)
memory usage: 2.6+ MB


In [5]:
# Load the song lyrics dataset (path relative to the kernel's working
# directory) and preview its first rows.
songs = pd.read_csv('../raw_data/spotify_millsongdata.csv')
songs.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [13]:
# Translation table that deletes every character in string.punctuation.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-filled cache so the stop-word corpus is read once instead of once
# per row when cleaning() is used through Series.apply().
_CLEANING_CACHE = {}


def _english_stop_words():
    """Return NLTK's English stop words as a set, loading them on first use only."""
    if 'stop_words' not in _CLEANING_CACHE:
        _CLEANING_CACHE['stop_words'] = set(stopwords.words('english'))
    return _CLEANING_CACHE['stop_words']


def cleaning(sentence):
    """Normalise raw text into a space-separated string of non-stop-word tokens.

    Steps, in order:
      1. strip surrounding whitespace and lowercase;
      2. drop every character for which ``str.isdigit()`` is true;
      3. delete every character listed in ``string.punctuation``;
      4. tokenize with ``word_tokenize`` and drop English stop words;
      5. re-join the surviving tokens with single spaces.

    Punctuation is deleted (not replaced by a space) *before* stop words are
    filtered, so a contraction such as "she's" is compared against the
    stop-word list as "shes".
    NOTE(review): that form presumably does not match apostrophe-bearing
    stop-word entries, which is why such tokens survive in the output --
    confirm whether this is intended.

    Parameters
    ----------
    sentence : str
        Raw text. ``None`` and float NaN (how pandas represents an empty CSV
        cell) are treated as empty text.

    Returns
    -------
    str
        The cleaned text; ``''`` when nothing survives or the input is missing.
    """
    # A missing value has no .strip(); return empty text instead of letting
    # one empty cell abort a whole .apply() pass.
    if sentence is None or (isinstance(sentence, float) and sentence != sentence):
        return ''

    # Basic cleaning: whitespace, case, digits.
    sentence = sentence.strip().lower()
    sentence = ''.join(char for char in sentence if not char.isdigit())

    # Advanced cleaning: one translate() pass removes all punctuation.
    sentence = sentence.translate(_PUNCTUATION_TABLE)

    stop_words = _english_stop_words()
    kept_tokens = [w for w in word_tokenize(sentence) if w not in stop_words]

    return ' '.join(kept_tokens)

In [14]:
# Sanity-check cleaning() on the lyrics of the song at index label 0.
cleaning(songs["text"][0])

'look face wonderful face means something special look way smiles sees lucky one fellow shes kind girl makes feel fine could ever believe could mine shes kind girl without im blue ever leaves could could go walk park holds squeezes hand well go walking hours talking things plan shes kind girl makes feel fine could ever believe could mine shes kind girl without im blue ever leaves could could'

In [15]:
# Build a new frame with the cleaned blurb alongside the original columns;
# assign() works on a copy, so `blurbs` itself is left untouched.
blurbs_cleaned = blurbs.assign(Blurb_cleaned=blurbs["Blurb"].apply(cleaning))


In [17]:
# Compare the raw and cleaned blurb columns on the first rows.
blurbs_cleaned.head()

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Blurb,Blurb_cleaned
0,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,"Here, for the first time in paperback, is an o...",first time paperback outstanding military hist...
1,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,"The fascinating, true story of the world's dea...",fascinating true story worlds deadliest diseas...
2,399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,Winnie and Helen have kept each others worst s...,winnie helen kept others worst secrets fifty y...
3,425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,Historians and inquisitive laymen alike love t...,historians inquisitive laymen alike love ponde...
4,1881320189,Goodbye to the Buttermilk Sky,Julia Oliver,1994,River City Pub,This highly praised first novel by fiction wri...,highly praised first novel fiction writer juli...


In [19]:
# Same treatment for the lyrics: a new frame with a `text_cleaned` column,
# derived from a copy so `songs` keeps its original shape.
songs_cleaned = songs.assign(text_cleaned=songs["text"].apply(cleaning))

In [20]:
# Compare the raw and cleaned lyrics columns on the first rows.
songs_cleaned.head()

Unnamed: 0,artist,song,link,text,text_cleaned
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...",look face wonderful face means something speci...
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...",take easy please touch gently like summer even...
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,ill never know go put lousy rotten show boy to...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,making somebody happy question give take learn...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,making somebody happy question give take learn...


# VADER model

In [26]:
import nltk
# Download the VADER lexicon resource used by NLTK's VADER sentiment analyzer.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/silvhera/nltk_data...


True

In [27]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the Punkt tokenizer data (in case it has not been fetched before)
from nltk import download
download('punkt')

[nltk_data] Downloading package punkt to /home/silvhera/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [28]:
# Module-level analyzer instance used by the exploratory cells that call
# sia.polarity_scores() directly.
sia = SentimentIntensityAnalyzer()

In [32]:
# Preview the cleaned songs frame again before scoring it.
songs_cleaned.head()

Unnamed: 0,artist,song,link,text,text_cleaned
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...",look face wonderful face means something speci...
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...",take easy please touch gently like summer even...
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,ill never know go put lousy rotten show boy to...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,making somebody happy question give take learn...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,making somebody happy question give take learn...


In [29]:
# Full VADER score dict for the cleaned lyrics at index label 0.
sia.polarity_scores(songs_cleaned["text_cleaned"][0])

{'neg': 0.0, 'neu': 0.621, 'pos': 0.379, 'compound': 0.9859}

In [33]:
# Only the 'compound' entry of the score dict for the same lyrics.
sia.polarity_scores(songs_cleaned["text_cleaned"][0])["compound"]

0.9859

In [37]:
# Holds the single analyzer shared by every obtain_compound() call; it is
# filled on the first call rather than at definition time.
_ANALYZER_CACHE = {}


def obtain_compound(text):
    """Return the VADER ``compound`` score for ``text``.

    The analyzer is created once and reused. Building a new
    ``SentimentIntensityAnalyzer`` for every row repeats its set-up work for
    each of the tens of thousands of rows this function is applied to.

    Parameters
    ----------
    text : str
        Text to score.

    Returns
    -------
    float
        The value stored under ``'compound'`` in ``polarity_scores(text)``.
    """
    if 'sia' not in _ANALYZER_CACHE:
        _ANALYZER_CACHE['sia'] = SentimentIntensityAnalyzer()
    return _ANALYZER_CACHE['sia'].polarity_scores(text)['compound']

In [38]:
# Score every song and keep the result in a new frame (assign() copies, so
# `songs_cleaned` is not modified).
# NOTE(review): VADER is documented as using punctuation, capitalisation and
# negation words as cues; scoring the stop-word-stripped `text_cleaned`
# column may discard them. Consider scoring the raw `text` column instead --
# confirm before changing, since every downstream score would shift.
songs_sentiment = songs_cleaned.assign(
    sentiment=songs_cleaned["text_cleaned"].apply(obtain_compound)
)

In [40]:
# Score every cleaned blurb the same way, into a new frame derived from a
# copy of `blurbs_cleaned`.
blurbs_sentiment = blurbs_cleaned.assign(
    sentiment=blurbs_cleaned["Blurb_cleaned"].apply(obtain_compound)
)

In [51]:
def playlist_by_book(title, n=100):
    """Return the songs whose sentiment score is closest to a book's.

    The book is looked up by exact match on ``blurbs_sentiment["Title"]``;
    when several rows share the title, the first one is used. Songs are
    ranked by the absolute difference between their ``sentiment`` and the
    book's, and that difference is returned in an ``abs_dif`` column.

    The global ``songs_sentiment`` frame is not modified: the distance
    column is added to a copy, so repeated calls for different books do not
    leave a stale ``abs_dif`` column behind.

    Parameters
    ----------
    title : str
        Exact book title to look up.
    n : int, default 100
        Maximum number of songs to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``n`` rows of ``songs_sentiment`` plus ``abs_dif``, ordered
        from the smallest difference to the largest.

    Raises
    ------
    IndexError
        If no book has the given title.
    """
    matches = blurbs_sentiment.loc[blurbs_sentiment["Title"] == title, "sentiment"]
    if matches.empty:
        # Same exception type the bare `.values[0]` lookup raised, but with a
        # message that names the missing title.
        raise IndexError(f"No book titled {title!r} found in blurbs_sentiment")
    book_sentiment = matches.iloc[0]

    ranked = songs_sentiment.assign(
        abs_dif=(songs_sentiment['sentiment'] - book_sentiment).abs()
    )
    return ranked.nsmallest(n, 'abs_dif')

In [41]:
# Preview the blurbs frame with its new `sentiment` column.
blurbs_sentiment.head()

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Blurb,Blurb_cleaned,sentiment
0,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,"Here, for the first time in paperback, is an o...",first time paperback outstanding military hist...,0.6124
1,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,"The fascinating, true story of the world's dea...",fascinating true story worlds deadliest diseas...,-0.9552
2,399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,Winnie and Helen have kept each others worst s...,winnie helen kept others worst secrets fifty y...,-0.8957
3,425176428,What If?: The World's Foremost Military Histor...,Robert Cowley,2000,Berkley Publishing Group,Historians and inquisitive laymen alike love t...,historians inquisitive laymen alike love ponde...,0.8625
4,1881320189,Goodbye to the Buttermilk Sky,Julia Oliver,1994,River City Pub,This highly praised first novel by fiction wri...,highly praised first novel fiction writer juli...,0.972


In [54]:
# Example: show the first 50 rows of the playlist built for one book title.
playlist_by_book("Harry Potter and the Chamber of Secrets").head(50)

Unnamed: 0,artist,song,link,text,text_cleaned,sentiment,abs_dif
242,Air Supply,Always,/a/air+supply/always_20004946.html,I can see hills touch the sky \r\nHeaven and ...,see hills touch sky heaven earth world always ...,0.5106,0.0
514,Alice Cooper,Refrigerator Heaven,/a/alice+cooper/refrigerator+heaven_20304652.html,"I'm freezing, I'm frozen, I'm icicle blue \r\...",im freezing im frozen im icicle blue low low c...,0.5106,0.0
1520,Black Sabbath,Zeitgeist,/b/black+sabbath/zeitgeist_21061002.html,Astral engines in reverse \r\nI'm falling thr...,astral engines reverse im falling universe amo...,0.5106,0.0
4159,Devo,Gates Of Steel,/d/devo/gates+of+steel_20039649.html,Twist away the gates of steel \r\nUnlock the ...,twist away gates steel unlock secret voice giv...,0.5106,0.0
5710,Extreme,Rock A Bye Bye,/e/extreme/rock+a+bye+bye_20052189.html,If you could only hear \r\nThe silent screams...,could hear silent screams wake dreams nothing ...,0.5106,0.0
6663,George Jones,Big Harlan Taylor,/g/george+jones/big+harlan+taylor_20153445.html,"[Chorus] \r\nOh the ways of the world, and th...",chorus oh ways world wants woman figured twoul...,0.5106,0.0
7378,Hank Snow,Journey My Baby Back Home,/h/hank+snow/journey+my+baby+back+home_2079717...,Pale moon start on your journey I'm pleading t...,pale moon start journey im pleading somewhere ...,0.5106,0.0
7442,Hank Williams Jr.,Always Loving You,/h/hank+williams+jr/always+loving+you_20251032...,"The evening is lonesome babe, \r\nAnd the dea...",evening lonesome babe dead night cruel recall ...,0.5106,0.0
8046,Howard Jones,Will You Still Be There?,/h/howard+jones/will+you+still+be+there_200662...,How many lives have been shaken \r\n'cause a ...,many lives shaken cause simple actions taken t...,0.5106,0.0
8320,Incubus,Defiance,/i/incubus/defiance_20914031.html,You almost had me there \r\nIt was the battin...,almost batting lashes gave away youre bold bey...,0.5106,0.0
