In [1]:
# The usual suspects (and json)
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# NLP libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import TruncatedSVD, NMF
from nltk.tokenize import TreebankWordTokenizer, TweetTokenizer
from nltk.stem import PorterStemmer
from nltk import SnowballStemmer
from sklearn.manifold import TSNE
import nltk

from helper_functions.lyrics_cleaners import rep_linebrk, display_topics

# Pipeline functions
from helper_functions.pipeline import NLPPipe, tweet_clean1

# # Helper functions
# from Py_Files.helper_functions import txt_to_df, scatter, display_topics

# Visualization packages
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib
%matplotlib inline
import seaborn as sns

import pickle
%load_ext autoreload
%autoreload 2

In [2]:
# Load the pickled song table from the working directory.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
all_proj = pd.read_pickle("AllBeatlesProj.pkl")

In [3]:
# Move the current index into regular columns, keep one row per song title,
# then renumber the rows 0..n-1.
# `reset_index(drop=True)` discards the old row labels directly. The previous
# `.reset_index().drop(columns='index')` relied on pandas naming the new column
# 'index'; when an 'index' column already exists pandas uses 'level_0' instead,
# so the wrong column was dropped and a stray 'level_0' column was left behind.
# NOTE: re-running this cell on the already-processed frame still adds an
# 'index' column via the first reset_index(); re-load the pickle before re-running.
all_proj = (
    all_proj
    .reset_index()
    .drop_duplicates(subset='song_title')
    .reset_index(drop=True)
)

# Run the project's rep_linebrk helper over every lyric
# (presumably replaces line breaks -- see helper_functions.lyrics_cleaners).
all_proj['lyrics'] = all_proj['lyrics'].apply(rep_linebrk)

In [4]:
# Collect every song's lyrics into a plain Python list; this list is what the
# pipeline's fit/transform calls receive.
corpus_list = all_proj['lyrics'].tolist()

In [5]:
# Start from NLTK's English stop-word list and add extra words that should be
# ignored in the lyrics. The additions are appended in a single call and keep
# their original order.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend([
    'oh', 've', 'll', 'ooh', 'na', 'la', 'ca', 'wo',
    'ah', 'yeah', 'oo', 'woah', 'well', 'doo', 'da', 'mm',
    'mmm', 'uh', 'hey', 'go', 'know', 'like', 'see', 'get',
    'got', 'one', 'way', 'could', 'make', 'thing', 'whoa',
])

In [6]:
# TF-IDF may be better for our dataset. It works better with sparse datasets.
# max_df=0.80 ignores terms present in more than 80% of the documents;
# min_df=10 ignores terms present in fewer than 10 documents.
tfidf_vectorizer = TfidfVectorizer(min_df=10, max_df=0.80, stop_words=stopwords)
snowball_stemmer = SnowballStemmer("english", ignore_stopwords=True)

nlp = NLPPipe(
    stemmer=snowball_stemmer,
    tokenizer=TweetTokenizer().tokenize,
    vectorizer=tfidf_vectorizer,
)

In [7]:
# Fit the pipeline on the lyrics corpus, then transform that same corpus into
# `dtm_tfidf`, which is the input to the NMF and LDA models further down.
nlp.fit(corpus_list)
dtm_tfidf = nlp.transform(corpus_list)

In [18]:
# Factorise the TF-IDF matrix into 5 topics with NMF (seeded for repeatability).
# `doc_topic` holds one row per document and one column per topic.
nmf_model = NMF(n_components=5, random_state=1)
doc_topic = nmf_model.fit_transform(dtm_tfidf)

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer `get_feature_names_out()` and fall back for older installations.
vectorizer = nlp.vectorizer
if hasattr(vectorizer, "get_feature_names_out"):
    feature_names = list(vectorizer.get_feature_names_out())
else:
    feature_names = vectorizer.get_feature_names()

# Show the 25 top terms for each topic.
display_topics(nmf_model, feature_names, 25)
# These topics look a bit better than LDA.
# Tentative labels: 0 - IDK, 1 - Love, 2 - Relationships, 3 - Longing, 4 - Desire
# NOTE(review): the earlier note also listed "5 - Rock N Roll", but this model
# only produces topics 0-4 -- confirm which label belongs to which topic.


Topic  0
time, look, take, never, day, life, world, let, good, littl, away, feel, long, tell, peopl, live, back, think, heart, right, night, girl, play, would, say

Topic  1
love, true, need, give, feel, heart, forev, away, song, never, alway, ever, pleas, tell, hold, say, long, noth, find, word, mine, send, girl, kiss, wrong

Topic  2
babi, say, let, man, said, cri, littl, pleas, woman, mayb, fine, bye, run, shake, honey, wait, long, keep, drive, girl, ride, leav, tri, song, everybodi

Topic  3
come, back, home, said, easi, keep, move, pleas, sun, someth, fight, alon, forget, along, til, open, everyth, whatev, tonight, wait, alright, everybodi, flower, away, honey

Topic  4
want, wanna, give, money, danc, tell, need, say, everyth, yes, tonight, tri, someth, peopl, right, night, drive, truth, peac, mind, anyth, woo, free, face, feel


In [19]:
# Document-topic weights as a DataFrame: one row per song (labelled by its full
# lyric text) and one column per NMF topic, rounded to 5 decimals.
# Column labels come from the matrix width instead of a hard-coded 5, so
# changing the number of NMF components no longer raises a shape mismatch here.
H = pd.DataFrame(
    doc_topic.round(5),
    index=corpus_list,
    columns=range(doc_topic.shape[1]),
)

In [23]:
# Peek at the first three rows of the document-topic table
# (row labels are the full lyric texts, so the display is wide).
H.head(3)

Unnamed: 0,0,1,2,3,4
"One, two, three, four One, two... (One, two, three, four) Let me tell you how it will be There's one for you, nineteen for me 'Cause I'm the taxman Yeah, I'm the taxman Should five percent appear too small Be thankful I don't take it all 'Cause I'm the taxman Yeah, I'm the taxman If you drive a car, car, I'll tax the street If you try to sit, sit, I'll tax your seat If you get too cold, cold, I'll tax the heat If you take a walk, walk, I'll tax your feet Taxman 'Cause I'm the taxman Yeah, I'm the taxman Don't ask me what I want it for (Haha, Mr. Wilson) If you don't want to pay some more (Haha, Mr. Heath) 'Cause I'm the taxman Yeah, I'm the taxman Now my advice for those who die (Taxman!) Declare the pennies on your eyes (Taxman!) Cause I'm the taxman Yeah, I'm the taxman And you're working for no one but me (Taxman!)",0.0625,0.0,0.0275,0.0,0.07723
"Ah, look at all the lonely people! Ah, look at all the lonely people! Eleanor Rigby Picks up the rice in the church where a wedding has been Lives in a dream Waits at the window Wearing the face that she keeps in a jar by the door Who is it for? All the lonely people Where do they all come from? All the lonely people Where do they all belong? Father McKenzie Writing the words of a sermon that no one will hear No one comes near Look at him working Darning his socks in the night when there's nobody there What does he care? All the lonely people Where do they all come from? All the lonely people Where do they all belong? Ah, look at all the lonely people! Ah, look at all the lonely people! Eleanor Rigby Died in the church and was buried along with her name Nobody came Father McKenzie Wiping the dirt from his hands as he walks from the grave No one was saved All the lonely people (Ah, look at all the lonely people!) Where do they all come from? All the lonely people (Ah, look at all the lonely people!) Where do they all belong?",0.07139,0.0,0.0,0.06725,0.01732
"When I wake up early in the morning Lift my head, I'm still yawning When I'm in the middle of a dream Stay in bed, float up stream (Float up stream) Please, don't wake me, no, don't shake me Leave me where I am, I'm only sleeping Everybody seems to think I'm lazy I don't mind, I think they're crazy Running everywhere at such a speed Till they find there's no need (There's no need) Please, don't spoil my day, I'm miles away And after all, I'm only sleeping Keeping an eye on the world going by my window Taking my time Lying there and staring at the ceiling Waiting for a sleepy feeling Please, don't spoil my day, I'm miles away And after all, I'm only sleeping Keeping an eye on the world going by my window Taking my time When I wake up early in the morning Lift my head, I'm still yawning When I'm in the middle of a dream Stay in bed, float up stream Please, don't wake me, no, don't shake me Leave me where I am, I'm only sleeping",0.10126,0.00376,0.0083,0.0094,0.0


In [26]:
# Label each song with the topic column that carries its largest weight.
# H is indexed by lyric text rather than by all_proj's row labels, so the
# result is attached positionally as a plain list.
dominant_topic = H.idxmax(axis=1)
all_proj['song_topic'] = dominant_topic.tolist()

In [28]:
# Embed the document-topic weights with t-SNE (cosine distance, perplexity 50,
# fixed seed) to get coordinates for plotting.
tsne = TSNE(metric='cosine', perplexity=50, random_state=0)
plotting_x = tsne.fit_transform(H)

In [29]:
# Store the t-SNE coordinates on the song table.
# Note the axis order: 'x' takes embedding column 1 and 'y' takes column 0.
all_proj['x'], all_proj['y'] = plotting_x[:, 1], plotting_x[:, 0]

In [30]:
# Display the song table, now including the song_topic, x and y columns.
all_proj

Unnamed: 0,song_title,artist,album_title,release_date,lyrics,singer,song_topic,x,y
0,Taxman,The Beatles,Revolver (UK),1966-08-05 00:00:00,"One, two, three, four One, two... (One, two, ...",Harrison,4,3.705686,-17.085106
1,Eleanor Rigby,The Beatles,Revolver (UK),1966-08-05 00:00:00,"Ah, look at all the lonely people! Ah, look at...",McCartney,0,3.190551,13.226368
2,I'm Only Sleeping,The Beatles,Revolver (UK),1966-08-05 00:00:00,When I wake up early in the morning Lift my he...,Lennon,0,13.038831,2.706480
3,Love You To,The Beatles,Revolver (UK),1966-08-05 00:00:00,"Each day just goes so fast I turn around, it's...",Harrison,1,-17.362219,-5.855365
4,"Here, There and Everywhere",The Beatles,Revolver (UK),1966-08-05 00:00:00,To lead a better life I need my love to be her...,McCartney,1,-15.858483,-7.347853
...,...,...,...,...,...,...,...,...,...
866,Red and Black Blues,Ringo Starr,Stop and Smell the Roses,1981-10-27 00:00:00,"Ended a worker, according to plan 'Cause you'r...",Starr,0,7.809428,2.286024
867,Brandy,Ringo Starr,Stop and Smell the Roses,1981-10-27 00:00:00,By the open fireplace in my favorite dungarees...,Starr,3,1.580441,16.107670
868,Stop and Take the Time to Smell the Other Roses,Ringo Starr,Stop and Smell the Roses,1981-10-27 00:00:00,"One, two, three, four Stop and take the time t...",Starr,0,7.411470,-11.458018
869,You Can’t Fight Lightning,Ringo Starr,Stop and Smell the Roses,1981-10-27 00:00:00,"I said, you can't fight lightning Said, you ca...",Starr,3,-0.703041,21.020184


In [8]:
from sklearn.decomposition import LatentDirichletAllocation
import pyLDAvis
import pyLDAvis.sklearn
# There are a few different ways to get topics. Let's see how LDA works with our data.

In [29]:
# Turn on inline rendering of pyLDAvis output in the notebook.
pyLDAvis.enable_notebook()

# Fit a 5-topic LDA model on the TF-IDF matrix (seeded for repeatability);
# fit() returns the fitted estimator itself.
lda_tfidf = LatentDirichletAllocation(random_state=0, n_components=5).fit(dtm_tfidf)

# Last expression of the cell: the prepared interactive topic visualisation.
pyLDAvis.sklearn.prepare(lda_tfidf, dtm_tfidf, nlp.vectorizer)

  and should_run_async(code)
