In [1]:
# first thing I wanted to do was use latent dirichlet allocation (LDA) with bag of words (bow) and term frequency 
# inverse document frequency (tfidf) to do topic modeling

# I use this as my reference
# https://github.com/susanli2016/NLP-with-Python/blob/master/LDA_news_headlines.ipynb

# I have already done some supervised learning to see predict which era (2006 or 2018) the articles are from, I would 
# like to us multilayer perceptrons (MLP) to do some more predictions

# New York Times Modern Love columns

My favorite part of the New York Times is Modern Love. They are articles submitted and chosen for press on all things love. The column started in 2004 and is still running. I wanted to see if "Modern" has changed between then and now. I use some natural language processing to compare articles written at the start of Modern Love and more currently (2018). 

In [2]:
# import some modules
%matplotlib inline
import numpy as np
import pandas as pd
import scipy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import re
import spacy
from collections import Counter
import nltk

In [3]:
# I got about 50 articles from 2004 to 2006 and another set of 50 from 2018

import requests
from bs4 import BeautifulSoup

# combine text from multiple urls

urls_list_modern_love = ['https://www.nytimes.com/2005/05/29/fashion/sundaystyles/a-prince-charming-for-the-prom-not-ever-after-though.html',
                        'https://www.nytimes.com/2004/10/31/fashion/just-friends-let-me-read-between-the-lines.html',
                        'https://www.nytimes.com/2005/03/27/fashion/truly-madly-guiltily.html?rref=collection%2Fcolumn%2Fmodern-love&action=click&contentCollection=fashion&region=stream&module=stream_unit&version=search&contentPlacement=4&pgtype=collection',
                        'https://www.nytimes.com/2005/09/11/fashion/sundaystyles/djs-homeless-mommy.html',
                        'https://www.nytimes.com/2006/01/29/fashion/sundaystyles/29LOVE.html',
                        'https://www.nytimes.com/2006/02/26/fashion/sundaystyles/26LOVE.html',
                        'https://www.nytimes.com/2006/05/07/fashion/sundaystyles/07love.html',
                        'https://www.nytimes.com/2006/05/14/fashion/sundaystyles/14love.html',
                        'https://www.nytimes.com/2006/05/21/fashion/sundaystyles/21love.html',
                        
                        'https://www.nytimes.com/2006/05/28/fashion/sundaystyles/28love.html',
                        'https://www.nytimes.com/2006/06/04/fashion/04love.html',
                        'https://www.nytimes.com/2006/06/11/fashion/sundaystyles/11love.html',
                        'https://www.nytimes.com/2006/06/18/fashion/sundaystyles/18love.html',
                        'https://www.nytimes.com/2006/06/25/fashion/25love.html',
                        'https://www.nytimes.com/2006/06/25/fashion/25love.html',
                        'https://www.nytimes.com/2006/07/16/fashion/sundaystyles/16love.html',
                        'https://www.nytimes.com/2006/07/30/fashion/30LOVE.html',
                        'https://www.nytimes.com/2006/08/06/fashion/06love.html',
                        'https://www.nytimes.com/2006/08/13/fashion/13love.html',
                         
                        'https://www.nytimes.com/2006/08/20/fashion/20LOVE.html',
                        'https://www.nytimes.com/2006/08/27/fashion/27love.html',
                        'https://www.nytimes.com/2006/09/03/fashion/03love.html',
                        'https://www.nytimes.com/2006/09/10/fashion/10love.html',
                        'https://www.nytimes.com/2006/09/17/fashion/17love.html',
                        'https://www.nytimes.com/2006/09/24/fashion/24love.html',
                        'https://www.nytimes.com/2006/10/01/fashion/01love.html',
                        'https://www.nytimes.com/2006/10/08/fashion/08love.html',
                        'https://www.nytimes.com/2006/10/15/fashion/15love.html',
                        'https://www.nytimes.com/2006/10/22/fashion/22love.html',
                        
                        'https://www.nytimes.com/2006/10/29/fashion/29love.html',
                        'https://www.nytimes.com/2006/11/05/fashion/05love.html',
                        'https://www.nytimes.com/2006/11/12/fashion/12love.html',
                        'https://www.nytimes.com/2006/11/19/fashion/19love.html',
                        'https://www.nytimes.com/2006/11/26/fashion/26love.html',
                        'https://www.nytimes.com/2006/12/03/fashion/03love.html',
                        'https://www.nytimes.com/2006/12/10/fashion/10love.html',
                        'https://www.nytimes.com/2006/12/17/fashion/17love.html',
                        'https://www.nytimes.com/2006/12/24/fashion/24love.html',
                        'https://www.nytimes.com/2007/01/07/fashion/07love.html',
                        'https://www.nytimes.com/2007/01/14/fashion/14love.html',
                        
                        'https://www.nytimes.com/2007/01/21/fashion/21love.html',
                        'https://www.nytimes.com/2007/01/28/fashion/28love.html',
                        'https://www.nytimes.com/2007/02/04/fashion/04love.html',
                        'https://www.nytimes.com/2007/02/18/fashion/18love.html',
                        'https://www.nytimes.com/2007/02/25/fashion/25love.html',
                        'https://www.nytimes.com/2007/03/04/fashion/04love.html',
                        'https://www.nytimes.com/2007/03/11/fashion/11love.html',
                        'https://www.nytimes.com/2007/03/18/fashion/18love.html',
                        'https://www.nytimes.com/2007/03/25/fashion/25love.html',
                        'https://www.nytimes.com/2007/04/01/fashion/01love.html'
                        ]


nyt_modern_love_2004_2007 = ''
for url in urls_list_modern_love:
    request = requests.get(url)
    soup = BeautifulSoup(request.content, "html5lib")
    url_result = soup.find_all("p", {"class": "story-content"})
    url_result = str(url_result)
    url_result = re.sub(r'--', ' ', url_result)
    url_result = re.sub("(<[^>]+>)", "", url_result)
    nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007 + url_result
    #print(len(nyt_modern_love_2004_2007))
#print((nyt_modern_love_2004_2007))   



In [4]:
urls_list_modern_love_2018 = ['https://www.nytimes.com/2018/12/28/style/modern-love-the-junk-removers-manhandle-my-heart.html',
                              'https://www.nytimes.com/2018/12/21/style/modern-love-mystery-holiday-fruitcake-solved.html',
                              'https://www.nytimes.com/2018/12/14/style/modern-love-your-dog-has-seen-me-naked.html',
                              'https://www.nytimes.com/2018/12/07/style/modern-love-when-a-boyfriend-joins-the-marriage.html',
                              'https://www.nytimes.com/2018/11/30/style/modern-love-marooned-on-love-island.html'
                              'https://www.nytimes.com/2018/11/23/style/destined-to-marry-the-cute-bartender.html',
                              'https://www.nytimes.com/2018/11/16/style/modern-love-i-would-have-driven-her-anywhere-.html',
                              'https://www.nytimes.com/2018/11/09/style/modern-love-keeping-this-fluffy-fragile-beast-alive.html',
                              'https://www.nytimes.com/2018/11/02/style/modern-love-wedding-day-leukemia-swollen-eyes.html',
                              'https://www.nytimes.com/2018/10/26/style/modern-love-im-the-man.html',
                              
                              'https://www.nytimes.com/2018/10/19/style/modern-love-when-sturdy-love-is-what-you-need.html',
                              'https://www.nytimes.com/2018/10/12/style/modern-love-talking-to-my-fiance-about-my-new-girlfriend.html',
                              'https://www.nytimes.com/2018/10/05/style/modern-love-neighbor-bro-smashes-my-romantic-fantasy.html',
                              'https://www.nytimes.com/2018/09/28/style/modern-love-how-i-met-my-children.html',
                              'https://www.nytimes.com/2018/09/21/style/modern-love-pudge-will-keep-us-together.html',
                              'https://www.nytimes.com/2018/09/14/style/modern-love-need-to-find-me-ask-my-ham-man.html',
                              'https://www.nytimes.com/2018/09/07/style/modern-love-he-asked-permission-to-touch-but-not-to-ghost.html',
                              'https://www.nytimes.com/2018/08/31/style/modern-love-honey-i-swept-the-floor.html',
                              'https://www.nytimes.com/2018/08/24/style/modern-love-the-kisses-that-paid-my-rent.html',
                              'https://www.nytimes.com/2018/08/17/style/modern-love-for-13-days-i-believed-him.html',
                              
                              'https://www.nytimes.com/2018/08/10/style/modern-love-driving-2130-miles-for-a-haircut.html',
                              'https://www.nytimes.com/2018/08/03/style/modern-love-how-i-lost-the-fiance-but-won-the-honeymoon.html',
                              'https://www.nytimes.com/2018/07/27/style/modern-love-marriage-class-at-guantanamo.html',
                              'https://www.nytimes.com/2018/07/20/style/modern-love-please-take-my-sons-wallet.html',
                              'https://www.nytimes.com/2018/07/13/style/modern-love-what-boxing-taught-me-about-love.html',
                              'https://www.nytimes.com/2018/07/06/style/modern-love-when-friends-fall-in-love.html',
                              'https://www.nytimes.com/2018/06/29/style/modern-love-hes-going-back-to-his-former-wife-sort-of.html',
                              'https://www.nytimes.com/2018/06/22/style/modern-love-staying-together-by-living-apart.html',
                              'https://www.nytimes.com/2018/06/15/style/modern-love-my-wife-said-you-may-want-to-marry-me.html',
                              'https://www.nytimes.com/2018/06/08/style/modern-love-i-wanted-to-be-dominated-but-not-quite-like-that.html',
                              
                              'https://www.nytimes.com/2018/06/01/style/modern-love-neither-male-nor-female-seems-to-fit.html',
                              'https://www.nytimes.com/2018/05/25/style/modern-love-means-never-having-to-say-anything.html',
                              'https://www.nytimes.com/2018/05/11/style/modern-love-motherless-but-growing-toward-the-light.html',
                              'https://www.nytimes.com/2018/05/04/style/modern-love-alexa-please-ignore-my-husband.html',
                              'https://www.nytimes.com/2018/04/27/style/modern-love-immigration-threat-of-deportation.html',
                              'https://www.nytimes.com/2018/04/20/style/modern-love-flying-close-to-temptation.html',
                              'https://www.nytimes.com/2018/04/13/style/when-love-seems-too-easy-to-trust.html',
                              'https://www.nytimes.com/2018/04/06/style/modern-love-sleeping-with-married-men-infidelity.html',
                              'https://www.nytimes.com/2018/03/30/style/modern-love-for-a-30-year-old-virgin-its-now-or-never.html',
                              'https://www.nytimes.com/2018/03/23/style/modern-love-first-try-the-pastrami-then-the-polyamory.html',
                              
                              'https://www.nytimes.com/2018/03/16/style/modern-love-she-was-my-world-but-we-couldnt-marry.html',
                              'https://www.nytimes.com/2018/03/09/style/modern-love-its-us-against-a-particle-of-dust.html',
                              'https://www.nytimes.com/2018/03/02/style/modern-love-am-i-gay-online-quizzes.html',
                              'https://www.nytimes.com/2018/02/23/style/modern-love-a-future-without-him-widow.html',
                              'https://www.nytimes.com/2018/02/16/style/modern-love-how-lolita-freed-me-from-my-own-humbert.html',
                              'https://www.nytimes.com/2018/02/09/style/modern-love-valentines-day-acted-like-strangers.html',
                              'https://www.nytimes.com/2018/02/02/style/modern-love-when-your-body-tells-you-what-your-brain-wont.html',
                              'https://www.nytimes.com/2018/01/26/style/modern-love-are-bitcoins-more-real-than-boyfriends.html',
                              'https://www.nytimes.com/2018/01/19/style/modern-love-the-bike-that-saved-my-life.html',
                              'https://www.nytimes.com/2018/01/12/style/modern-love-hookup-ghosting-use-your-words.html'
    

    
]
nyt_modern_love_2018 = ''
for url in urls_list_modern_love_2018:
    request = requests.get(url)
    soup = BeautifulSoup(request.content, "html5lib")
    #url_result = soup.find_all("p", {"name": "articleBody"})
    url_result = soup.findAll('div',attrs={"class":"StoryBodyCompanionColumn"})
    url_result = str(url_result)
    url_result = re.sub(r'--', ' ', url_result)
    url_result = re.sub("(<[^>]+>)", "", url_result)
    nyt_modern_love_2018 = nyt_modern_love_2018 + url_result
    #print(len(nyt_modern_love_2018))
#print((nyt_modern_love_2018))



In [5]:
# do some cleaning

nyt_modern_love_2004_2007 = re.sub('E-mail: modernlove@nytimes.com, MODERN LOVE', '', nyt_modern_love_2004_2007)


nyt_modern_love_2004_2007 = re.sub('UPDATED: You can now hear this essay read by the actress Minnie Driver in Modern Love: The Podcast. Look for the “play” button below or subscribe on iTunes or Google Play Music. , ', '', nyt_modern_love_2004_2007)
nyt_modern_love_2004_2007 = re.sub('UPDATED: You can now hear this essay read by the actress Catherine Keener in Modern Love: The Podcast. Look for the “play” button below or subscribe on iTunes or Google Play Music., ', '', nyt_modern_love_2004_2007)
nyt_modern_love_2004_2007 = re.sub('UPDATED: You can now hear this essay read by the actor John Cho in Modern Love:'
                                   'The Podcast. Look for the “play” button below or subscribe on iTunes or Google '
                                   'Play Music.,', '', nyt_modern_love_2004_2007)
nyt_modern_love_2004_2007 = re.sub('UPDATED: You can now hear this essay read by Sarah Silverman, followed by a '
                                   'conversation with the writer Angela Balcita and her husband Christopher Doyle,'
                                   ' in Modern Love: The Podcast. Look for the “play” button below.,', '',
                                   nyt_modern_love_2004_2007)

nyt_modern_love_2018 = re.sub('Modern Love can be reached at modernlove@nytimes.com.To hear Modern Love: The Podcast, subscribe on iTunes or Google Play Music. To read past Modern Love columns, click here.'
'Continue following our fashion and lifestyle coverage on Facebook (Styles and Modern Love), Twitter'
'(Styles, Fashion and Weddings) and Instagram.', '', nyt_modern_love_2018)

nyt_modern_love_2018 = re.sub('Modern Love can be reached at modernlove@nytimes.com.To hear Modern Love: The Podcast, subscribe on iTunes or Google Play Music. To read past Modern Love columns, click here. Continue following our fashion and lifestyle coverage on Facebook (Styles and Modern Love), Twitter (Styles, Fashion and Weddings) and Instagram.', '', nyt_modern_love_2018)
nyt_modern_love_2018 = re.sub(r'UPDATED.*', '', nyt_modern_love_2018)
nyt_modern_love_2018 = nyt_modern_love_2018.replace('[', '')
nyt_modern_love_2018 = nyt_modern_love_2018.replace(']', '')
nyt_modern_love_2018 = nyt_modern_love_2018.replace('Modern Love can be reached at modernlove@nytimes.com.To hear Modern Love: The Podcast, subscribe on iTunes or Google Play Music. To read past Modern Love columns, click here. Continue following our fashion and lifestyle coverage on Facebook (Styles and Modern Love), Twitter (Styles, Fashion and Weddings) and Instagram.', '')


nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007.replace('[', '')
nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007.replace(']', '')

nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007.replace('UPDATED: You can now hear this essay read by\xa0the actress Catherine Keener in Modern Love: The Podcast. Look for the “play” button below or subscribe on\xa0iTunes\xa0or\xa0Google Play Music.,', '')
nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007.replace('UPDATED: You can now hear this essay read by the actor John Cho in Modern Love: The Podcast. Look for the “play” button below or subscribe on iTunes or Google Play Music.,', '')
#nyt_modern_love_2004_2007 = nyt_modern_love_2004_2007.replace('"\"', '')

nyt_modern_love_2004_2007[:100]


'LATELY I\'ve become wary of the question "Frank, what are you doing next Saturday night?" In the mont'

In [6]:
nlp = spacy.load('en')
nytML200407_doc = nlp(nyt_modern_love_2004_2007)
nytML2018_doc = nlp(nyt_modern_love_2018)


ML2004_sents = [[sent, '2004'] for sent in nytML200407_doc.sents]
ML2018_sents = [[sent, '2018'] for sent in nytML2018_doc.sents]

sentences = pd.DataFrame(ML2004_sents + ML2018_sents)
sentences.head()

Unnamed: 0,0,1
0,(LATELY),2004
1,"(I, 've, become, wary, of, the, question, "", F...",2004
2,"("", In, the, month, of, May, it, can, only, me...",2004
3,"(And, no, ,, I, 'm, not, doing, a, favor, for,...",2004
4,"(Cousins, are, out, .)",2004


In [9]:
# starting LDA
import gensim
from gensim.models import word2vec
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
np.random.seed(2018)
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/melia/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [11]:
# get lemmas into processed document, using stems
stemmer = SnowballStemmer('english')

def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))

def preprocess(text):
    result = []
    for token in gensim.utils.simple_preprocess(text):
        if token not in gensim.parsing.preprocessing.STOPWORDS and len(token) > 3:
            result.append(lemmatize_stemming(token))
    return result

doc = sentences[0].astype(str)

processed_doc = doc.map(preprocess)

In [12]:
processed_doc[-10:]

11352                                                   []
11353                      [want, nice, feel, oblig, know]
11354    [want, nice, mean, make, sens, say, tangl, han...
11355        [wander, casual, expect, yield, relationship]
11356    [understand, guy, think, flatteri, bedroom, we...
11357              [love, date, wonder, think, want, date]
11358                    [requir, flatteri, deserv, ghost]
11359                                 [hookup, need, mean]
11360                                               [mean]
11361                                               [word]
Name: 0, dtype: object

In [13]:
dictionary = gensim.corpora.Dictionary(processed_doc)

In [14]:
dictionary.filter_extremes(no_below=15, no_above=0.5, keep_n=100000)

In [15]:
bow_corpus = [dictionary.doc2bow(doc) for doc in processed_doc]
bow_corpus[11001]

bow_dooc_11001 = bow_corpus[11001]
for i in range(len(bow_dooc_11001)):
    print('word {} (\"{}\") appears {} time.'.format(bow_dooc_11001[i][0], dictionary[bow_dooc_11001[i][0]], bow_dooc_11001[i][1]))
    
    
    

word 1 ("night") appears 1 time.
word 79 ("stori") appears 2 time.
word 173 ("end") appears 1 time.
word 270 ("write") appears 1 time.
word 413 ("song") appears 1 time.


In [16]:
from gensim import corpora, models

tfidf = models.TfidfModel(bow_corpus)
corpus_tfidf = tfidf[bow_corpus]

In [19]:
num_topc = 5
# lda using bow
lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=num_topc, id2word=dictionary, passes=2, workers=2)
for idx, topic in lda_model.print_topics(-1):
    print('topic: {} \nWords: {}'.format(idx, topic))

topic: 0 
Words: 0.059*"say" + 0.025*"want" + 0.023*"friend" + 0.022*"come" + 0.014*"look" + 0.014*"tell" + 0.011*"time" + 0.010*"husband" + 0.009*"open" + 0.008*"need"
topic: 1 
Words: 0.014*"ask" + 0.013*"time" + 0.013*"talk" + 0.012*"relationship" + 0.012*"work" + 0.011*"like" + 0.011*"feel" + 0.010*"say" + 0.010*"couldn" + 0.009*"call"
topic: 2 
Words: 0.032*"year" + 0.019*"know" + 0.019*"time" + 0.018*"life" + 0.018*"live" + 0.014*"later" + 0.014*"like" + 0.011*"home" + 0.010*"tri" + 0.009*"think"
topic: 3 
Words: 0.034*"know" + 0.030*"want" + 0.018*"love" + 0.014*"hand" + 0.013*"need" + 0.011*"read" + 0.010*"have" + 0.009*"say" + 0.009*"time" + 0.009*"take"
topic: 4 
Words: 0.030*"like" + 0.026*"think" + 0.026*"love" + 0.022*"go" + 0.021*"mother" + 0.016*"thing" + 0.015*"tell" + 0.013*"month" + 0.011*"wasn" + 0.009*"know"


In [20]:
# LDA using tfidf

lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=num_topc, id2word=dictionary, passes=2, workers=4)
for idx, topic in lda_model_tfidf.print_topics(-1):
    print('topic: {} Word: {}'.format(idx, topic))
    

topic: 0 Word: 0.027*"know" + 0.018*"look" + 0.016*"year" + 0.014*"mayb" + 0.012*"work" + 0.011*"think" + 0.011*"husband" + 0.010*"right" + 0.009*"come" + 0.008*"begin"
topic: 1 Word: 0.025*"love" + 0.018*"think" + 0.012*"like" + 0.010*"ask" + 0.010*"date" + 0.010*"felt" + 0.010*"happen" + 0.010*"talk" + 0.009*"peopl" + 0.008*"tell"
topic: 2 Word: 0.036*"want" + 0.021*"like" + 0.015*"say" + 0.012*"hand" + 0.012*"take" + 0.011*"feel" + 0.011*"kiss" + 0.010*"know" + 0.009*"hold" + 0.009*"love"
topic: 3 Word: 0.019*"go" + 0.017*"say" + 0.017*"tell" + 0.014*"come" + 0.011*"thing" + 0.008*"friend" + 0.008*"leav" + 0.008*"like" + 0.008*"time" + 0.008*"couldn"
topic: 4 Word: 0.040*"say" + 0.016*"life" + 0.015*"time" + 0.013*"wasn" + 0.012*"month" + 0.012*"need" + 0.011*"mother" + 0.010*"live" + 0.010*"love" + 0.008*"hear"


In [22]:
# check performance LDA with bow

for indx, score in sorted(lda_model[bow_corpus[11001]], key=lambda tup: -1*tup[1]):
    print('\nScore {} \nTopic: {}'.format(score, lda_model.print_topic(indx, 10)))


Score 0.80403071641922 
Topic: 0.030*"like" + 0.026*"think" + 0.026*"love" + 0.022*"go" + 0.021*"mother" + 0.016*"thing" + 0.015*"tell" + 0.013*"month" + 0.011*"wasn" + 0.009*"know"

Score 0.10888399928808212 
Topic: 0.059*"say" + 0.025*"want" + 0.023*"friend" + 0.022*"come" + 0.014*"look" + 0.014*"tell" + 0.011*"time" + 0.010*"husband" + 0.009*"open" + 0.008*"need"

Score 0.029159720987081528 
Topic: 0.014*"ask" + 0.013*"time" + 0.013*"talk" + 0.012*"relationship" + 0.012*"work" + 0.011*"like" + 0.011*"feel" + 0.010*"say" + 0.010*"couldn" + 0.009*"call"

Score 0.029118912294507027 
Topic: 0.034*"know" + 0.030*"want" + 0.018*"love" + 0.014*"hand" + 0.013*"need" + 0.011*"read" + 0.010*"have" + 0.009*"say" + 0.009*"time" + 0.009*"take"

Score 0.028806647285819054 
Topic: 0.032*"year" + 0.019*"know" + 0.019*"time" + 0.018*"life" + 0.018*"live" + 0.014*"later" + 0.014*"like" + 0.011*"home" + 0.010*"tri" + 0.009*"think"


In [23]:
# check performance LDA with tfidf

for indx, score in sorted(lda_model_tfidf[bow_corpus[11001]], key=lambda tup: -1*tup[1]):
    print('\nScore: {} \nTopic: {}'.format(score, lda_model_tfidf.print_topic(indx, 10)))


Score: 0.4552915096282959 
Topic: 0.027*"know" + 0.018*"look" + 0.016*"year" + 0.014*"mayb" + 0.012*"work" + 0.011*"think" + 0.011*"husband" + 0.010*"right" + 0.009*"come" + 0.008*"begin"

Score: 0.2674643397331238 
Topic: 0.040*"say" + 0.016*"life" + 0.015*"time" + 0.013*"wasn" + 0.012*"month" + 0.012*"need" + 0.011*"mother" + 0.010*"live" + 0.010*"love" + 0.008*"hear"

Score: 0.21857912838459015 
Topic: 0.036*"want" + 0.021*"like" + 0.015*"say" + 0.012*"hand" + 0.012*"take" + 0.011*"feel" + 0.011*"kiss" + 0.010*"know" + 0.009*"hold" + 0.009*"love"

Score: 0.029512085020542145 
Topic: 0.025*"love" + 0.018*"think" + 0.012*"like" + 0.010*"ask" + 0.010*"date" + 0.010*"felt" + 0.010*"happen" + 0.010*"talk" + 0.009*"peopl" + 0.008*"tell"

Score: 0.029152974486351013 
Topic: 0.019*"go" + 0.017*"say" + 0.017*"tell" + 0.014*"come" + 0.011*"thing" + 0.008*"friend" + 0.008*"leav" + 0.008*"like" + 0.008*"time" + 0.008*"couldn"


In [None]:
# questions

# how to do stats on topics/clusters
# how to see if authors are grouped into similar clusters
# how to incorporate NMF
# what techniques to use for unsupervised learning feature generation/selection? LSA? Others?
