In [1]:
import nltk
# Download the WordNet corpus; the lemma helpers in this notebook look words up in it
# (via nltk.corpus.wordnet and WordNetLemmatizer).
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\AntonYashuk\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
import spacy
from spacy.lang.en import English

# Only tokenization is needed, so a blank English pipeline is sufficient.
# The former `spacy.load('en')` call was dropped: its return value was discarded
# (the loaded pipeline was never used), and the 'en' shortcut name is not
# available in spaCy v3, so the call would fail on a fresh environment.
parser = English()

def tokenize(text):
    """Split `text` into a list of normalized token strings.

    The text is run through the module-level `parser`. Whitespace-only tokens
    are dropped; tokens flagged as URL-like become the placeholder 'URL';
    tokens starting with '@' become 'SCREEN_NAME'; everything else is
    lower-cased.
    """
    def normalize(token):
        # URL check takes precedence over the '@' check, as in the if/elif order.
        if token.like_url:
            return 'URL'
        if token.orth_.startswith('@'):
            return 'SCREEN_NAME'
        return token.lower_

    return [
        normalize(token)
        for token in parser(text)
        if not token.orth_.isspace()
    ]

In [3]:
from nltk.corpus import wordnet as wn
def get_lemma(word):
    """Return the base form `wn.morphy` finds for `word`.

    When `wn.morphy` returns None, `word` is returned unchanged.
    """
    base_form = wn.morphy(word)
    return word if base_form is None else base_form
    
from nltk.stem.wordnet import WordNetLemmatizer
def get_lemma2(word):
    """Lemmatize `word` with a fresh `WordNetLemmatizer`.

    `lemmatize` is called with the word only (no part-of-speech argument).
    """
    lemmatizer = WordNetLemmatizer()
    return lemmatizer.lemmatize(word)

In [4]:
# Side-by-side comparison of the two lemma helpers on a few sample words:
# prints the word, then get_lemma's result, then get_lemma2's result.
for w in ['dogs', 'ran', 'discouraged']:
    morphy_lemma = get_lemma(w)
    lemmatizer_lemma = get_lemma2(w)
    print(w, morphy_lemma, lemmatizer_lemma)

dogs dog dog
ran run ran
discouraged discourage discouraged


In [5]:
# Download NLTK's stopword lists, then keep the English ones as a set so that
# the `token not in en_stop` filter in prepare_text_for_lda is a set lookup.
nltk.download('stopwords')
en_stop = set(nltk.corpus.stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\AntonYashuk\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
def prepare_text_for_lda(text):
    """Turn raw `text` into a list of lemmas.

    Tokens come from `tokenize`. A token is kept only if it is longer than
    four characters and is not in `en_stop`; both checks are applied to the
    token as produced by `tokenize`, before lemmatization. Each surviving
    token is then mapped through `get_lemma`.
    """
    return [
        get_lemma(token)
        for token in tokenize(text)
        if len(token) > 4 and token not in en_stop
    ]

In [10]:
import random

# Build `text_data` from a random ~1% sample of the lines in data.csv
# (a line is kept when random.random() > .99). Each kept line is
# preprocessed, printed, and appended.
# NOTE: the sample differs between runs unless the random module is seeded
# before this cell.
text_data = []
with open('data.csv', encoding="utf8") as f:
    for line in f:
        # Draw first: preprocessing is the expensive step, so only run it on
        # lines that are actually kept. One draw is still made per line, so
        # the same lines are selected as when the draw came after preprocessing.
        if not random.random() > .99:
            continue
        tokens = prepare_text_for_lda(line)
        print(tokens)
        text_data.append(tokens)

['slate', 'colored', 'light', 'slant', 'skylight', 'woman', 'enter', 'wednesday', 'morning', 'oxidize', 'copper', 'ceiling', 'lending', 'green', 'water', 'surface', 'swimming', 'outfit', 'would', 'consider', 'prudish', 'standard', 'build', 'dress', 'sleeve', 'paddle', 'thick', 'black', 'tights', 'inside', 'locker', 'upside', 'window', 'ledge', 'bench', 'owner', 'head', 'ruffle', 'swimming', 'knot', 'scarf', 'swimmer', 'hasidic', 'woman', 'abide', 'strict', 'code', 'modesty', 'metropolitan', 'recreation', 'center', 'williamsburg', 'brooklyn', 'unusual', 'feature', 'swimming', 'pool', 'gender', 'segregate', 'hours', 'recreation', 'center', 'crown', 'heights', 'brooklyn', 'although', 'wednesday', 'urbanite', 'summer', 'solstice', 'outdoor', 'pool', 'open', 'season', 'child', 'could', 'carom', 'chlorine', 'swimming', 'season', 'placid', 'indoor', 'williamsburg', 'last', 'tempest', 'threaten', 'woman', 'sanctuary', 'await', 'decision', 'official', 'imminent', 'future', 'segregate', 'swimmin

['european', 'leaders', 'brussels', 'wednesday', 'prime', 'minister', 'david', 'cameron', 'britain', 'early', 'happening', 'today', 'brexit', 'news:•', 'leaders', 'european', 'union', 'country', 'meeting', 'brussels', 'discus', 'british', 'cameron', 'show', 'briefly', 'somber', 'dinner', 'tuesday', 'fellow', 'leaders', 'immigration', 'reason', 'compatriot', 'decide', 'leave', 'european', 'leaders', 'express', 'alarm', 'chancellor', 'angela', 'merkel', 'germany', 'would', 'economic', 'consequence', 'dalia', 'grybauskaite', 'president', 'lithuania', 'smell', 'coffee', 'prime', 'minister', 'charles', 'michel', 'belgium', 'call', 'britain', 'withdraw', 'alarm.”britain', 'negotiate', 'terms', 'divorce', 'formally', 'file', 'separation', 'papers', 'cameron', 'successor.•', 'lawmaker', 'britain', 'governing', 'party', 'jockey', 'replace', 'cameron', 'process', 'choose', 'conservative', 'party', 'leader', 'opening', 'today', 'nomination', 'close', 'thursday', 'boris', 'johnson', 'former', 'lon

['jason', '1-ranked', 'golfer', 'world', 'become', 'latest', 'player', 'olympics', 'concern', 'virus.“the', 'reason', 'decision', 'concern', 'possible', 'transmission', 'virus', 'potential', 'risk', 'present', 'future', 'pregnancy', 'future', 'member', 'family', 'statement', 'australia', 'p.g.a.', 'championship', 'first', 'major', 'title', 'olympics', 'first', 'since', 'large', 'number', 'player', 'pulling', 'future', 'doubt', 'barry', 'maister', 'international', 'olympic', 'committee', 'member', 'zealand', 'blast', 'golfer', 'drop', 'game', 'think', 'appalling', 'radio', 'station', 'newstalk', 'think', 'sport', 'allow', 'continue', 'game', 'scenario.”maister', 'add', 'getting', 'putting', 'second-', 'third', 'player', 'olympic', 'ideal', 'expectation', 'olympic', 'movement.”with', 'three', 'golfer', 'australia', 'scott', 'leishman', 'decide', 'olympics', 'three', 'golfer', 'south', 'africa', 'going', 'branden', 'grace', 'louis', 'oosthuizen', 'charl', 'schwartzel', 'shane', 'lowry', '

['traveling', 'years', 'nearly', 'billion', 'mile', 'spacecraft', 'announce', 'arrival', 'jupiter', 'simple', 'radio', 'signal', 'three', 'second', 'expect', 'marking', '35-minute', 'engine', 'spacecraft', 'allow', 'capture', 'jupiter', 'gravity', 'arrive', 'earth', '11:53', 'eastern', 'monday.“i', 'complete', 'going', 'celebration', 'nybakken', 'project', 'manager', 'means', 'orbit', 'around', 'jupiter', 'really', 'cool.”juno', 'mission', 'explore', 'enigma', 'beneath', 'cloud', 'jupiter', 'storm', 'swirl', 'century', 'extend', 'inside', 'solar', 'system', 'large', 'planet?“we', 'still', 'question', 'poise', 'begin', 'answer', 'diane', 'brown', 'program', 'executive', 'conference', 'month', 'first', 'craft', 'orbit', 'jupiter', 'decade', 'earlier', 'robotic', 'explorer', 'galileo', 'spend', 'eight', 'years', 'astound', 'image', 'planet', 'moon', 'reveal', 'feature', 'large', 'ocean', 'crust', 'europa', 'consider', 'promise', 'place', 'elsewhere', 'solar', 'system', 'focus', 'jupiter',

['wimbledon', 'england', 'roger', 'federer', 'tennis', 'perennial', 'sentimental', 'favorite', 'surrender', 'crown', 'wednesday', 'opponent', '772nd', 'rank', 'marcus', 'willis', 'improbable', 'second', 'round', 'match', 'wimbledon', 'moon.“i', 'think', 'story', 'sport', 'federer', 'around', 'enough', 'quite', 'monday', 'around', 'outsider', 'iceland', 'eliminate', 'england', 'reach', 'quarterfinal', 'european', 'soccer', 'championship', 'least', 'english', 'relish', 'underdog', 'perhaps', 'expense', 'national', 'honor', 'small', 'consolation', '25-year', 'willis', 'charge', 'pound', 'private', 'lesson', 'ponder', 'trans', 'atlantic', 'coach', 'philadelphia', 'romance', 'change', 'story', 'arc.“it', 'get', 'little', 'willis', 'hoopla', 'enjoy', 'rolling.”after', 'winning', 'three', 'match', 'prequalifying', 'qualifying', 'tournament', 'wimbledon', 'willis', 'three', 'match', 'granddaddy', 'tennis', 'tournament', 'monday', 'rank', 'ricardas', 'berankis', 'first', 'round', 'plenty', 'com

['omaha', 'coastal', 'carolina', 'know', 'elegant', 'chaucer', 'inspire', 'nickname', 'slug', 'first', 'college', 'world', 'series', 'chanticleers', 'division', 'reflect', 'coziness', 'renovate', 'stadium', 'flatter', 'baseball', 'n.c.a.a.', 'introduce', 'season', 'flies', 'farther', 'increase', 'scoring', 'nationwide', 'something', 'happen', 'chanticleers', 'college', 'baseball', 'resuscitate', 'offense', 'college', 'world', 'series', 'begin', 'ameritrade', 'weekend', 'although', 'team', 'eight', 'game', 'friday', 'night', 'scoring', 'drop', 'game', 'match', 'lowest', 'output', 'college', 'world', 'series', 'history', 'tying', '2014.teams', 'batting', 'point', 'worse', 'pitching', 'e.r.a.', 'three', 'game', 'end', 'shutout', 'oklahoma', 'state', 'become', 'first', 'game', 'event', 'revive', 'complaint', 'incoming', 'prevail', 'knock', 'drive', 'alley', 'permit', 'outfielder', 'singles', 'playing', 'shallow', 'tuesday', 'night', 'coastal', 'carolina', 'texas', 'christian', 'coach', 'gi

['world', 'ugly', 'contest', 'beast', 'beauty', 'wrinkle', 'wispy', 'haired', 'chinese', 'crest', 'chihuahua', 'name', 'sweepee', 'rambo', 'years', 'blind', 'pound', 'bigger', 'hands', 'together', 'crowd', 'favorite', 'contest', 'friday', 'night', 'sonoma', 'marin', 'petaluma', 'calif', 'audience', 'people', 'carry', 'sign', 'sweepee', 'president', 'marketing', 'director', 'karen', 'spencer', 'spencer', 'identify', 'owner', 'jason', 'wurtz', 'encino', 'calif.', 'swarm', 'reporter', 'france', 'germany', 'friday', 'night', 'immediately', 'available', 'comment', 'contest', 'entry', 'express', 'appreciation', 'sweepee', 'freckle', 'legendary', 'blond', 'mohawk', 'sweepee', 'describe', 'chick', 'enjoy', 'riding', 'owner', 'honda', 'motorcycle', 'entry', 'winner', 'quasi', 'short', 'spine', 'syndrome', 'birth', 'defect', 'anything', 'quasi', 'entire', 'could', 'owner', 'virginia', 'sayre', 'guardian', 'contest', 'names', 'roast', 'rascal', 'feature', 'particular', 'himisaboo', 'drawing', 'at

['janeiro', 'right', 'olympic', 'game', 'beating', 'madrid', 'tokyo', 'chicago', 'brazil', 'flying', 'although', 'escape', 'consequence', 'financial', 'crisis', 'suffer', 'economic', 'damage', 'quickly', 'country', 'include', 'unite', 'state', 'economy', 'boom', 'federal', 'government', 'flush', 'popular', 'president', 'inácio', 'silva', 'institute', 'series', 'expensive', 'social', 'program', 'help', 'million', 'brazilian', 'toward', 'better', 'economist', 'magazine', 'predict', 'brazil', 'would', 'world', 'fifth', 'large', 'economy', 'leapfrog', 'britain', 'france.“i’ve', 'never', 'pride', 'brazil', 'everyone', 'brazil', 'call', 'victory', 'announce', 'going', 'world', 'great', 'country', 'copacabana', 'beach', 'party', 'break', 'game', 'begin', 'week', 'nobody', 'party', 'anymore', 'economic', 'social', 'political', 'conditions', 'facing', 'brazil', 'change', 'drastically', 'corruption', 'scandal', 'begin', 'country', 'giant', 'company', 'petrobras', 'result', 'exposés', 'investigat

['boeing', 'company', 'thursday', 'offer', 'details', 'propose', 'passenger', 'aircraft', 'reject', 'suggestion', 'sufficient', 'homework', 'identify', 'user', 'plane', 'letter', 'congressional', 'critic', 'politically', 'delicate', 'boeing', 'national', 'airline', 'intend', 'passenger', 'plane', 'variety', 'model', 'worth', 'billion', 'lease', 'company', 'delivery', 'project', 'begin', 'early', 'letter', 'keating', 'president', 'government', 'operations', 'vigorous', 'compliance', 'mechanism', 'boeing', 'regard', 'screening', 'party', 'business.”mr', 'keating', 'write', 'boeing', 'strictly', 'adhere', 'dealings', 'iranian', 'entity', 'approve', 'unite', 'state', 'sanction', 'monitors.“we', 'could', 'corporation', 'reasonably', 'expect', 'better', 'intelligence', 'resource', 'government', 'keating', 'write', 'therefore', 'government', 'provide', 'information', 'need', 'remain', 'compliant.”boeing', 'propose', 'disclose', 'month', 'potentially', 'significant', 'economic', 'outcome', 'in

['james', 'ryanlearning', 'technology', 'offer', 'great', 'potential', 'improve', 'education', 'whether', 'potential', 'realize', 'depend', 'three', 'factor', 'technology', 'people', 'using', 'first', 'ensure', 'sight', 'learning', 'learning', 'technology', 'student', 'learn', 'different', 'rates', 'critical', 'master', 'topic', 'move', 'student', 'engage', 'likely', 'learn', 'learn', 'deeply', 'active', 'learning', 'likely', 'engage', 'student', 'passive', 'learning', 'follow', 'technology', 'tailor', 'learning', 'interest', 'student', 'likely', 'bigger', 'payoff', 'second', 'recognize', 'adaptive', 'challenge', 'technical', 'thinking', 'fundamental', 'question', 'organize', 'class', 'school', 'think', 'teacher', 'expect', 'student', 'learn', 'technical', 'question', 'question', 'organizational', 'behavior', 'teacher', 'principal', 'finally', 'commit', 'impartial', 'third', 'party', 'evaluation', 'learning', 'technology', 'working', 'judgment', 'balance', 'sense', 'urgency', 'patience

['recently', 'visit', 'hometown', 'birmingham', 'recognize', 'achieve', 'eighth', 'grader', 'public', 'school', 'found', 'reflect', 'experience', '12-year', 'participate', 'child', 'march', 'fight', 'access', 'better', 'education', 'wonder', 'come?today', 'student', 'begin', 'college', 'earn', 'degree', 'years', 'later', 'completion', 'rates', 'lower', 'student', 'color', 'example', 'among', 'student', 'start', 'college', 'university', 'percent', 'graduate', 'institution', 'within', 'years', 'percent', 'african', 'american', 'equally', 'striking', 'income', 'percent', 'american', 'family', 'income', 'quartile', 'attain', 'bachelor', 'degree', 'compare', 'fewer', 'percent', 'family', 'bottom', 'income', 'quartile', 'clear', 'increase', 'access', 'college', 'enough', 'ensure', 'student', 'succeed', 'require', 'strategy', 'better', 'understanding', 'students’', 'strength', 'challenge', 'supporting', 'teacher', 'faculty', 'development', 'solution', 'start', 'agree', 'acceptable', 'large', 

['cairo', 'egyptian', 'court', 'tuesday', 'nullify', 'government', 'decision', 'transfer', 'sovereignty', 'strategic', 'island', 'saudi', 'arabia', 'surprise', 'setback', 'president', 'abdel', 'fattah', 'ruling', 'surprise', 'critic', 'expect', 'judiciary', 'government', 'april', 'transfer', 'tiran', 'sanafir', 'uninhabited', 'island', 'mouth', 'aqaba', 'saudi', 'arabia', 'visit', 'saudi', 'monarch', 'salman', 'saudi', 'arabia', 'place', 'island', 'egyptian', 'control', 'fear', 'israel', 'might', 'seize', 'portray', 'return', 'correction', 'historical', 'quirk', 'rather', 'saudi', 'arabia', 'major', 'financial', 'supporter', 'since', 'military', 'chief', 'ouster', 'islamist', 'predecessor', 'mohamed', 'morsi', '2013.the', 'island', 'strategic', 'value', 'mouth', 'aqaba', 'route', 'jordanian', 'aqaba', 'eilat', 'israel', 'egyptian', 'saudi', 'official', 'sign', 'least', 'agreement', 'visit', 'include', 'sinai', 'development', 'package', 'worth', 'billion', 'egypt', 'years', 'state', 'me

['vega', 'donald', 'trump', 'nevada', 'headquarters', 'stand', 'tape', 'declare', 'move', 'forwarding', 'information', 'available.”on', 'weekday', 'morning', 'hampshire', 'another', 'battleground', 'state', 'november', 'single', 'worker', 'hover', 'trump', 'office', 'manchester', 'national', 'campaign', 'trump', 'tower', 'manhattan', 'trump', 'presumptive', 'republican', 'presidential', 'nominee', 'cloister', 'group', 'relative', 'longtime', 'business', 'associate', 'rely', 'staff', 'dozen', 'people', 'electorate', 'million', 'trump', 'dominate', 'campaign', 'cable', 'social', 'medium', 'drawing', 'large', 'crowd', 'incendiary', 'speech', 'immigration', 'national', 'security', 'candidacy', 'falter', 'important', 'political', 'organization', 'sweep', 'primary', 'caucus', 'skeletal', 'campaign', 'staff', 'budget', 'fund', 'largely', 'account', 'compete', 'hillary', 'clinton', 'presumptive', 'democratic', 'opponent', 'shadow', 'financial', 'political', 'infrastructure', 'amass', 'crucial'

['london', 'charge', 'murder', 'british', 'lawmaker', 'first', 'court', 'appearance', 'saturday', 'telling', 'court', 'ask', 'death', 'traitor', 'freedom', 'britain.”thomas', 'charge', 'earlier', 'saturday', 'murder', 'several', 'offense', 'include', 'inflict', 'grievous', 'bodily', 'possession', 'firearm', 'birstall', 'appear', 'briefly', 'westminster', 'magistrates’', 'court', 'london', 'charge', 'bring', 'british', 'medium', 'prohibit', 'reporting', 'might', 'prejudice', 'trial', 'member', 'parliament', 'advocate', 'immigrants’', 'cause', 'gun', 'thursday', 'outside', 'library', 'birstall', 'leeds', 'northern', 'england', 'wrapping', 'meeting', 'constituent', '77-year', 'slightly', 'injure', 'attack', 'death', 'referendum', 'whether', 'britain', 'leave', 'european', 'union', 'immediate', 'suspension', 'campaigning', 'side', 'respect', 'memory', 'leave', 'support', 'britain', 'departure', '28-nation', 'friday', 'would', 'resume', 'campaigning', 'weekend', 'issue', 'immigration', 'nat

['chris', 'wilson', 'move', 'bushwick', 'clinton', 'brooklyn', 'husband', 'brown', 'child', 'amaze', 'nearby', 'resident', 'offer', 'accommodate', 'move', 'truck.“this', 'place', 'street', 'parking', 'prime', 'wilson', 'buy', 'bedroom', 'bathroom', 'condominium', 'brown', 'bleecker', 'street', '750,000', 'somebody', 'would', 'could', 'truck', 'there?’', 'community.”mr', 'wilson', 'register', 'nurse', 'spend', '1980s', '1990s', 'living', 'village', 'bushwick', 'flavor', 'really', 'enjoy', 'it.”bushwick', '113,000', 'resident', 'accord', 'unite', 'state', 'census', 'quite', 'change', 'recent', 'years', 'number', 'hispanic', 'resident', '73,608', 'percent', 'population', 'increase', 'percent', 'number', 'puerto', 'ricans', 'decline', 'mexican', 'dominican', 'growing', 'number', 'white', 'hispanic', 'triple', '9,560', 'number', 'black', 'hispanic', 'almost', 'percent', '22,686.the', 'number', 'resident', 'around', 'percent', '34,227', '2010.paris', 'smeraldo', 'open', 'perhaps', 'first', '

[]


['dermatologist', 'today', 'online', 'tool', 'doctor', 'network', 'likely', 'visit', 'still', 'tool', 'whether', 'already', 'deductible', 'offer', 'review', 'doctor', 'behind', 'tool', 'available', 'health', 'insurer', 'start', 'several', 'years', 'harness', 'power', 'call', 'company', 'medical', 'claim', 'source', 'information', 'people', 'become', 'sophisticate', 'shopper', 'medical', 'promise', 'overall', 'health', 'process', 'estimate', 'savings', 'newfound', 'transparency', 'could', 'billion', 'dollar', 'limit', 'become', 'increasingly', 'clear', 'people', 'access', 'newly', 'available', 'information', 'insight', 'impossible', 'example', 'whether', 'dermatologist', 'costs', 'twice', 'another', 'successfully', 'diagnose', 'cancer.“price', 'transparency', 'tool', 'likely', 'panacea', 'hope', 'respect', 'control', 'health', 'costs', 'kevin', 'volpp', 'director', 'center', 'health', 'incentive', 'behavioral', 'economics', 'leonard', 'davis', 'institute', 'write', 'editorial', 'journal

['federal', 'judge', 'manhattan', 'thursday', 'reject', 'request', 'trader', 'turkey', 'grant', 'unusual', 'release', 'pending', 'trial', 'would', 'allow', 'million', 'secure', 'million', 'finance', 'gild', 'judge', 'richard', 'berman', 'deny', 'trader', 'zarrab', 'proposal', 'recently', 'lease', 'apartment', '24-hour', 'arm', 'guard', 'monitoring', 'expense', 'unreasonable', 'help', 'foster', 'inequity', 'unequal', 'treatment', 'favor', 'small', 'cohort', 'criminal', 'defendant', 'extremely', 'wealthy', 'zarrab.”judge', 'berman', 'prosecutor', 'show', 'zarrab', 'pose', 'flight', 'conditions', 'include', 'privately', 'fund', 'arm', 'guard', 'reasonably', 'assure', 'appearance', 'trial.”mr', 'zarrab', 'move', 'turkey', 'infant', 'arrest', 'march', 'florida', 'daughter', 'manhattan', 'charge', 'include', 'conspire', 'violate', 'unite', 'state', 'sanction', 'zarrab', 'plead', 'guilty', 'lawyer', 'benjamin', 'brafman', 'ask', 'judge', 'berman', 'grant', 'zarrab', 'special', 'conditions', '

['donald', 'trump', 'latest', 'point', 'contention', 'gawker', 'medium', 'hogan', 'lawyer', 'letter', 'gawker', 'behalf', 'treatment', 'clinic', 'charles', 'hard', 'represent', 'hogan', 'retire', 'wrestler', 'invasion', 'privacy', 'lawsuit', 'medium', 'company', 'recent', 'article', 'trump', 'numerous', 'false', 'defamatory', 'statement', 'clients.”mr', 'hard', 'demand', 'gawker', 'immediately', 'remove', 'article', 'publish', 'public', 'apology', 'retraction', 'letter', 'threaten', 'legal', 'action', 'behalf', 'clinic', 'ivari', 'international', 'edward', 'ivari', 'letter', 'continue', 'legal', 'battle', 'hard', 'gawker', 'angeles', 'base', 'hard', 'mirell', 'abrams', 'thrust', 'spotlight', 'month', 'reveal', 'silicon', 'valley', 'billionaire', 'peter', 'thiel', 'secretly', 'provide', 'financial', 'support', 'lawsuit', 'gawker', 'clear', 'whether', 'thiel', 'pledge', 'delegate', 'trump', 'republican', 'convention', 'behind', 'letter', 'letter', 'obtain', 'times', 'list', 'false', 'def

['euro', '290).this', 'sleekly', 'refurbish', 'rebranded', 'property', 'emerge', 'rising', 'night', 'spring', 'getting', 'management', 'major', 'makeover', 'único', 'hotel', 'spend', 'years', 'radar', 'despite', 'stellar', 'location', 'junction', 'madrid', 'trendy', 'chueca', 'neighborhood', 'bustle', 'enhance', 'principal', 'appeal', 'traveler', 'local', 'panoramic', 'view', 'madrid', 'famously', 'ebullient', 'architecture', 'along', 'reason', 'public', 'area', 'understate', 'lobby', 'restaurant', 'floor', 'terrace', 'balcony', 'optimize', 'dining', 'drinking', 'sprawling', 'rooftop', 'terrace', 'become', 'magnet', 'stylish', 'madrileños', 'summer', 'match', 'spectacular', 'scenery', 'newly', 'add', 'artisanal', 'cocktail', 'mention', 'modern', 'mediterranean', 'ramón', 'freixa', 'michelin', 'star', 'namesake', 'restaurant', 'sister', 'hotel', 'único', 'across', 'steps', 'broadway', 'madrid', 'principal', 'tuck', 'quiet', 'street', 'leading', 'gallery-', 'restaurant', 'dense', 'chueca

['years', 'fernando', 'lopez', 'storefront', 'money', 'transfer', 'business', 'atlantic', 'place', 'local', 'resident', 'could', 'money', 'family', 'friend', 'abroad', 'business', 'interamericana', 'express', 'handle', 'transmittal', 'mexico', 'dwindling', 'banks', 'regulator', 'strict', 'cross', 'border', 'money', 'transfer', 'lopez', 'business', 'account', 'close', 'several', 'times', 'recent', 'years', 'three', 'different', 'banks', 'money', 'laundering', 'concern', 'despite', 'state', 'license', 'hours', 'compliance', 'training', 'binder', 'rule', 'regulation', 'since', 'drop', 'banks', 'operate', 'money', 'transfer', 'business', 'agent', 'moneygram', 'international', 'money', 'transfer', 'deposit', 'transfer', 'account', 'company', 'banks', 'money', 'transfer', 'business', 'directly', 'frustrate', 'try', 'convert', 'offer', 'notary', 'payment', 'services', 'pound', 'parcel', 'shipping', 'center', 'banks', 'small', 'business', 'money', 'transfer', 'operation', 'lopez', 'said.“we', 

['donald', 'trump', 'advice', 'panic', 'republican', 'washington', 'melting', 'incendiary', 'statement', 'up.“politicians', 'politically', 'correct', 'anymore', 'breathe', 'trump', 'interview', 'tuesday', 'afternoon', 'fellow', 'republican', 'forcefully', 'protest', 'ethnically', 'charge', 'criticism', 'federal', 'judge', 'oversee', 'lawsuit', 'defunct', 'trump', 'university.“the', 'people', 'tire', 'political', 'correctness', 'things', 'totally', 'interlude', 'exceptional', 'stress', 'trump', 'campaign', 'control', 'gridlock', 'mouths.”even', 'chastise', 'washington', 'political', 'class', 'backbone', 'trump', 'exhibit', 'modest', 'sign', 'later', 'tuesday', 'getting', 'message', 'remark', 'questioning', 'fairness', 'judge', 'gonzalo', 'curiel', 'mexican', 'heritage', 'cross', 'apologize', 'issue', 'statement', 'comment', 'judge', 'curiel', 'misconstrue', 'final', 'republican', 'primary', 'night', 'victory', 'speech', 'strike', 'conventional', 'least', 'giving', 'discipline', 'address

['london', 'hands', 'founder', 'private', 'equity', 'terra', 'firma', 'capital', 'partner', 'deny', 'court', 'wednesday', 'conversation', 'claim', 'senior', 'executive', 'citigroup', 'ahead', 'disastrous', '2007.his', 'sue', 'citigroup', 'london', 'several', 'misrepresentation', 'hands', 'claim', 'executive', 'induce', 'complete', 'storied', 'music', 'label', 'acquisition', 'value', 'billion', 'pound', 'billion', 'hands', 'damage', 'reputation', 'shrewd', 'investor', 'erase', 'personal', 'fortune', 'seize', 'lender', 'face', 'crushing', 'amount', 'acquire', 'universal', 'music', 'group', 'hands', 'wednesday', 'personally', 'million', 'euro', 'million', 'wednesday', 'citigroup', 'lawyer', 'press', 'hands', 'motivation', 'behind', 'lawsuit', 'question', 'whether', 'claim', 'hands', 'file', 'lender', 'manchester', 'separately', 'pursue', 'litigation', 'unite', 'state', 'attempt', 'shakedown.”at', 'terra', 'firma', 'accused', 'charles', 'prince', 'former', 'chief', 'executive', 'citigroup'

['louisville', 'muhammad', 'fill', 'middle', 'three', 'screen', 'small', 'theater', 'footage', 'take', 'years', 'olympic', 'medal', 'performance', 'explain', 'decision', 'vietnam', 'group', 'white', 'college', 'students.“you', 'stand', 'america', 'religious', 'belief', 'convert', 'christianity', 'islam', 'somewhere', 'fight', 'stand', 'home.”from', 'darken', 'muhammad', 'center', 'saturday', 'mcelvaney', 'talbott', 'hear', 'words', 'could', 'right', 'exclaim', 'caring', 'voice', 'carry', 'hometown', 'mcelvaney', 'talbott', 'share', 'staging', 'citywide', 'weekend', 'native', 'friday', 'decade', 'struggle', 'parkinson', 'disease', 'outside', 'center', 'flag', 'lower', 'staff', 'inside', 'mcelvaney', 'talbott', 'memory', 'trigger', '15-minute', 'going', 'around', 'people', 'respects', 'bearing', 'bouquet', 'flower', 'boxing', 'glove', 'handmade', 'sign', 'letters', 'case', 'silent', 'blessing', 'kerry', 'borvan', 'traveling', 'nephew', 'chicago', 'dollywood', 'theme', 'pigeon', 'forge', 

['rufus', 'wainwright', 'remember', 'night', 'climb', 'musical', 'mount', 'everest', 'jump', 'one.“i’d', 'never', 'bottom', 'first', 'night', 'perform', 'singer', 'songwriter', 'recall', 'rufus', 'carnegie', 'creation', 'legendary', 'concert', 'garland', 'perform', 'space', '1961.backed', '36-piece', 'orchestra', 'wainwright', 'summon', 'garland', 'restless', 'spirit', 'jazzy', 'anthem', 'entertainment', 'openhearted', 'ballad', 'stormy', 'weather', 'repeat', 'garland', 'original', 'banter', 'please', 'talmudic', 'scholar', 'recite', 'sacred', 'result', 'showbiz', 'stunt', 'postmodern', 'happening', 'fanboy', 'dream', '32-year', 'claim', 'worship', 'stonewall', 'generation', 'live', 'album', 'incarnation', 'wainwright', 'receive', 'grammy', 'nomination', 'years', 'later', 'resurrect', 'concert', 'carnegie', 'homage', 'homage', 'playing', 'following', 'luminato', 'festival', 'toronto', 'husband', 'weisbrodt', 'outgo', 'artistic', 'director', 'wainwright', 'performer', 'person', 'decade'

['midnight', 'saturday', 'jimmy', 'glenn', 'slump', 'small', 'wooden', 'chair', 'times', 'square', 'aglow', 'cable', 'broadcast', '45-year', 'sanctum', 'boxing', 'ephemera', 'call', 'jimmy', 'corner', 'pack', 'dozens', 'patron', 'various', 'stage', 'inebriation', 'celebration', 'glenn', 'could', 'hardly', 'bring', 'career', 'glenn', 'serve', 'friend', 'occasional', 'trainer', 'muhammad', 'boxing', 'facility', 'times', 'square', 'street', 'spend', 'countless', 'hours', 'dancing', 'around', 'throw', 'stinging', 'glenn', 'pad', 'hands', 'although', 'halt', 'speech', 'labor', 'stride', 'portend', 'demise', 'decade', 'legendary', 'boxer', 'death', 'glenn', 'shock', 'unmoored.“he', 'among', 'fighter', 'politician', 'everything.”above', 'glenn', 'value', 'political', 'courage', 'unyielding', 'honesty', 'sport', 'sully', 'corruption', 'favorite', 'moment', 'boxer', 'career', 'refusal', 'conscript', 'military', 'vietnam', 'war.“he', 'brave', 'fighter', 'battle', 'outside', 'never', 'athlete', '

['explanation', 'fadell', 'former', 'executive', 'apple', 'know', 'aggressive', 'management', 'style', 'offer', 'departure', 'maker', 'digital', 'version', 'household', 'staple', 'thermostat', 'smoke', 'detector', 'help', 'found', 'google', 'alphabet', 'google', 'parent', 'company', 'announce', 'friday', 'fadell', 'leaving', 'leading', 'years', 'include', 'ownership', 'google', 'buy', 'billion', 'departure', 'come', 'month', 'controversy', 'regard', 'leadership', 'fadell', 'company', 'become', 'adviser', 'alphabet', 'chief', 'executive', 'larry', 'recent', 'interview', 'office', 'fadell', 'talk', 'career', 'apple', 'engineering', 'create', 'digital', 'music', 'player', 'work', 'first', 'three', 'version', 'iphone', 'found', 'rogers', 'young', 'apple', 'engineer.“i’m', 'beginning', 'things', 'fadell', 'maintenance', 'bed.”but', 'leaving', 'article', 'technology', 'site', 'information', 'recode', 'describe', 'harsh', 'corporate', 'culture', 'abrasiveness', 'resignation', 'stymie', 'produ

['jinshan', 'taiwan', 'paparazzo', 'gather', 'dozens', 'brave', 'sticky', 'sometimes', 'lanky', 'quarry', 'would', 'chase', 'times', 'celebrity', 'would', 'cooperate', 'particularly', 'snail', 'offer', 'drawing', 'crowd', 'farming', 'hamlet', 'northern', 'coast', 'taiwan', 'sighting', 'subject', 'adoration', 'teenage', 'mandopop', 'hiding', 'siberian', 'crane', 'fewer', '4,000', 'world', 'asian', 'island', 'tropics', 'environmentalist', 'call', 'little', 'white', 'crane', 'jinshan', 'rural', 'district', 'spend', 'movement', 'track', 'facebook', 'local', 'medium', 'briefly', 'venture', 'south', 'mountain', 'temporary', 'refuge', 'parking', 'taipei', 'subway', 'station', 'satellite', 'truck', 'line', 'country', 'roads', 'monitor', 'return', 'local', 'government', 'hire', 'bodyguard', 'crane', 'face', 'threat', 'feral', 'powerful', 'typhoon', 'big', 'problem', 'sightseers.“they', 'control', 'chuang', 'liang', 'guard', 'want', 'close', 'touch', 'him.”i', 'first', 'encounter', 'watching', '

['william', 'presidential', 'nominee', 'libertarian', 'party', 'wednesday', 'believe', 'hillary', 'clinton', 'nothing', 'criminal', 'using', 'personal', 'email', 'secretary', 'state', 'spoke', 'range', 'interview', 'alongside', 'johnson', 'former', 'governor', 'mexico', 'libertarian', 'party', 'presidential', 'nominee', 'secure', 'libertarian', 'nomination', 'sunday', 'party', 'contentious', 'convention', 'orlando', 'weekend', 'remark', 'serve', 'governor', 'massachusetts', 'republican', '1990s', 'deeply', 'criticism', 'republican', 'clinton', 'tenure', 'state', 'department', 'scandal', 'involve', 'private', 'server', 'email', 'comment', 'could', 'skepticism', 'among', 'libertarian', 'voter', 'right', 'center', 'years', 'governor', 'suspicion', 'could', 'inflame', 'criticism', 'facto', 'stalking', 'horse', 'clinton', 'democratic', 'front', 'runner', 'nomination', 'single', 'response', 'whether', 'clinton', 'secretary', 'state', 'yes.”he', 'equally', 'succinct', 'ask', 'private', 'email

['actress', 'hudson', 'need', 'head', 'wyeth', 'vintage', 'dunbar', 'edward', 'wormley', 'designer', 'tomas', 'maier', 'need', 'coffee', 'table', 'plunk', 'credit', 'safety', 'glass', 'stainless', 'steel', 'coffee', 'table', 'd’urso', 'hollywood', 'agent', 'bryan', 'lourd', 'search', 'lounge', 'chair', 'pick', 'black', 'leather', 'wegner', 'wyeth', 'price', 'enormous', 'selection', 'danish', 'modernist', 'piece', 'store', 'arguably', 'premiere', 'midcentury', 'modern', 'emporium', 'unite', 'state', 'owner', 'birch', 'elusive', 'talk', 'dealer', 'furniture', 'scene', 'larry', 'gagosian', 'interior', 'decorator', 'strong', 'taste', 'mirror', 'market', 'help', 'birch', 'square', 'jaw', '53-year', 'green', 'pepper', 'spackled', 'hobnob', 'olsen', 'twins', 'feature', 'assortment', 'furniture', 'recent', 'presentation', 'introduce', 'helmut', 'assortment', 'wegner', 'furniture', 'recently', 'giving', 'jimmy', 'fallon', 'advice', 'gramercy', 'sleek', 'bamboo', 'dining', 'table', 'design', 'bi

[]
[]
[]
['toronto', 'yankees’', 'offense', 'straits', 'arrive', 'ninth', 'inning', 'monday', 'manager', 'girardi', 'hands', 'struggle', 'stalwart', 'brett', 'gardner', 'hitless', 'pinch', 'rookie', 'refsnyder', 'rodriguez', 'given', '1-for-16', 'stay', 'anchor', 'bench', 'rally', 'short', 'drop', 'yankee', 'game', 'behind', 'first', 'place', 'boston', 'american', 'league', 'toronto', 'right', 'hander', 'marco', 'estrada', 'yankee', 'three', 'eight', 'innings', 'brian', 'mccann', 'homer', 'aaron', 'teixeira', 'double', 'storen', 'bring', 'tying', 'plate', 'girardi', 'stick', 'starlin', 'castro', 'whose', 'homer', 'lift', 'yankee', 'sunday', 'tampa', 'chase', 'headley', 'castro', 'flied', 'headley', 'strike', 'refsnyder', 'strike', 'eight', 'pitch', 'leading', 'girardi', 'rodriguez', 'headley', 'storen', 'hand', 'hitter', 'season', 'though', 'imagine', 'rodriguez', 'since', 'return', 'disabled', 'would', 'given', 'gardner', 'complaints.“i’ve', 'always', 'playing', 'getting', 'pinch', 'g

['strict', 'evangelical', 'participate', 'church', 'child', 'without', 'fully', 'understanding', 'principle', 'adult', 'subscribe', 'religion', 'immediate', 'family', 'however', 'continue', 'deeply', 'religious', 'adhere', 'believe', 'extreme', 'view', 'course', 'worship', 'please', 'however', 'issue', 'hypocrisy', 'life', 'example', 'strongly', 'choice', 'abortion', 'complicate', 'pregnancy', 'family', 'adopt', 'child', 'become', 'difficult', 'boycott', 'parental', 'wedding', 'bride', 'groom', 'previously', 'married', 'participate', 'would', 'bless', 'personal', 'aspect', 'openly', 'oppose', 'l.g.b.t.', 'right', 'support', 'politician', 'group', 'extremely', 'attend', 'school', 'require', 'opposition', 'civil', 'liberty', 'code', 'conduct', 'belong', 'church', 'openly', 'welcome', 'certain', 'family', 'members’', 'home', 'lifestyle', 'choice.”while', 'certainly', 'confound', 'hurtful', 'terms', 'limited', 'nearly', 'interaction', 'family', 'living', 'happy', 'successful', 'friend', 'c

['mount', 'sinai', 'israel', '825-bed', 'hospital', 'serve', 'downtown', 'manhattan', 'years', 'close', 'replace', 'smaller', 'building', 'hospital', 'official', 'wednesday', 'mount', 'sinai', 'health', 'system', 'israel', 'hospital', 'campus', 'would', 'years', 'hospital', 'close', 'primary', 'specialty', 'behavioral', 'outpatient', 'surgery', 'services', 'would', 'expand', 'israel', 'hospital', 'either', 'close', 'overhaul', 'operate', 'since', 'reduction', 'services', 'lower', 'manhattan', 'especially', 'decision', 'reflect', 'broad', 'trend', 'hospital', 'across', 'country', 'struggle', 'financially', 'health', 'costly', 'kenneth', 'davis', 'president', 'chief', 'executive', 'mount', 'sinai', 'health', 'system.“we', 'macroeconomics', 'health', 'unaffordable', 'everyone', 'davis', 'interview', 'wednesday', 'israel', 'stand', 'billion', 'years', 'change', 'federal', 'reimbursement', 'structure', 'alone', 'hospital', 'operate', 'million', 'davis', 'large', 'hospital', 'longer', 'effic

['gyllenhaal', 'agree', 'night', 'performance', 'sunday', 'george', 'raise', 'money', 'center', 'gyllenhaal', 'although', 'know', 'wow', 'musical', 'theater', 'summer', 'encore', 'center', 'production', 'little', 'horror', 'center', 'sunday', 'music', 'lyric', 'stephen', 'sondheim', 'james', 'lapine', 'beloved', 'show', 'musical', 'theater', 'canon', 'artistic', 'romantic', 'compromise', 'face', 'painter', 'george', 'seurat', 'pulitzer', 'prize', '1985.the', 'concert', 'performance', 'produce', 'jeanine', 'tesori', 'winning', 'composer', 'leaving', 'position', 'artistic', 'director', 'encore', 'center', 'continue', 'relationship', 'center', 'artistic', 'adviser', 'honoring', 'philanthropist', 'adrienne', 'arsht', 'schedule', 'performance', 'ticket', 'beginning', 'benefit', 'ticket', 'considerably', 'expensive', '2,500', '10,000', '25,000', '150,000', 'table.)","http://www.nytimes.com/2016/05/25/arts', 'music', 'gyllenhaal', 'concert', 'production', 'sunday', 'george.html']
[]
['familia

['fight', 'control', 'sumner', 'redstone', 'billion', 'medium', 'empire', 'escalate', 'significantly', 'unexpected', 'development', 'redstone', 'friday', 'remove', 'philippe', 'dauman', 'chief', 'executive', 'viacom', 'trust', 'control', 'company', 'declare', 'incompetent', 'redstone', 'remove', 'george', 'abrams', 'longtime', 'director', 'viacom', 'dauman', 'abrams', 'remove', 'board', 'national', 'amusement', 'private', 'theater', 'chain', 'company', 'redstone', 'frail', 'health', 'control', 'medium', 'empire', 'early', 'saturday', 'morning', 'redstone', 'confirm', 'statement', 'michael', 'partner', 'orrick', 'herrington', 'sutcliffe.“sumner', 'redstone', 'decisive', 'lawful', 'action', 'today', 'firmly', 'belief', 'interest', 'viacom', 'stockholder', 'remove', 'philippe', 'dauman', 'george', 'abrams', 'trustee', 'trust', 'director', 'national', 'amusement', 'statement', 'redstone', 'act', 'express', 'concern', 'regard', 'viacom', 'performance', 'messrs.', 'abrams', 'dauman', 'viacom

['phoenix', 'little', 'debate', 'arizona', 'become', 'state', 'impose', 'limit', 'assistance', 'needy', 'family', 'cutting', 'maximum', 'duration', 'benefit', 'third', 'since', 'new', 'limit', 'begin', 'welfare', 'recipient', 'learning', 'benefit', 'near', 'robinson', 'mother', '4-year', 'receive', 'assistance', 'eight', 'month', 'land', 'center', 'supply', 'retailer', 'automate', 'position', 'eliminate', 'receive', 'month', 'payment', 'up.“i', 'really', 'proud', 'robinson', 'pick', 'grocery', 'phoenix', 'anniversary', 'clinton', 'welfare', 'approach', 'impact', 'requirement', 'political', 'rift', 'expose', 'resurface', 'campaign', 'trail', 'autobiography', 'living', 'history', 'hillary', 'clinton', 'agree', 'husband', 'decision', 'welfare', 'though', 'outrage', 'loyal', 'supporter', 'program', 'family', 'dependent', 'child', 'create', 'help', 'create', 'generation', 'welfare', 'dependent', 'american', 'write', 'rival', 'democratic', 'presidential', 'nomination', 'senator', 'bernie', '

['william', 'hager', 'option', 'carolyn', 'hager', 'years', 'married', 'medication', 'become', 'burdensome', 'could', 'longer', 'afford', 'monday', 'morning', 'sleeping', 'deputy', 'florida', 'killing', 'lucie', 'hager', 'explanation', 'detail', 'arrest', 'affidavit', 'local', 'medium', 'hager', 'arrest', 'charge', 'first', 'degree', 'premeditate', 'murder', 'appear', 'highlight', 'difficulty', 'face', 'older', 'people', 'retire', 'fix', 'income', 'struggle', 'medicine', 'sheriff', 'office', 'hager', 'deputy', 'illness', 'ailment', 'require', 'numerous', 'medication', 'could', 'longer', 'afford', 'affidavit', 'accord', 'study', 'advocacy', 'group', 'people', 'specialty', 'drug', 'treat', 'complex', 'chronic', 'conditions', 'parkinson', 'disease', 'rheumatoid', 'arthritis', 'price', 'deputy', 'hager', 'severe', 'arthritis', 'health', 'issue', 'decline', 'drug', 'need', 'cite', 'privacy', 'accord', 'station', 'beach', 'record', 'hagers', 'file', 'bankruptcy', 'hager', 'work', 'sear', 'sh

['imprimatur', 'legendary', 'investor', 'warren', 'buffett', 'official', 'apple', 'world', 'large', 'company', 'market', 'capitalization', 'symbol', 'american', 'technological', 'innovation', 'value', 'stock', 'prove', 'decidedly', 'mix', 'blessing', 'buffett', 'world', 'prominent', 'successful', 'proponent', 'value', 'investing', 'approach', 'seek', 'stocks', 'undervalue', 'intrinsic', 'value', 'benjamin', 'graham', 'classic', 'intelligent', 'investor', 'buffett', 'credits', 'graham', 'shaping', 'approach', 'investing', 'value', 'investor', 'notice', 'buffett', 'holding', 'company', 'berkshire', 'hathaway', 'disclose', 'invest', 'billion', 'apple', 'stock', 'quarter.“we’ve', 'look', 'smead', 'manage', 'smead', 'value', 'successful', 'large', 'value', 'mutual', 'funds', 'years', 'accord', 'morningstar', 'anybody', 'discount', 'thinking', 'berkshire', 'hathaway', 'peril', 'opinion.”value', 'stocks', 'typically', 'unpopular', 'among', 'investor', 'share', 'often', 'batter', 'disappoint',

['familiar', 'bottle', 'liquor', 'sincerely', 'cocktail', 'open', 'january', 'english', 'call', 'bushwick', 'instead', 'house', 'cocktail', 'dispense', 'custom', 'porcelain', 'head', 'serve', 'graduate', 'glass', 'beaker', 'found', 'school', 'chemistry', 'class', 'tattoo', 'beard', 'bartender', 'aromatic', 'spritz', 'refresh', 'break', 'mixology', 'insist', 'making', 'cocktail', 'scratch', 'placeit', 'occupy', 'former', 'store', 'otherwise', 'quiet', 'stretch', 'wilson', 'avenue', 'residential', 'bushwick', 'line', 'vinyl', 'side', 'home', 'inside', 'space', 'narrow', 'décor', 'grandmotherly', 'frame', 'painting', 'flower', 'green', 'wall', 'chandelier', 'cast', 'feeble', 'light', 'marble', 'banquette', 'table', 'offer', 'patron', 'place', 'gather', 'beaker', 'crowdfun', 'love', '20-somethings', 'jade', 'small', 'recent', 'friday', 'night', 'group', 'young', 'cape', 'floral', 'shirt', 'crotch', 'pants', 'discuss', 'netflix', 'sitcom', 'unbreakable', 'kimmy', 'schmidt', 'nearby', 'table

['bloodletting', 'begin', 'beginning', 'television', 'networks’', 'annual', 'upfront', 'presentation', 'charm', 'advertiser', 'hope', 'entice', 'offering', 'event', 'first', 'return', 'series', 'network', 'cable', 'channels', 'means', 'learning', 'show', 'thursday', 'network', 'dozen', 'series', 'newcomer', 'acclaim', 'grinder', 'unloved', 'bordertown', 'critic', 'favorite', 'castle', 'agent', 'carter', 'nashville', 'another', 'popular', 'supergirl', 'years', 'officially', 'close', 'demise', 'cyber', 'remain', 'spinoff', 'friday', 'add', 'show', 'reject', 'include', 'drama', 'silence', 'debut', 'month', 'telenovela', 'spoof', 'star', 'longoria', 'cancel', 'show', 'network', 'show', 'shutter', 'notably', 'comic', 'crime', 'drama', 'castle', 'originally', 'secure', 'ninth', 'season', 'surprise', 'cancellation', 'contract', 'star', 'stana', 'katic', 'renew', 'tamala', 'jones', 'devotion', 'carry', 'unforgettable', 'eight', 'season', 'katic', 'post', 'twitter', 'friday', 'morning', 'nashvi

['expect', 'ahead', 'technologyoracle', 'google', 'court', 'monday', 'determine', 'whether', 'google', 'unfairly', 'oracle', 'property', 'software', 'android', 'operate', 'system', 'smartphones', 'oracle', 'want', 'billion', '7,000', 'line', 'total', 'million', 'line', 'android', 'line', 'found', 'critical', 'unfairly', 'google', 'likely', 'appeal', 'failing', 'company', 'total', 'billion', 'first', 'quarter', 'quentin', 'hardyeconomyeurozone', 'finance', 'minister', 'monday', 'afternoon', 'agreement', 'fresh', 'greece', 'minister', 'struggle', 'week', 'bridge', 'european', 'official', 'international', 'monetary', 'demand', 'reaching', 'guarantee', 'prospect', 'greece', 'pay', 'loan', 'realistic', 'greece', 'need', 'funds', 'avoid', 'default', 'debt', 'james', 'kanterentertainmentbeyond', 'brief', 'comment', 'release', 'robert', 'disney', 'chief', 'executive', 'chairman', 'publicly', 'discuss', 'recent', 'abrupt', 'resignation', 'likely', 'successor', 'thomas', 'staggs', 'analyst', 'ch

['slender', 'blank', 'face', 'horror', 'internet', 'inspiration', 'countless', 'short', 'story', 'adaptation', 'grisly', 'stab', 'extend', 'limbed', 'reach', 'mainstream', 'movie', 'audience', 'attract', 'hollywood', 'attention', 'three', 'production', 'studio', 'potential', 'turning', 'slender', 'video', 'virtual', 'reality', 'experience', 'creepy', 'await', 'cinematic', 'slender', 'character', 'subject', 'small', 'budget', 'film', 'recently', 'rumor', 'possible', 'inspiration', 'american', 'horror', 'story', 'first', 'hollywood', 'focusing', 'would', 'explore', 'expansion', 'comic', 'character', 'sinister', 'receive', 'nationwide', 'attention', 'cite', 'inspiration', 'profile', 'crime', '12-year', 'girl', 'waukesha', 'accused', 'stab', 'classmate', 'times', 'official', 'inspire', 'desire', 'impress', 'slender', 'girl', 'still', 'await', 'trial', 'mythology', 'entertainment', 'production', 'company', 'whose', 'works', 'include', 'movie', 'white', 'house', 'truth', 'secure', 'intellect

['washington', 'extraordinary', 'rebuke', 'party', 'presume', 'nominee', 'speaker', 'nation', 'high', 'ranking', 'elect', 'republican', 'thursday', 'ready', 'endorse', 'donald', 'trump', 'president', 'announcement', 'represent', 'split', 'among', 'republican', 'least', 'century', 'trump', 'would', 'unify', 'party', 'essentially', 'clinch', 'nomination', 'victory', 'indiana', 'primary', 'chairman', 'republican', 'national', 'convention', 'repeatedly', 'would', 'support', 'party', 'nominee', 'republican', 'try', 'regain', 'white', 'house', 'solidify', 'control', 'congress', 'combination', 'trump', 'times', 'outrageous', 'remark', 'insult', 'woman', 'hispanic', 'muslim', 'broad', 'rejection', 'republican', 'policy', 'prove', 'toxic', 'defend', 'majority', 'house', 'reputation', 'party', 'viability', 'within', 'trump', 'offer', 'bite', 'rejoinder', 'saying', 'statement', 'ready', 'support', 'speaker', 'agenda.”“perhaps', 'future', 'together', 'agreement', 'american', 'people', 'treat', 'ba

['phoenix', 'lawmaker', 'early', 'wednesday', 'reaffirm', 'arizona', 'state', 'participate', 'program', 'offer', 'health', 'child', 'working', 'proposal', 'restore', 'federal', 'child', 'health', 'insurance', 'program', 'know', 'arizona', 'kidscare', 'stall', 'state', 'senate', 'lawmaker', 'pass', 'billion', 'budget', 'senate', 'leaders', 'voice', 'concern', 'federal', 'government', 'would', 'eventually', 'payment', 'child', 'health', 'program', 'state', 'would', 'force', 'assume', 'state', 'lawmaker', 'originally', 'freeze', 'enrollment', 'program', 'years', 'economic', 'slowdown', 'supporter', 'opportunity', 'restore', 'program', 'congress', 'increase', 'states’', 'match', 'percent', 'allow', 'state', 'include', 'arizona', 'receive', 'reimbursement', 'caput', 'income', 'arizona', 'republican', 'control', 'house', 'vote', 'month', 'allow', 'program', 'restart', 'permit', 'state', 'suspend', 'fewer', 'federal', 'dollar', 'measure', 'tuesday', 'senate', 'president', 'biggs', 'would', 'a

['kathmandu', 'nepal', 'embryo', 'belong', 'somewhere', 'probably', 'empty', 'fertility', 'clinic', 'capital', 'nepal', 'month', 'suspend', 'liquid', 'nitrogen', 'fertility', 'center', 'grande', 'clinic', 'hospital', 'recently', 'operate', 'robust', 'surrogacy', 'business', 'attract', 'would', 'parent', 'around', 'world', 'embryo', 'stick', 'limbo', 'nepal', 'abruptly', 'ban', 'surrogacy', 'september.“we’ve', 'get', 'point', 'rafferty', 'services', 'officer', 'melbourne', 'australia', 'embryo', 'store', 'clinic', 'surrogacy', 'couple', 'biological', 'child', 'implant', 'embryo', 'woman', 'carry', 'child', 'afford', 'process', 'develop', 'country', 'unite', 'state', 'canada', 'look', 'cheap', 'option', 'regulate', 'country', 'nepal', 'india', 'thailand', 'nation', 'door', 'concern', 'exploitation', 'surrogate', 'oversight', 'safety', 'leaving', 'people', 'means', 'without', 'choices.“when', 'global', 'currently', 'doron', 'mamet', 'chief', 'executive', 'officer', 'tammuz', 'surrogacy', 

['sunday', 'congregant', 'serbian', 'orthodox', 'cathedral', 'attend', 'festive', 'orthodox', 'easter', 'celebration', 'bring', 'brightly', 'colored', 'hear', 'altar', 'contribute', 'continue', 'renovation', 'beloved', 'church', 'majestic', 'stone', 'structure', 'street', 'broadway', 'avenue', 'america', 'manhattan', 'landmark', '1968.but', 'monday', 'worshiper', 'begin', 'pilgrimage', 'church', 'instead', 'taking', 'place', 'lean', 'police', 'barricade', 'gaze', 'historic', 'building', 'gutted', 'rage', 'sunday', 'night.“it', 'emotional', 'experience', 'velimir', 'sabic', 'serbian', 'immigrant', 'longtime', 'congregant', 'stand', 'three', 'young', 'child', 'barricade', 'vault', 'renovate', 'several', 'years', 'reduce', 'char', 'spindly', 'skeleton', 'section', 'shambles', 'grand', 'stain', 'glass', 'windows', 'gape', 'chasm', 'cause', 'remain', 'investigation', 'monday', 'sabic', 'family', 'watch', 'century', 'church', 'evening', 'blaze', 'quickly', 'devastate', 'pitch', 'footage', 'r

['flint', 'mich.', 'health', 'worker', 'scramble', 'people', 'chronic', 'consequence', 'water', 'contamination', 'crisis', 'profound', 'stress', 'worry', 'depression', 'guilt', 'uncertainty', 'health', 'health', 'child', 'end', 'nature', 'crisis', 'anger', 'government', 'causing', 'contamination', 'try', 'remedy', 'taking', 'flint', 'residents.“the', 'first', 'thing', 'notice', 'flint', 'quite', 'honestly', 'level', 'anxiety', 'distress', 'nicole', 'lurie', 'assistant', 'secretary', 'department', 'health', 'human', 'services', 'coordinate', 'federal', 'recovery', 'effort', 'since', 'january', 'wednesday', 'president', 'obama', 'first', 'visit', 'since', 'contamination', 'reveal', 'behavioral', 'health', 'specialist', 'unite', 'state', 'public', 'health', 'service', 'begin', 'address', 'mental', 'health', 'problem', 'february', 'provide', 'psychological', 'first', 'training', 'people', 'interest', 'helping', 'others', 'water', 'emergency', 'genesee', 'health', 'system', 'local', 'mental

['florham', 'whose', 'starting', 'quarterback', 'remains', 'unsigned', 'continue', 'annual', 'tradition', 'restock', 'position', 'friday', 'select', 'christian', 'hackenberg', 'state', 'second', 'round', 'n.f.l.', 'draft', 'fourth', 'draft', 'quarterback', 'bryce', 'petty', 'baylor', 'fourth', 'round', 'clemson', 'sixth', 'round', 'smith', 'virginia', 'second', 'round', '2013.“i’m', 'extremely', 'grateful', 'opportunity', 'hackenberg', 'eligibility', 'remain', 'hours', 'this.”ryan', 'fitzpatrick', 'acquire', 'houston', 'texan', 'surprise', 'record', 'bowl', 'first', 'coach', 'although', 'fitzpatrick', 'remains', 'agent', 'still', 'petty', 'smith', 'overall', 'hackenberg', '6-foot-4', '228-pound', 'native', 'palmyra', 'start', 'strongly', 'o’brien', 'o’brien', 'coach', 'texan', 'hackenberg', 'scuffle', 'james', 'franklin', 'general', 'manager', 'maccagnan', 'coach', 'bowl', 'select', 'hackenberg', 'potential', 'ethic', 'character', 'high', 'player', 'rate', 'board', 'change', 'anything'

['washington', 'f.b.i.', 'close', 'wednesday', 'possibility', 'giving', 'apple', 'technical', 'solution', 'government', 'buy', 'unlock', 'iphone', 'attacker', 'shooting', 'bernardino', 'calif', 'decision', 'leaf', 'apple', 'technical', 'details', 'f.b.i.', 'unknown', 'outside', 'group', 'apparently', 'least', 'million', 'manage', 'bypass', 'company', 'vaunt', 'encryption', 'month', 'tense', 'sparring', 'bernardino', 'iphone', 'government', 'decision', 'clear', 'rebuke', 'apple', 'chief', 'executive', 'timothy', 'declare', 'publicly', 'company', 'develop', 'software', 'f.b.i.', 'unlock', 'phone', 'f.b.i.', 'wednesday', 'appear', 'eager', 'return', 'favor', 'refuse', 'divulge', 'finally', 'break', 'decision', 'upset', 'technology', 'industry', 'executive', 'appear', 'counter', 'obama', 'administration', 'promise', 'promote', 'security', 'transparency', 'nation', 'technology', 'operations', 'apple', 'decline', 'comment', 'wednesday', 'f.b.i.', 'official', 'maintain', 'buy', 'outside', 'co

['mexico', 'white', 'along', 'reforma', 'avenue', 'across', 'office', 'attorney', 'general', 'small', 'group', 'gather', 'maintain', 'vigil', '43.the', 'bear', 'black', 'white', 'image', 'forty', 'three', 'student', 'teacher', 'college', 'seize', 'police', 'iguala', 'september', 'never', 'hear', 'literal', 'figurative', 'reminder', 'absence', 'street', 'teem', 'hundred', 'thousand', 'protester', 'whose', 'collective', 'anger', 'help', 'disappearance', 'global', 'indictment', 'impunity', 'gnaw', 'mexico', 'symbol', 'thousand', 'people', 'vanish', 'nation', 'crowd', 'dissipate', 'raising', 'fear', 'spite', 'handling', 'recently', 'criticize', 'international', 'panel', 'expert', 'government', 'political', 'consequences.“just', 'social', 'movement', 'rodrigo', 'gonzález', 'student', 'mexico', 'volunteer', 'live', 'people', 'money', 'distract', 'government', 'exhaustion', 'forget', 'remind', 'society', 'never', 'forget.”public', 'pressure', 'building', 'recent', 'become', 'clear', 'internat

['yankee', 'tampa', 'ninth', 'inning', 'saturday', 'brett', 'gardner', 'step', 'plate', 'begin', 'assess', 'score', 'third', 'seventh', 'inning', 'gardner', 'drive', 'tying', 'yankee', 'fluke', 'single', 'yankees’', 'pitcher', 'likely', 'finish', 'afternoon', 'gardner', 'begin', 'thinking', 'innovatively', 'facing', 'reliever', 'erasmo', 'ramirez', 'gardner', 'want', 'surprise', 'defense', 'steal', 'second', 'carlos', 'beltran', 'perhaps', 'yankees’', 'hitter', 'season', 'attempt', 'drive', 'infielder', 'begin', 'creeping', 'count', 'gardner', 'guess', 'ramirez', 'would', 'beltran', 'envision', 'pitch', 'plate', 'coming', 'gardner', 'guess', 'correctly', 'crush', 'right', 'field', 'sending', 'yankee', 'yankee', 'stadium', 'yankee', 'starter', 'masahiro', 'tanaka', 'perhaps', 'outing', 'season', 'throw', 'season', 'pitch', 'striking', 'seven', 'seven', 'innings', 'allow', 'include', 'dependable', 'bullpen', 'dellin', 'betances', 'andrew', 'miller', 'yankees’', 'carry', 'gardner', 'regis

['dublin', 'century', 'easter', 'rising', 'critical', 'moment', 'ireland', 'quest', 'independence', 'britain', 'event', 'continue', 'polarize', 'opinion', 'ireland', 'beyond', 'regard', 'undemocratic', 'treachery', 'others', 'heroic', 'selflessness', 'whose', 'legacy', 'remains', 'unfulfilled', 'arm', 'revolt', 'irish', 'volunteer', 'irish', 'citizen', 'british', 'begin', 'april', 'last', 'ruthless', 'suppression', 'still', 'cast', 'shadow', 'politics', 'ireland', 'northern', 'ireland', 'rebellion', 'command', 'little', 'popular', 'support', 'decision', 'british', 'authorities', 'execute', 'ringleader', 'would', 'prove', 'catalyst', 'partition', 'island', 'formation', 'independent', 'state', 'south', 'impossible', 'avoid', 'centenary', 'commemoration', 'ireland', 'month', 'every', 'school', 'present', 'national', 'enactment', 'take', 'place', 'street', 'capital', 'state', 'broadcaster', 'pepper', 'schedule', 'historical', 'drama', 'documentary', 'easter', 'sunday', 'military', 'parade'

['benjamin', 'gladiator', 'fry', 'liver', 'attack', 'wildly', 'aggressive', 'chess', 'opening', 'wages', 'assault', 'oppose', 'player', 'opening', 'fainthearted', 'recent', 'friday', 'afternoon', 'beam', 'rattle', 'first', 'move', 'side', 'bishop', 'moment', 'attack', 'knight', 'fry', 'liver', 'attack', 'hairy', 'nothing', 'block', 'lighting', 'benjamin', 'years', 'sitting', 'small', 'wooden', 'chair', 'public', 'school', 'lower', 'school', 'school', 'gift', 'talented', 'student', 'upper', 'sitting', 'might', 'imprecise', 'state', 'constant', 'motion', 'month', 'lower', 'kindergartner', 'first', 'grader', 'finish', 'first', 'state', 'chess', 'tournament', 'defeat', 'elite', 'private', 'school', 'dalton', 'avenue', 'world', 'school', 'earlier', 'school', 'lower', 'first', 'grader', 'national', 'championship', 'grade', 'national', 'tournament', 'first', 'grader', 'national', 'state', 'tournament', 'saratoga', 'weekend', 'remember.“the', 'trophy', 'tall', 'almost', 'jumping', 'dinner', 'p

[]
[]
[]
['riyadh', 'saudi', 'arabia', 'president', 'obama', 'thursday', 'unite', 'state', 'would', 'continue', 'enhance', 'security', 'cooperation', 'allies', 'persian', 'encourage', 'carry', 'domestic', 'reform', 'bolster', 'ability', 'defend', 'comment', 'summit', 'meeting', 'leaders', 'persian', 'nation', 'intend', 'reassure', 'unite', 'state', 'remains', 'commit', 'security', 'pursue', 'rapprochement', 'meeting', 'growing', 'concern', 'among', 'saudi', 'arabia', 'close', 'allies', 'unite', 'state', 'limiting', 'engagement', 'middle', 'take', 'advantage', 'regional', 'turmoil', 'spread', 'influence', 'american', 'official', 'would', 'greater', 'commitment', 'state', 'fighting', 'terrorist', 'group', 'effort', 'become', 'secondary', 'campaign', 'iranian', 'back', 'militant', 'yemen', 'series', 'close', 'sessions', 'obama', 'counterpart', 'discuss', 'range', 'issue', 'civil', 'yemen', 'syria', 'libya', 'struggle', 'effort', 'military', 'economic', 'cooperation', 'fight', 'terrorist',

['angeles', 'lawsuit', 'artist', 'collector', 'seeking', 'return', 'consign', 'works', 'demand', 'profits', 'never', 'stop', 'douglas', 'chrismas', 'founder', 'gallery', 'business', 'early', 'champion', 'trailblazer', 'robert', 'irwin', 'richard', 'serra', 'michael', 'heizer', 'chrismas', 'spend', 'nearly', 'years', 'helping', 'start', 'start', 'career', 'artist', 'scrutinize', 'sometimes', 'failing', 'works', 'april', 'chrismas', 'gallery', 'failing', 'million', 'court', 'order', 'payment', 'settle', 'debt', 'running', 'chapter', 'bankruptcy', 'leslie', 'bankruptcy', 'trustee', 'call', 'facto', 'c.e.o.', 'reorganize', 'business', 'include', '30,000-square', 'gallery', 'historic', 'building', 'wilshire', 'district', 'space', 'beverly', 'hills.“it', 'world', 'leslie', 'train', 'forensic', 'accounting', 'douglas', 'longer', 'involve', 'finances', 'leslie', 'challenge', 'tracing', 'ownership', 'thousand', 'artwork', 'plan', 'satisfy', 'gallery', 'creditor', 'dozens', 'artist', 'could', 'a

['anders', 'behring', 'breivik', 'norwegian', 'extremist', 'kill', 'people', 'rampage', 'life', 'conditions', 'would', 'luxurious', 'american', 'incarceration', 'standard', 'three', 'suite', 'windows', 'include', 'treadmill', 'fridge', 'television', 'player', 'playstation', 'wednesday', 'norwegian', 'court', 'found', 'government', 'violate', 'human', 'right', 'conclude', 'solitary', 'confinement', 'pose', 'threat', 'mental', 'health', 'breivik', 'virtually', 'contact', 'inmate', 'subject', 'frequent', 'strip', 'search', 'search', 'trial', 'march', 'argue', 'isolation', 'amount', 'torture', 'judge', 'helen', 'andenaes', 'sekulic', 'district', 'court', 'oversee', 'trial', 'prison', 'security', 'reason', 'found', 'wednesday', 'prison', 'official', 'violate', 'article', 'european', 'convention', 'human', 'right', 'prohibit', 'inhuman', 'degrade', 'treatment', 'punishment', 'direct', 'government', 'reduce', 'extent', 'breivik', 'isolation', 'though', 'specify', 'order', 'government', 'breiv

['structural', 'scar', 'still', 'destruction', 'world', 'trade', 'center', 'sept.', '2001.they', 'subtle', 'small', 'notice', 'forget', 'dent', 'ding', 'hole', 'gash', 'pockmark', 'silvery', 'liberty', 'street', 'pedestrian', 'bridge', 'lead', 'street', 'brookfield', 'place', 'battery', 'likely', 'cause', 'south', 'tower', 'collapse', 'though', 'saying', 'cause', 'maelstrom', 'resemble', 'divot', 'limestone', 'wall', 'former', 'morgan', 'company', 'headquarters', 'street', 'create', 'killing', 'people', 'injure', 'hundred', 'morgan', 'deliberately', 'street', 'facade', 'unrepaired', 'damage', 'aluminum', 'panel', 'liberty', 'street', 'replace', 'brookfield', 'property', 'partner', 'owner', 'brookfield', 'place', 'rebuilding', 'bridge', 'liberty', 'construction', 'south', 'trade', 'center', 'pockmark', 'history', 'however', 'brookfield', 'executive', 'talking', 'national', 'september', 'memorial', 'museum', 'department', 'salvage', 'least', 'panels.“if', 'preserve', 'would', 'honor', 's

['paris', 'journalist', 'another', 'enormous', 'document', 'outing', 'global', 'powerful', 'people', 'wealth', 'offshore', 'company', 'prompt', 'panama', 'papers', 'several', 'european', 'government', 'follow', 'measure', 'intend', 'secrecy', 'costing', 'billion', 'dollar', 'revenues.“populist', 'outrage', 'collect', 'single', 'extra', 'pound', 'dollar', 'single', 'criminal', 'george', 'osborne', 'chancellor', 'exchequer', 'britain', 'conference', 'thursday', 'washington', 'big', 'economy', 'european', 'union', 'britain', 'france', 'germany', 'italy', 'spain', 'announce', 'automatically', 'share', 'information', 'beneficial', 'owner', 'shell', 'company', 'overseas', 'trust', 'global', 'campaign', 'expose', 'get', 'gain', 'corrupt', 'secretly', 'wealth', 'evaders', 'benefit', 'system', 'reward', 'anonymity', 'major', 'player', 'coming', 'short', 'unite', 'state', 'rank', 'third', 'behind', 'switzerland', 'financial', 'secrecy', 'index', 'publish', 'justice', 'network', 'nonprofit', 'org

In [11]:
from gensim import corpora
# Build the gensim Dictionary from text_data, the list of token lists filled
# by the loading cell earlier in the notebook.
# NOTE(review): in that loading cell the append sits inside the
# `random.random() > .99` branch, so text_data only holds the randomly sampled
# documents and the vocabulary covers just that sample — confirm this is intended.
dictionary = corpora.Dictionary(text_data)



In [12]:
# Convert every tokenised document in text_data to its bag-of-words form
# using the dictionary built in the previous cell.
corpus = list(map(dictionary.doc2bow, text_data))

In [13]:
import pickle

# Persist the bag-of-words corpus and the dictionary so the visualisation
# cells further down can reload them from disk.
# The context manager guarantees the file is flushed and closed once the dump
# finishes; the previous bare open() left the handle to the garbage collector.
with open('corpus.pkl', 'wb') as corpus_file:
    pickle.dump(corpus, corpus_file)
dictionary.save('dictionary.gensim')

### Try 5 topics

In [14]:
import gensim

# Fit an LDA model with NUM_TOPICS topics on the bag-of-words corpus and store
# it on disk; the pyLDAvis section reloads it from 'model5.gensim'.
NUM_TOPICS = 5
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=NUM_TOPICS,
    passes=15,
)
ldamodel.save('model5.gensim')

In [15]:
# Summarise the current model: ask for 4 words per topic and print each
# returned entry, which shows up as a (topic_id, 'weight*"word" + ...') tuple.
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)

(0, '0.005*"apple" + 0.004*"value" + 0.004*"company" + 0.004*"would"')
(1, '0.009*"trump" + 0.009*"republican" + 0.005*"people" + 0.004*"water"')
(2, '0.005*"state" + 0.004*"first" + 0.004*"money" + 0.004*"would"')
(3, '0.006*"would" + 0.006*"state" + 0.005*"trump" + 0.004*"people"')
(4, '0.010*"student" + 0.007*"percent" + 0.006*"college" + 0.004*"education"')


In [16]:
# Score an unseen title with the model currently bound to `ldamodel`
# (the 5-topic model when the notebook is run top to bottom).
new_doc = 'Practical Bayesian Optimization of Machine Learning Algorithms'
# Re-use the training preprocessing; note that new_doc is rebound from the raw
# string to the token list returned by prepare_text_for_lda.
new_doc = prepare_text_for_lda(new_doc)
# Map the tokens to (token_id, count) pairs using the training dictionary.
# NOTE(review): the recorded output has only two pairs, so presumably tokens
# missing from the dictionary are dropped here — confirm against gensim docs.
new_doc_bow = dictionary.doc2bow(new_doc)
print(new_doc_bow)
# Prints (topic_id, probability) pairs for the new document.
print(ldamodel.get_document_topics(new_doc_bow))

[(2873, 1), (4364, 1)]
[(0, 0.06668224), (1, 0.06682994), (2, 0.44925258), (3, 0.06859388), (4, 0.34864137)]


In [17]:
# Three-topic variant: retrain on the same corpus (rebinding `ldamodel`),
# save it as 'model3.gensim' and list 4 words for each topic.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=3,
    passes=15,
)
ldamodel.save('model3.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)

(0, '0.005*"first" + 0.004*"would" + 0.004*"state" + 0.003*"child"')
(1, '0.006*"student" + 0.004*"company" + 0.004*"college" + 0.004*"people"')
(2, '0.009*"trump" + 0.008*"republican" + 0.007*"state" + 0.006*"would"')


In [18]:
# Ten-topic variant: retrain on the same corpus (rebinding `ldamodel`),
# save it as 'model10.gensim' and list 4 words for each topic.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=10,
    passes=15,
)
ldamodel.save('model10.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)

(0, '0.012*"student" + 0.007*"college" + 0.005*"state" + 0.004*"include"')
(1, '0.007*"would" + 0.006*"state" + 0.006*"child" + 0.006*"woman"')
(2, '0.009*"first" + 0.009*"school" + 0.008*"chess" + 0.007*"tournament"')
(3, '0.008*"jupiter" + 0.005*"taiwan" + 0.005*"crane" + 0.005*"first"')
(4, '0.018*"trump" + 0.011*"campaign" + 0.005*"talbott" + 0.005*"gardner"')
(5, '0.009*"state" + 0.008*"money" + 0.006*"would" + 0.006*"breivik"')
(6, '0.017*"trump" + 0.013*"republican" + 0.007*"party" + 0.005*"would"')
(7, '0.014*"redstone" + 0.010*"police" + 0.009*"welfare" + 0.007*"dauman"')
(8, '0.008*"boeing" + 0.007*"fadell" + 0.006*"company" + 0.006*"would"')
(9, '0.007*"percent" + 0.005*"apple" + 0.005*"value" + 0.004*"birch"')


### pyLDAvis

In [19]:
# Fifty-topic variant: retrain on the same corpus (rebinding `ldamodel`) and
# list 4 words per printed topic.
# The file name is spelled 'mode50.gensim' (no "l"); the visualisation cell
# near the end of the notebook loads it under exactly this name.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=50,
    passes=15,
)
ldamodel.save('mode50.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)

(3, '0.022*"jupiter" + 0.014*"glixel" + 0.013*"mission" + 0.012*"orbit"')
(15, '0.017*"grass" + 0.017*"wimbledon" + 0.015*"court" + 0.013*"serve"')
(0, '0.006*"revolution" + 0.006*"commonly" + 0.006*"tremendous" + 0.006*"apparently"')
(49, '0.026*"woman" + 0.020*"swimming" + 0.016*"hours" + 0.011*"public"')
(34, '0.020*"vietnam" + 0.013*"local" + 0.009*"hanoi" + 0.009*"sherer"')
(12, '0.011*"welfare" + 0.010*"water" + 0.008*"health" + 0.008*"people"')
(36, '0.032*"trump" + 0.020*"republican" + 0.009*"party" + 0.008*"would"')
(24, '0.022*"gallery" + 0.018*"artist" + 0.017*"chrismas" + 0.010*"recipe"')
(8, '0.031*"boeing" + 0.018*"polio" + 0.014*"state" + 0.013*"health"')
(37, '0.028*"element" + 0.018*"elements" + 0.015*"name" + 0.015*"names"')
(32, '0.000*"student" + 0.000*"would" + 0.000*"state" + 0.000*"company"')
(47, '0.024*"taiwan" + 0.023*"crane" + 0.012*"hackenberg" + 0.011*"bird"')
(29, '0.024*"birch" + 0.012*"wyeth" + 0.010*"furniture" + 0.009*"design"')
(41, '0.000*"jupiter" +

In [20]:
# Reload the artefacts written earlier in the notebook so the visualisation
# works from what is stored on disk.
dictionary = gensim.corpora.Dictionary.load('dictionary.gensim')
# Use a context manager so the file handle is closed right after unpickling;
# the previous bare open() left it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code — only load a corpus.pkl that
# this notebook produced itself.
with open('corpus.pkl', 'rb') as corpus_file:
    corpus = pickle.load(corpus_file)
lda = gensim.models.ldamodel.LdaModel.load('model5.gensim')

In [21]:
import pyLDAvis.gensim
# Prepare the pyLDAvis data for `lda`, the model reloaded from 'model5.gensim'
# in the previous cell, using the reloaded corpus and dictionary.
# NOTE(review): sort_topics=False presumably keeps pyLDAvis from renumbering
# the topics — confirm against the pyLDAvis documentation.
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
# Last expression of the cell, so its result is what the cell renders.
pyLDAvis.display(lda_display)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [22]:
# Load the 3-topic model saved as 'model3.gensim' and visualise it against the
# same corpus and dictionary.
lda3 = gensim.models.ldamodel.LdaModel.load('model3.gensim')
lda_display3 = pyLDAvis.gensim.prepare(lda3, corpus, dictionary, sort_topics=False)
# Last expression of the cell, so its result is what the cell renders.
pyLDAvis.display(lda_display3)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [23]:
# Load the 10-topic model saved as 'model10.gensim' and visualise it against
# the same corpus and dictionary.
lda10 = gensim.models.ldamodel.LdaModel.load('model10.gensim')
lda_display10 = pyLDAvis.gensim.prepare(lda10, corpus, dictionary, sort_topics=False)
# Last expression of the cell, so its result is what the cell renders.
pyLDAvis.display(lda_display10)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [24]:
# Load the 50-topic model and visualise it against the same corpus and
# dictionary. The file name 'mode50.gensim' (no "l") matches the spelling used
# by the save call in the 50-topic training cell; the two must stay in sync.
lda50 = gensim.models.ldamodel.LdaModel.load('mode50.gensim')
lda_display50 = pyLDAvis.gensim.prepare(lda50, corpus, dictionary, sort_topics=False)
# Last expression of the cell, so its result is what the cell renders.
pyLDAvis.display(lda_display50)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))
