In [1]:
# Plotting tools
import pyLDAvis
import pyLDAvis.gensim  # don't skip this
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import spacy
from spacy.lang.en import English

# tokenize() only needs tokenisation plus the lexical attributes it reads
# (like_url, orth_, lower_), so the blank English pipeline is sufficient.
# No `spacy.load(...)` call is made here: loading a full statistical model and
# discarding the result is wasted work, and the bare 'en' shortcut name is not
# accepted by spaCy >= 3.
parser = English()
def tokenize(text):
    """Split ``text`` into a list of normalised token strings.

    The module-level ``parser`` does the tokenisation. Whitespace-only tokens
    are dropped, URL-like tokens are replaced by the placeholder ``'URL'``,
    tokens starting with ``@`` are replaced by ``'SCREEN_NAME'``, and every
    other token is emitted in lower case.
    """
    def _normalise(tok):
        # URL detection takes precedence over the '@' check, then lower-casing.
        if tok.like_url:
            return 'URL'
        if tok.orth_.startswith('@'):
            return 'SCREEN_NAME'
        return tok.lower_

    return [_normalise(tok) for tok in parser(text) if not tok.orth_.isspace()]

In [3]:
import nltk
# Fetch the WordNet corpus that the lemma helpers below rely on.
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
def get_lemma(word):
    """Return the ``wn.morphy`` base form of ``word``.

    Falls back to ``word`` unchanged when ``morphy`` returns ``None``.
    """
    base_form = wn.morphy(word)
    return word if base_form is None else base_form
    
from nltk.stem.wordnet import WordNetLemmatizer
def get_lemma2(word):
    """Lemmatize ``word`` with a freshly created ``WordNetLemmatizer``."""
    lemmatizer = WordNetLemmatizer()
    return lemmatizer.lemmatize(word)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/rupamacharyya/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# Fetch the NLTK stop-word corpus, then keep the English list as a set so that
# prepare_text_for_lda can test membership cheaply.
nltk.download('stopwords')
en_stop = set(nltk.corpus.stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rupamacharyya/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
def prepare_text_for_lda(text):
    """Turn raw ``text`` into the list of lemmas fed to the LDA pipeline.

    The text is tokenised with ``tokenize``; tokens of four characters or
    fewer and tokens found in ``en_stop`` are discarded; each survivor is
    mapped through ``get_lemma``.

    Note that the length filter also removes the three-character ``'URL'``
    placeholder that ``tokenize`` emits.
    """
    return [
        get_lemma(tok)
        for tok in tokenize(text)
        if len(tok) > 4 and tok not in en_stop
    ]

In [6]:
import glob
import pickle
def load_pickle(pickle_file):
    """Load and return the object stored in the pickle file at ``pickle_file``.

    If the default load raises ``UnicodeDecodeError``, the file is reopened
    and loaded again with ``encoding='latin1'``. Any other exception from the
    first attempt is reported on stdout and re-raised.

    NOTE(review): unpickling executes arbitrary code; only use on trusted files.
    """
    try:
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle)
    except UnicodeDecodeError:
        # Retry with a permissive byte-to-str decoding.
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle, encoding='latin1')
    except Exception as err:
        print('Unable to load data ', pickle_file, ':', err)
        raise

def text_prepare(talk):
    """Convert a transcript into a list of token lists, one per sentence.

    ``talk`` is iterated as a sequence of sentence groups. Groups that are
    exactly ``['(Laughter)']`` or ``['(Applause)']`` are skipped. Every
    sentence of the remaining groups is passed to ``prepare_text_for_lda``,
    and only non-empty token lists are kept.
    """
    token_lists = []
    for group in talk:
        is_audience_reaction = group == ['(Laughter)'] or group == ['(Applause)']
        if is_audience_reaction:
            continue
        for sentence in group:
            prepared = prepare_text_for_lda(sentence)
            if prepared:
                token_lists.append(prepared)
    return token_lists

# Build {talk id -> list of token lists} from every pickled talk in the
# metadata directory, then cache the result on disk.
# NOTE(review): the glob pattern is an absolute, machine-specific path;
# consider moving it to a configurable data directory.
ted_dic = {}
for file in glob.glob("/Users/rupamacharyya/DeepFairness/Data/TED_meta/*.pkl"):
    talk = load_pickle(file)
    talk_id = talk['talk_meta']['id']
    ted_dic[talk_id] = text_prepare(talk['talk_transcript'])
    print("Processing Document: ", talk_id)

# Use a context manager so the output file is flushed and closed
# deterministically instead of leaking the handle returned by open().
with open('tokenized_text.pkl', 'wb') as out_file:
    pickle.dump(ted_dic, out_file)


Processing Document:  1437
Processing Document:  2880
Processing Document:  2658
Processing Document:  1351
Processing Document:  49
Processing Document:  1345
Processing Document:  809
Processing Document:  2894
Processing Document:  1423
Processing Document:  2102
Processing Document:  61
Processing Document:  821
Processing Document:  2664
Processing Document:  2670
Processing Document:  75
Processing Document:  1379
Processing Document:  835
Processing Document:  2116
Processing Document:  606
Processing Document:  2843
Processing Document:  2857
Processing Document:  174
Processing Document:  148
Processing Document:  1153
Processing Document:  1621
Processing Document:  2328
Processing Document:  1147
Processing Document:  2466
Processing Document:  1609
Processing Document:  2300
Processing Document:  2314
Processing Document:  2472
Processing Document:  2499
Processing Document:  1190
Processing Document:  404
Processing Document:  362
Processing Document:  410
Processing Docum

Processing Document:  1421
Processing Document:  2128
Processing Document:  2896
Processing Document:  1347
Processing Document:  73
Processing Document:  833
Processing Document:  1419
Processing Document:  2110
Processing Document:  2104
Processing Document:  199
Processing Document:  67
Processing Document:  2662
Processing Document:  1343
Processing Document:  2892
Processing Document:  1425
Processing Document:  1431
Processing Document:  2138
Processing Document:  9
Processing Document:  2886
Processing Document:  1357
Processing Document:  628
Processing Document:  2879
Processing Document:  2689
Processing Document:  2851
Processing Document:  1380
Processing Document:  614
Processing Document:  172
Processing Document:  1394
Processing Document:  98
Processing Document:  2845
Processing Document:  2474
Processing Document:  2460
Processing Document:  1169
Processing Document:  2306
Processing Document:  1627
Processing Document:  2448
Processing Document:  1141
Processing Docu

Processing Document:  2674
Processing Document:  2660
Processing Document:  65
Processing Document:  2106
Processing Document:  1332
Processing Document:  1454
Processing Document:  1440
Processing Document:  2149
Processing Document:  1326
Processing Document:  2607
Processing Document:  842
Processing Document:  1468
Processing Document:  2161
Processing Document:  2175
Processing Document:  16
Processing Document:  856
Processing Document:  2820
Processing Document:  665
Processing Document:  103
Processing Document:  1483
Processing Document:  117
Processing Document:  2834
Processing Document:  881
Processing Document:  659
Processing Document:  2808
Processing Document:  1656
Processing Document:  2439
Processing Document:  1130
Processing Document:  1124
Processing Document:  1642
Processing Document:  2363
Processing Document:  498
Processing Document:  2405
Processing Document:  2411
Processing Document:  1118
Processing Document:  2377
Processing Document:  1695
Processing Do

Processing Document:  105
Processing Document:  1491
Processing Document:  2198
Processing Document:  2417
Processing Document:  1678
Processing Document:  2371
Processing Document:  2365
Processing Document:  2403
Processing Document:  1122
Processing Document:  1644
Processing Document:  1650
Processing Document:  1888
Processing Document:  2359
Processing Document:  1136
Processing Document:  1863
Processing Document:  1877
Processing Document:  475
Processing Document:  313
Processing Document:  1687
Processing Document:  1693
Processing Document:  307
Processing Document:  2563
Processing Document:  298
Processing Document:  2211
Processing Document:  1718
Processing Document:  2577
Processing Document:  1056
Processing Document:  2239
Processing Document:  1730
Processing Document:  1724
Processing Document:  1042
Processing Document:  1917
Processing Document:  1903
Processing Document:  529
Processing Document:  501
Processing Document:  1095
Processing Document:  267
Processin

Processing Document:  1663
Processing Document:  1893
Processing Document:  2342
Processing Document:  2424
Processing Document:  1139
Processing Document:  2430
Processing Document:  2356
Processing Document:  1887
Processing Document:  320
Processing Document:  1878
Processing Document:  334
Processing Document:  2381
Processing Document:  1850
Processing Document:  1688
Processing Document:  308
Processing Document:  2395
Processing Document:  297
Processing Document:  1703
Processing Document:  1065
Processing Document:  2578
Processing Document:  1071
Processing Document:  1717
Processing Document:  2236
Processing Document:  2550
Processing Document:  1059
Processing Document:  2544
Processing Document:  2222
Processing Document:  254
Processing Document:  1918
Processing Document:  532
Processing Document:  526
Processing Document:  268
Processing Document:  1924
Processing Document:  2593
Processing Document:  2587
Processing Document:  1930
Processing Document:  1267
Processin

Processing Document:  1103
Processing Document:  1665
Processing Document:  2378
Processing Document:  1671
Processing Document:  483
Processing Document:  1117
Processing Document:  468
Processing Document:  1842
Processing Document:  2393
Processing Document:  2387
Processing Document:  1856
Processing Document:  326
Processing Document:  440
Processing Document:  2542
Processing Document:  2224
Processing Document:  1739
Processing Document:  2230
Processing Document:  2556
Processing Document:  1077
Processing Document:  1711
Processing Document:  285
Processing Document:  2218
Processing Document:  2581
Processing Document:  1088
Processing Document:  1936
Processing Document:  1922
Processing Document:  2595
Processing Document:  520
Processing Document:  246
Processing Document:  252
Processing Document:  534
Processing Document:  2026
Processing Document:  2740
Processing Document:  1249
Processing Document:  911
Processing Document:  2032
Processing Document:  1513
Processing 

Processing Document:  2335
Processing Document:  2453
Processing Document:  343
Processing Document:  431
Processing Document:  2484
Processing Document:  419
Processing Document:  1833
Processing Document:  1827
Processing Document:  1199
Processing Document:  2490
Processing Document:  592
Processing Document:  1006
Processing Document:  1760
Processing Document:  1774
Processing Document:  1012
Processing Document:  586
Processing Document:  2533
Processing Document:  2255
Processing Document:  1984
Processing Document:  1990
Processing Document:  2241
Processing Document:  2527
Processing Document:  551
Processing Document:  237
Processing Document:  223
Processing Document:  545
Processing Document:  1947
Processing Document:  2296
Processing Document:  2282
Processing Document:  1953
Processing Document:  1562
Processing Document:  948
Processing Document:  1204
Processing Document:  790
Processing Document:  2719
Processing Document:  1210
Processing Document:  1576
Processing D

Processing Document:  91
Processing Document:  1389
Processing Document:  85
Processing Document:  2680
Processing Document:  2858
Processing Document:  635
Processing Document:  2870
Processing Document:  153
Processing Document:  147
Processing Document:  2864
Processing Document:  621
Processing Document:  2333
Processing Document:  2455
Processing Document:  1148
Processing Document:  2441
Processing Document:  2327
Processing Document:  1606
Processing Document:  392
Processing Document:  1160
Processing Document:  2469
Processing Document:  1174
Processing Document:  386
Processing Document:  1612
Processing Document:  1821
Processing Document:  2496
Processing Document:  2482
Processing Document:  1835
Processing Document:  379
Processing Document:  351
Processing Document:  437
Processing Document:  423
Processing Document:  1809
Processing Document:  345
Processing Document:  1996
Processing Document:  2247
Processing Document:  2521
Processing Document:  2535
Processing Docum

Processing Document:  1374
Processing Document:  1412
Processing Document:  2
Processing Document:  2133
Processing Document:  2655
Processing Document:  50
Processing Document:  804
Processing Document:  44
Processing Document:  2641
Processing Document:  2127


In [7]:
from gensim import corpora
def create_dict_and_corpus(text_data):
    """Build a gensim dictionary and bag-of-words corpus from ``text_data``.

    ``text_data`` is a collection of token lists. Returns the pair
    ``(dictionary, corpus)`` where ``corpus`` holds one
    ``dictionary.doc2bow`` result per token list, in input order.
    """
    vocabulary = corpora.Dictionary(text_data)
    bag_of_words = list(map(vocabulary.doc2bow, text_data))
    return vocabulary, bag_of_words
#     import pickle
#     pickle.dump(corpus, open('corpus.pkl', 'wb'))
#     dictionary.save('dictionary.gensim')
  

scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.
  _deprecated()


In [18]:
import gensim
# Number of topics requested from LdaModel / show_topics in create_lda_model;
# also embedded in the topics pickle file name.
NUM_TOPICS = 10
# Number of words requested per topic from show_topics; also embedded in the
# topics pickle file name.
NUM_WORDS = 5
def create_lda_model(corpus, dictionary, passes=15, random_state=None):
    """Fit an LDA model on ``corpus`` and return its top words per topic.

    Parameters
    ----------
    corpus : bag-of-words corpus, as produced by ``create_dict_and_corpus``.
    dictionary : the matching gensim dictionary, passed as ``id2word``.
    passes : number of training passes over the corpus (default 15, the value
        that was previously hard-coded).
    random_state : optional seed forwarded to ``LdaModel``. The default
        ``None`` keeps the previous unseeded behaviour; pass an integer to
        make repeated runs reproducible.

    Returns
    -------
    (topics, ldamodel) : ``topics`` is the result of
        ``ldamodel.show_topics(..., formatted=False)`` for ``NUM_TOPICS``
        topics and ``NUM_WORDS`` words each; ``ldamodel`` is the fitted model.
    """
    ldamodel = gensim.models.ldamodel.LdaModel(
        corpus,
        num_topics=NUM_TOPICS,
        id2word=dictionary,
        passes=passes,
        random_state=random_state,
    )
    topics = ldamodel.show_topics(
        num_topics=NUM_TOPICS, num_words=NUM_WORDS, log=False, formatted=False
    )
    return topics, ldamodel


In [9]:
# Build one gensim dictionary and bag-of-words corpus per talk, keyed by the
# same ids as ted_dic, then cache both mappings on disk.
corpus_dic, dictionary_dic = {}, {}
topics_dic = {}
for key in ted_dic:
    print("Processing Document: ",key)
    dictionary, corpus = create_dict_and_corpus(ted_dic[key])
    corpus_dic[key] = corpus
    dictionary_dic[key] = dictionary

# Context manager closes the file deterministically; the bare open() used
# before left the handle to be closed by the garbage collector.
with open('corpus_dictionary.pkl', 'wb') as out_file:
    pickle.dump((corpus_dic, dictionary_dic), out_file)

Processing Document:  1437
Processing Document:  2880
Processing Document:  2658
Processing Document:  1351
Processing Document:  49
Processing Document:  1345
Processing Document:  809
Processing Document:  2894
Processing Document:  1423
Processing Document:  2102
Processing Document:  61
Processing Document:  821
Processing Document:  2664
Processing Document:  2670
Processing Document:  75
Processing Document:  1379
Processing Document:  835
Processing Document:  2116
Processing Document:  606
Processing Document:  2843
Processing Document:  2857
Processing Document:  174
Processing Document:  148
Processing Document:  1153
Processing Document:  1621
Processing Document:  2328
Processing Document:  1147
Processing Document:  2466
Processing Document:  1609
Processing Document:  2300
Processing Document:  2314
Processing Document:  2472
Processing Document:  2499
Processing Document:  1190
Processing Document:  404
Processing Document:  362
Processing Document:  410
Processing Docum

Processing Document:  833
Processing Document:  1419
Processing Document:  2110
Processing Document:  2104
Processing Document:  199
Processing Document:  67
Processing Document:  2662
Processing Document:  1343
Processing Document:  2892
Processing Document:  1425
Processing Document:  1431
Processing Document:  2138
Processing Document:  9
Processing Document:  2886
Processing Document:  1357
Processing Document:  628
Processing Document:  2879
Processing Document:  2689
Processing Document:  2851
Processing Document:  1380
Processing Document:  614
Processing Document:  172
Processing Document:  1394
Processing Document:  98
Processing Document:  2845
Processing Document:  2474
Processing Document:  2460
Processing Document:  1169
Processing Document:  2306
Processing Document:  1627
Processing Document:  2448
Processing Document:  1141
Processing Document:  1155
Processing Document:  1633
Processing Document:  1800
Processing Document:  1814
Processing Document:  358
Processing Doc

Processing Document:  16
Processing Document:  856
Processing Document:  2820
Processing Document:  665
Processing Document:  103
Processing Document:  1483
Processing Document:  117
Processing Document:  2834
Processing Document:  881
Processing Document:  659
Processing Document:  2808
Processing Document:  1656
Processing Document:  2439
Processing Document:  1130
Processing Document:  1124
Processing Document:  1642
Processing Document:  2363
Processing Document:  498
Processing Document:  2405
Processing Document:  2411
Processing Document:  1118
Processing Document:  2377
Processing Document:  1695
Processing Document:  301
Processing Document:  467
Processing Document:  473
Processing Document:  1859
Processing Document:  315
Processing Document:  2388
Processing Document:  1865
Processing Document:  329
Processing Document:  1722
Processing Document:  1044
Processing Document:  1050
Processing Document:  2559
Processing Document:  1736
Processing Document:  2217
Processing Docu

Processing Document:  1725
Processing Document:  1043
Processing Document:  1057
Processing Document:  1731
Processing Document:  2238
Processing Document:  1719
Processing Document:  2210
Processing Document:  2576
Processing Document:  2562
Processing Document:  2204
Processing Document:  299
Processing Document:  306
Processing Document:  1692
Processing Document:  474
Processing Document:  312
Processing Document:  1876
Processing Document:  1862
Processing Document:  2358
Processing Document:  1889
Processing Document:  1651
Processing Document:  1137
Processing Document:  1645
Processing Document:  2364
Processing Document:  2402
Processing Document:  2416
Processing Document:  2370
Processing Document:  1679
Processing Document:  662
Processing Document:  2827
Processing Document:  2199
Processing Document:  1490
Processing Document:  104
Processing Document:  110
Processing Document:  1484
Processing Document:  2833
Processing Document:  676
Processing Document:  886
Processing

Processing Document:  453
Processing Document:  335
Processing Document:  1879
Processing Document:  321
Processing Document:  447
Processing Document:  2431
Processing Document:  1138
Processing Document:  1886
Processing Document:  2357
Processing Document:  2343
Processing Document:  1892
Processing Document:  2425
Processing Document:  490
Processing Document:  1104
Processing Document:  2419
Processing Document:  1110
Processing Document:  484
Processing Document:  2828
Processing Document:  679
Processing Document:  2182
Processing Document:  651
Processing Document:  889
Processing Document:  2814
Processing Document:  2800
Processing Document:  645
Processing Document:  123
Processing Document:  2155
Processing Document:  876
Processing Document:  36
Processing Document:  2633
Processing Document:  2627
Processing Document:  862
Processing Document:  22
Processing Document:  1448
Processing Document:  2141
Processing Document:  1460
Processing Document:  2169
Processing Documen

Processing Document:  2351
Processing Document:  2806
Processing Document:  643
Processing Document:  125
Processing Document:  131
Processing Document:  657
Processing Document:  2812
Processing Document:  2184
Processing Document:  119
Processing Document:  1499
Processing Document:  18
Processing Document:  680
Processing Document:  1314
Processing Document:  1472
Processing Document:  1466
Processing Document:  2609
Processing Document:  694
Processing Document:  2621
Processing Document:  1328
Processing Document:  2147
Processing Document:  2153
Processing Document:  30
Processing Document:  2635
Processing Document:  2145
Processing Document:  26
Processing Document:  866
Processing Document:  2623
Processing Document:  2637
Processing Document:  32
Processing Document:  872
Processing Document:  1458
Processing Document:  2151
Processing Document:  1470
Processing Document:  2179
Processing Document:  682
Processing Document:  696
Processing Document:  1302
Processing Document:

Processing Document:  1429
Processing Document:  2120
Processing Document:  156
Processing Document:  630
Processing Document:  2875
Processing Document:  2861
Processing Document:  142
Processing Document:  2691
Processing Document:  2849
Processing Document:  94
Processing Document:  1398
Processing Document:  80
Processing Document:  618
Processing Document:  2685
Processing Document:  1165
Processing Document:  1603
Processing Document:  383
Processing Document:  1617
Processing Document:  2478
Processing Document:  1171
Processing Document:  1159
Processing Document:  2336
Processing Document:  2322
Processing Document:  432
Processing Document:  1818
Processing Document:  354
Processing Document:  340
Processing Document:  426
Processing Document:  2493
Processing Document:  1824
Processing Document:  1830
Processing Document:  2487
Processing Document:  1011
Processing Document:  585
Processing Document:  2518
Processing Document:  1777
Processing Document:  1763
Processing Docu

Processing Document:  1361
Processing Document:  86
Processing Document:  2683
Processing Document:  178
Processing Document:  2697
Processing Document:  92
Processing Document:  2867
Processing Document:  622
Processing Document:  144
Processing Document:  2873
Processing Document:  2324
Processing Document:  2442
Processing Document:  2456
Processing Document:  1639
Processing Document:  2330
Processing Document:  385
Processing Document:  1611
Processing Document:  2318
Processing Document:  1177
Processing Document:  1163
Processing Document:  3012
Processing Document:  1605
Processing Document:  391
Processing Document:  1836
Processing Document:  2481
Processing Document:  1188
Processing Document:  408
Processing Document:  2495
Processing Document:  1822
Processing Document:  346
Processing Document:  420
Processing Document:  434
Processing Document:  2250
Processing Document:  1981
Processing Document:  1759
Processing Document:  2536
Processing Document:  2522
Processing Doc

In [10]:
# Reload the (corpus_dic, dictionary_dic) tuple written to
# 'corpus_dictionary.pkl' by the dictionary/corpus cell.
corpus_dic, dictionary_dic = load_pickle('corpus_dictionary.pkl')

In [19]:
# Fit one LDA model per talk and cache both the models and their top words.
topics_dic = {}
ldamodel_dic = {}

for key in ted_dic:
    print("Processing Document: ",key)
    corpus, dictionary = corpus_dic[key], dictionary_dic[key]
    topics, ldamodel = create_lda_model(corpus, dictionary)
    topics_dic[key] = topics
    ldamodel_dic[key] = ldamodel
    # To inspect a single model interactively:
    #     pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)

# Context managers close each output file deterministically; the bare open()
# calls used before leaked both handles.
with open('ldamodel_dic.pkl', 'wb') as model_file:
    pickle.dump(ldamodel_dic, model_file)
with open('topics_dic_'+str(NUM_TOPICS)+'_'+str(NUM_WORDS)+'.pkl', 'wb') as topics_file:
    pickle.dump(topics_dic, topics_file)

Processing Document:  1437
Processing Document:  2880
Processing Document:  2658
Processing Document:  1351
Processing Document:  49
Processing Document:  1345
Processing Document:  809
Processing Document:  2894
Processing Document:  1423
Processing Document:  2102
Processing Document:  61
Processing Document:  821
Processing Document:  2664
Processing Document:  2670
Processing Document:  75
Processing Document:  1379
Processing Document:  835
Processing Document:  2116
Processing Document:  606
Processing Document:  2843
Processing Document:  2857
Processing Document:  174
Processing Document:  148
Processing Document:  1153
Processing Document:  1621
Processing Document:  2328
Processing Document:  1147
Processing Document:  2466
Processing Document:  1609
Processing Document:  2300
Processing Document:  2314
Processing Document:  2472
Processing Document:  2499
Processing Document:  1190
Processing Document:  404
Processing Document:  362
Processing Document:  410
Processing Docum

Processing Document:  1421
Processing Document:  2128
Processing Document:  2896
Processing Document:  1347
Processing Document:  73
Processing Document:  833
Processing Document:  1419
Processing Document:  2110
Processing Document:  2104
Processing Document:  199
Processing Document:  67
Processing Document:  2662
Processing Document:  1343
Processing Document:  2892
Processing Document:  1425
Processing Document:  1431
Processing Document:  2138
Processing Document:  9
Processing Document:  2886
Processing Document:  1357
Processing Document:  628
Processing Document:  2879
Processing Document:  2689
Processing Document:  2851
Processing Document:  1380
Processing Document:  614
Processing Document:  172
Processing Document:  1394
Processing Document:  98
Processing Document:  2845
Processing Document:  2474
Processing Document:  2460
Processing Document:  1169
Processing Document:  2306
Processing Document:  1627
Processing Document:  2448
Processing Document:  1141
Processing Docu

Processing Document:  2674
Processing Document:  2660
Processing Document:  65
Processing Document:  2106
Processing Document:  1332
Processing Document:  1454
Processing Document:  1440
Processing Document:  2149
Processing Document:  1326
Processing Document:  2607
Processing Document:  842
Processing Document:  1468
Processing Document:  2161
Processing Document:  2175
Processing Document:  16
Processing Document:  856
Processing Document:  2820
Processing Document:  665
Processing Document:  103
Processing Document:  1483
Processing Document:  117
Processing Document:  2834
Processing Document:  881
Processing Document:  659
Processing Document:  2808
Processing Document:  1656
Processing Document:  2439
Processing Document:  1130
Processing Document:  1124
Processing Document:  1642
Processing Document:  2363
Processing Document:  498
Processing Document:  2405
Processing Document:  2411
Processing Document:  1118
Processing Document:  2377
Processing Document:  1695
Processing Do

Processing Document:  105
Processing Document:  1491
Processing Document:  2198
Processing Document:  2417
Processing Document:  1678
Processing Document:  2371
Processing Document:  2365
Processing Document:  2403
Processing Document:  1122
Processing Document:  1644
Processing Document:  1650
Processing Document:  1888
Processing Document:  2359
Processing Document:  1136
Processing Document:  1863
Processing Document:  1877
Processing Document:  475
Processing Document:  313
Processing Document:  1687
Processing Document:  1693
Processing Document:  307
Processing Document:  2563
Processing Document:  298
Processing Document:  2211
Processing Document:  1718
Processing Document:  2577
Processing Document:  1056
Processing Document:  2239
Processing Document:  1730
Processing Document:  1724
Processing Document:  1042
Processing Document:  1917
Processing Document:  1903
Processing Document:  529
Processing Document:  501
Processing Document:  1095
Processing Document:  267
Processin

Processing Document:  2418
Processing Document:  1105
Processing Document:  1663
Processing Document:  1893
Processing Document:  2342
Processing Document:  2424
Processing Document:  1139
Processing Document:  2430
Processing Document:  2356
Processing Document:  1887
Processing Document:  320
Processing Document:  1878
Processing Document:  334
Processing Document:  2381
Processing Document:  1850
Processing Document:  1688
Processing Document:  308
Processing Document:  2395
Processing Document:  297
Processing Document:  1703
Processing Document:  1065
Processing Document:  2578
Processing Document:  1071
Processing Document:  1717
Processing Document:  2236
Processing Document:  2550
Processing Document:  1059
Processing Document:  2544
Processing Document:  2222
Processing Document:  254
Processing Document:  1918
Processing Document:  532
Processing Document:  526
Processing Document:  268
Processing Document:  1924
Processing Document:  2593
Processing Document:  2587
Processin

Processing Document:  1659
Processing Document:  1881
Processing Document:  1895
Processing Document:  2344
Processing Document:  1103
Processing Document:  1665
Processing Document:  2378
Processing Document:  1671
Processing Document:  483
Processing Document:  1117
Processing Document:  468
Processing Document:  1842
Processing Document:  2393
Processing Document:  2387
Processing Document:  1856
Processing Document:  326
Processing Document:  440
Processing Document:  2542
Processing Document:  2224
Processing Document:  1739
Processing Document:  2230
Processing Document:  2556
Processing Document:  1077
Processing Document:  1711
Processing Document:  285
Processing Document:  2218
Processing Document:  2581
Processing Document:  1088
Processing Document:  1936
Processing Document:  1922
Processing Document:  2595
Processing Document:  520
Processing Document:  246
Processing Document:  252
Processing Document:  534
Processing Document:  2026
Processing Document:  2740
Processing

Processing Document:  1166
Processing Document:  2447
Processing Document:  2321
Processing Document:  1628
Processing Document:  2335
Processing Document:  2453
Processing Document:  343
Processing Document:  431
Processing Document:  2484
Processing Document:  419
Processing Document:  1833
Processing Document:  1827
Processing Document:  1199
Processing Document:  2490
Processing Document:  592
Processing Document:  1006
Processing Document:  1760
Processing Document:  1774
Processing Document:  1012
Processing Document:  586
Processing Document:  2533
Processing Document:  2255
Processing Document:  1984
Processing Document:  1990
Processing Document:  2241
Processing Document:  2527
Processing Document:  551
Processing Document:  237
Processing Document:  223
Processing Document:  545
Processing Document:  1947
Processing Document:  2296
Processing Document:  2282
Processing Document:  1953
Processing Document:  1562
Processing Document:  948
Processing Document:  1204
Processing 

Processing Document:  1404
Processing Document:  2119
Processing Document:  1410
Processing Document:  184
Processing Document:  1376
Processing Document:  2694
Processing Document:  91
Processing Document:  1389
Processing Document:  85
Processing Document:  2680
Processing Document:  2858
Processing Document:  635
Processing Document:  2870
Processing Document:  153
Processing Document:  147
Processing Document:  2864
Processing Document:  621
Processing Document:  2333
Processing Document:  2455
Processing Document:  1148
Processing Document:  2441
Processing Document:  2327
Processing Document:  1606
Processing Document:  392
Processing Document:  1160
Processing Document:  2469
Processing Document:  1174
Processing Document:  386
Processing Document:  1612
Processing Document:  1821
Processing Document:  2496
Processing Document:  2482
Processing Document:  1835
Processing Document:  379
Processing Document:  351
Processing Document:  437
Processing Document:  423
Processing Docum

Processing Document:  87
Processing Document:  179
Processing Document:  192
Processing Document:  1360
Processing Document:  2669
Processing Document:  838
Processing Document:  78
Processing Document:  1374
Processing Document:  1412
Processing Document:  2
Processing Document:  2133
Processing Document:  2655
Processing Document:  50
Processing Document:  804
Processing Document:  44
Processing Document:  2641
Processing Document:  2127


In [16]:
# Reload the per-talk topics from the pickle whose name encodes the current
# NUM_TOPICS and NUM_WORDS values.
topics_dic = load_pickle('topics_dic_'+str(NUM_TOPICS)+'_'+str(NUM_WORDS)+'.pkl')

In [17]:
# Load the cached models into the name this cell actually reads. Previously
# the pickle was loaded into `lda_model` while the lines below used
# `ldamodel_dic`, so the cell only worked when the training cell had already
# run in the same kernel and the loaded data was never used.
ldamodel_dic = load_pickle('ldamodel_dic.pkl')
lda_model = ldamodel_dic  # alias kept for any later code that still refers to the old name

# Visualise the model of the first talk in the mapping.
key = list(ldamodel_dic)[0]
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim.prepare(ldamodel_dic[key], corpus_dic[key], dictionary_dic[key])
vis

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))
