# An exploration of topic modeling.
# Searching through the LDA hyperparameters to find topics that are meaningful.

In [1]:
#Special module written for this class
import lucem_illud #pip install -U git+git://github.com/Computational-Content-Analysis-2018/lucem_illud.git

#These are all for the cluster detection
import sklearn
import sklearn.feature_extraction.text
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.datasets
import sklearn.cluster
import sklearn.decomposition
import sklearn.metrics

import scipy #For hierarchical clustering and some visuals
import gensim#For topic modeling
import nltk #the Natural Language Toolkit
import requests #For downloading our datasets
import numpy as np #for arrays
import pandas #gives us DataFrames
import matplotlib.pyplot as plt #For graphics
import matplotlib.cm #Still for graphics
import seaborn as sns #Makes the graphics look nicer

import pandas as pd
import itertools
import json

%matplotlib inline

In [8]:
%%javascript
// Raise the output area's auto-scroll threshold to 9999 so that, presumably, the long
// topic tables printed below are shown in full rather than inside a scrolling box.
// NOTE(review): relies on the global `IPython` object of the classic Notebook front end — confirm it exists in the UI in use.
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

In [3]:
# Load the two pickled parts of the normalized news DataFrame and stack them row-wise.
# NOTE(review): read_pickle can execute arbitrary code while unpickling — only load
# files that come from a trusted source.
# NOTE(review): concat is called without ignore_index, so each part keeps its own index
# labels and labels may repeat across the two parts — confirm that is intended.
news_df1 = pd.read_pickle('../news_df_norm1.pkl')
news_df2 = pd.read_pickle('../news_df_norm2.pkl')
news_df = pd.concat([news_df1, news_df2])

In [6]:
def dropMissing(wordLst, vocab):
    """
    Return the entries of wordLst that are members of vocab.

    Order and duplicates of wordLst are preserved; entries not found in vocab are dropped.
    The result is always a new list.
    """
    kept = []
    for token in wordLst:
        if token in vocab:
            kept.append(token)
    return kept

In [10]:
def topic_loop(df, stop_words, num_topics, max_dfs, max_features, norm_type, alphas, etas, random_number=None):
    """
    Train one LDA model per hyperparameter combination and print each model's top topic words.

    For every (num_topics, max_df, max_features) combination a TF-IDF vectorizer is fitted on
    df['text'] to select a vocabulary. For every norm suffix, the tokens in
    df['normalized_tokens' + norm] are restricted to that vocabulary and turned into a gensim
    dictionary and bag-of-words corpus. For every (alpha, eta) pair an LdaModel is trained on
    that corpus and a table of each topic's show_topic() words is printed together with the
    parameters that produced it.

    Params:
        df: DataFrame with the text to be analyzed. Requires a 'text' column (the documents
            fed to the TF-IDF vectorizer) and, for every suffix in norm_type, a column named
            'normalized_tokens' + suffix (e.g. 'normalized_tokens_stem') holding each document
            as an iterable of tokens.
        stop_words: Passed unchanged as stop_words to sklearn's TfidfVectorizer.
        num_topics: Iterable of num_topics values for gensim's LdaModel.
        max_dfs: Iterable of max_df values for the TfidfVectorizer.
        max_features: Iterable of max_features values for the TfidfVectorizer.
        norm_type: Iterable of column-name suffixes selecting the normalized-token column.
        alphas: Iterable of alpha values for the LdaModel.
        etas: Iterable of eta values for the LdaModel.
        random_number: Seed passed as random_state to the LdaModel. Default None will give out
            a different model every time for the same parameter combination.

    Returns:
        None. All results are printed.

    Side effects:
        Adds one column per (norm, num_topics, max_df, max_features) combination to df, named
        'reduced_tokens_' + norm + '_' + num_topics + '_' + max_df + '_' + max_features,
        holding the vocabulary-filtered tokens.
    """
    # These grids are walked repeatedly inside the nested loops; materialize them so a
    # one-shot iterable (e.g. a generator) is not silently exhausted after its first pass.
    max_dfs, max_features, norm_type, alphas, etas = (
        list(values) for values in (max_dfs, max_features, norm_type, alphas, etas))

    for num in num_topics:
        for d in max_dfs:
            for f in max_features:

                # Only the fitted vocabulary is used below, so fit() is enough; the TF-IDF
                # matrix itself is never needed. min_df=20 drops terms seen in fewer than
                # 20 documents.
                TFVectorizer = sklearn.feature_extraction.text.TfidfVectorizer(max_df=d, min_df=20,
                    max_features=f, stop_words=stop_words)
                TFVectorizer.fit(df['text'])
                # Look the vocabulary up once instead of once per row.
                vocabulary = TFVectorizer.vocabulary_.keys()

                for norm in norm_type:
                    # Drop every token that is not in the TF-IDF vocabulary.
                    # NOTE(review): the vocabulary is learned from df['text'] while the tokens
                    # come from the normalized column, so a normalized token survives only if
                    # it exactly equals a vocabulary term — confirm this is intended.
                    reduced_token_str = 'reduced_tokens_' + norm + '_' + str(num) + '_' + str(d) + '_' + str(f)
                    norm_col = 'normalized_tokens' + norm
                    df[reduced_token_str] = df[norm_col].apply(
                        lambda tokens: dropMissing(tokens, vocabulary))

                    # Make dictionary
                    dictionary = gensim.corpora.Dictionary(df[reduced_token_str])

                    # Make corpus
                    corpus = [dictionary.doc2bow(text) for text in df[reduced_token_str]]

                    # Topic model
                    for a in alphas:
                        for e in etas:
                            lda = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                id2word=dictionary, num_topics=num, alpha=a, eta=e, random_state=random_number)

                            # One column per topic, listing the words show_topic() returns
                            # for it (the probabilities are discarded).
                            topicsDict = {
                                'Topic_{}'.format(topicNum): [w for w, p in lda.show_topic(topicNum)]
                                for topicNum in range(lda.num_topics)
                            }

                            wordRanksDF = pandas.DataFrame(topicsDict)
                            print('Norm: ', norm, 'Num topics: ', num, ', Max df: ', d,
                                  ' Max features: ', f, ' Alpha: ', a, ' Eta: ', e)
                            print(wordRanksDF)
                            print()

<br> <br>
The rest of the notebook searches through different hyperparameter combinations, looking for topics that show internal semantic consistency. The final combination was chosen on that basis: whether the words within each topic appear to reflect a single, coherent theme.

In [13]:
# First, coarse sweep. The grid is defined in this cell (it was previously commented out, so
# the call depended on values left over from other cells); these values match the parameter
# headers printed in the output below.
num_topics = [3,8,20]
max_dfs = [0.5, 0.7]
max_features = [1000, 3000]
norm_type = [ '_stem']

alphas = [ 0.001, 0.1, 0.5]
etas = [0.001, 0.1, 0.5]

# NOTE(review): topic_loop expects stop_words as its second argument, which this call left out
# (as written it raised a TypeError for the missing etas argument). The stop-word list behind
# the recorded output is not shown in this notebook; lucem_illud.stop_words_basic is assumed —
# confirm.
topic_loop(news_df, lucem_illud.stop_words_basic, num_topics, max_dfs, max_features, norm_type, alphas, etas)

  diff = np.log(self.expElogbeta)


Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.001
     Topic_0   Topic_1 Topic_2
0      trump      year    year
1      state      risk  nation
2         us   develop  report
3  agreement    impact      us
4    develop      time   trump
5     nation       may   state
6      water  research    time
7       year      like    like
8       need       say     say
9       work      food  carbon

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.1
     Topic_0      Topic_1 Topic_2
0      trump         year    year
1         us       nation   state
2  agreement      develop    need
3      state       report    like
4      china        trump   water
5     nation        state     say
6       year           us  action
7    develop         like    time
8     accord  environment    make
9      power         risk    plan

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.5
       Topi

Norm:  _stem Num topics:  3 , Max df:  0.7  Max features:  1000  Alpha:  0.001  Eta:  0.1
  Topic_0  Topic_1 Topic_2
0    year     said    said
1    said    trump    year
2   would       us    also
3  global   global  report
4   trump    state   state
5     one    world  nation
6   world     year  global
7     say  develop    risk
8   water     also  impact
9    like    china    fund

Norm:  _stem Num topics:  3 , Max df:  0.7  Max features:  1000  Alpha:  0.001  Eta:  0.5
  Topic_0  Topic_1    Topic_2
0    said     said      trump
1    year  develop         us
2   state    china       said
3    also    world     global
4     one     year      state
5  global   global  agreement
6  nation   nation      world
7   world      new       year
8   would     also       also
9   trump    would      would

Norm:  _stem Num topics:  3 , Max df:  0.7  Max features:  1000  Alpha:  0.1  Eta:  0.001
     Topic_0  Topic_1  Topic_2
0       said     said  develop
1      trump     year     year
2      s

Norm:  _stem Num topics:  8 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.5
     Topic_0    Topic_1 Topic_2    Topic_3  Topic_4  Topic_5  Topic_6  \
0       year       year    like      trump  develop      per  nigeria   
1  scientist         us    year         us     food     plan  develop   
2     report       fuel   water  agreement     year     year    india   
3     nation    develop     use      state    china    state   nation   
4       rise     fossil   flood      china   africa     cent    state   
5        say        oil    time     accord   market     need   forest   
6       like        gas   human     nation   nation  develop    water   
7        sea  agreement    need         mr   region      say    level   
8    weather     nation  impact     leader  african      use  protect   
9      level     action    even       year     work   nation   effect   

       Topic_7  
0        water  
1        state  
2         year  
3  environment  
4       nation  
5   

Norm:  _stem Num topics:  8 , Max df:  0.5  Max features:  3000  Alpha:  0.001  Eta:  0.1
  Topic_0    Topic_1   Topic_2 Topic_3    Topic_4    Topic_5   Topic_6  \
0    year  agreement    invest   water      trump       warm   develop   
1    time       year   develop     per         us      trump     state   
2    make         mr      year    year  agreement       year      plan   
3    work       coal      risk    cent      state       like   sustain   
4   state         us    carbon    food      china     report  research   
5      us     nation       oil  farmer     accord        say     water   
6     say     carbon  investor   state     nation      state    impact   
7  nation      state      fund    need       unit  scientist    forest   
8   human      plant      need  report     commit       rise      year   
9   think        per       gas  nation       deal      storm    nation   

    Topic_7  
0    nation  
1     china  
2        us  
3     state  
4   develop  
5      year

Norm:  _stem Num topics:  8 , Max df:  0.7  Max features:  1000  Alpha:  0.001  Eta:  0.001
   Topic_0    Topic_1   Topic_2  Topic_3  Topic_4 Topic_5  Topic_6   Topic_7
0   global       said       per     said  develop    year     said    carbon
1    world      trump  scotland       us     said     one    water      said
2    china      state      cent     year  project    like     also      year
3     said         us      said    world     also    time     year  research
4       us  agreement       new  develop     risk     say    level     world
5    india     global      year    state     fund   would   global    global
6  develop     nation      also  nigeria     plan    make  nigeria     power
7     year      would    global   action   nation   world    world       gas
8     also     report    report   nation    green  global      sea       new
9   nation       also        uk   global     year   human     rise      also

Norm:  _stem Num topics:  8 , Max df:  0.7  Max features:  1

Norm:  _stem Num topics:  8 , Max df:  0.7  Max features:  3000  Alpha:  0.001  Eta:  0.001
     Topic_0  Topic_1 Topic_2  Topic_3  Topic_4  Topic_5   Topic_6  Topic_7
0      trump     said    warm     said     said     said      said     said
1         us  develop   would     year      per    china   develop    water
2       said    world    year    state     cent  develop      year     also
3  agreement     also   state  student     year  nigeria       use   report
4     global     year    said      one   global   global      also   impact
5      world   nation    time    world     need   nation     world     year
6      state      one  global     also      new    world       one    state
7      would    china     say      new    green     also      make   global
8     nation   global    like   school   invest     year       new   nation
9     accord    would     one    event  project       us  research  develop

Norm:  _stem Num topics:  8 , Max df:  0.7  Max features:  3000  Alpha:

Norm:  _stem Num topics:  20 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.001
   Topic_0  Topic_1  Topic_10   Topic_11  Topic_12     Topic_13 Topic_14  \
0   carbon    state    report  scientist    school         year   health   
1     data   nation    macron       year      year         need    women   
2    could     plan    africa        sea       say        water       uk   
3     need  develop   african       rise      work         time       dr   
4     fuel   action  research      storm     flood      project     work   
5   fossil    china    survey      level      time       action   nation   
6     year   report      year     harvey      like        dubai    state   
7  support    water    budget      trump      live         show     risk   
8     like   impact       per      ocean  children  environment  develop   
9      per     risk    french        ice      make      develop   impact   

   Topic_15 Topic_16 Topic_17 Topic_18 Topic_19  Topic_2  Topic_3    T

Norm:  _stem Num topics:  20 , Max df:  0.5  Max features:  1000  Alpha:  0.1  Eta:  0.1
  Topic_0  Topic_1     Topic_10 Topic_11 Topic_12    Topic_13 Topic_14  \
0     per     year  environment    china    water       trump    egypt   
1    cent    storm        event     year     year          us      uae   
2    year  weather         year     time     risk       power     plan   
3  carbon    flood       nation  support   school        like  develop   
4      us  nigeria         area    saudi   report  republican    china   
5   trump    state         time    right      per        time       al   
6     gas      sea        march     make    state       obama    dubai   
7  target    water      protect   nation     need       state    state   
8     use    ocean          day   macron     cent      carbon   market   
9    fuel   harvey         work       us  drought         epa     year   

  Topic_15 Topic_16   Topic_17 Topic_18 Topic_19  Topic_2    Topic_3  \
0    china   summit  agr

Norm:  _stem Num topics:  20 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  0.5
  Topic_0  Topic_1   Topic_10 Topic_11     Topic_12 Topic_13 Topic_14  \
0    rise    china       year     work       report  nigeria     year   
1  report      law     carbon  project           mr     year       us   
2     sea  develop       risk    state         year    green     like   
3  nation     year       time    water      develop  develop      ice   
4   level   region       like     year       school   nation    human   
5  region    right  scientist      say       accord     need    level   
6  impact   nation       fuel      uae       health    march   carbon   
7    year   report        say     lake          say    india     even   
8   state     work     report  develop  environment       us     fuel   
9   could       al       need     help       nation       go     rise   

  Topic_15   Topic_16  Topic_17     Topic_18 Topic_19    Topic_2 Topic_3  \
0  develop     nation      year

Norm:  _stem Num topics:  20 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.001
    Topic_0    Topic_1   Topic_10 Topic_11  Topic_12     Topic_13   Topic_14  \
0    invest      trump      trump  develop     china        state         al   
1      risk  agreement         us   forest  nigerian         plan       year   
2  investor         us      state     year        us       nation       work   
3       oil     accord         mr     plan      year     scotland        way   
4      year      india     nation   nation    nation         fund       time   
5      fund     commit     leader  project   develop  environment         us   
6    carbon     carbon       year    water     trade         coal       make   
7       gas     nation       unit    state     trump       action  scientist   
8     price     action   american  billion        xi      project       like   
9    market   withdraw  agreement   impact   nigeria        trump        use   

  Topic_15  Topic_16  Topic_

Norm:  _stem Num topics:  20 , Max df:  0.5  Max features:  3000  Alpha:  0.5  Eta:  0.1
    Topic_0 Topic_1   Topic_10 Topic_11  Topic_12 Topic_13     Topic_14  \
0     state   water      trump      per     china   nation         year   
1     women   state         us     cent     trump  develop        march   
2    health   flood  agreement     year    leader    state        event   
3   student    year      state     food        us     risk        earth   
4    school  farmer     accord    water     trade     year        human   
5      year  impact       year   carbon  european   report      protect   
6      work    food     nation    could      meet     plan         hour   
7  children     use       unit      say  nigerian      gov         work   
8   develop    plan     commit      use    summit   action  environment   
9    nation    need         mr    price    macron     work       nation   

      Topic_15  Topic_16  Topic_17 Topic_18  Topic_19    Topic_2   Topic_3  \
0      

Norm:  _stem Num topics:  20 , Max df:  0.7  Max features:  1000  Alpha:  0.001  Eta:  0.5
   Topic_0   Topic_1  Topic_10 Topic_11   Topic_12 Topic_13 Topic_14  \
0    china      said     world    state      trump    trump    water   
1  develop      year     china   health       year       mr    storm   
2   global     saudi        us     said     report     said    level   
3     said     world    nation    india       time    would   harvey   
4      law      also    global     also        say       us     rise   
5       us    global      year     plan  scientist     coal      one   
6    world     china    leader  project         us    power     like   
7   nation      reef     egypt   nation       like      new    human   
8      new     trade  american  develop        one  support     time   
9    state  research     trump     area     global      one  develop   

   Topic_15     Topic_16 Topic_17 Topic_18   Topic_19  Topic_2   Topic_3  \
0     china       forest  develop     ye

Norm:  _stem Num topics:  20 , Max df:  0.7  Max features:  1000  Alpha:  0.5  Eta:  0.001
   Topic_0    Topic_1   Topic_10 Topic_11  Topic_12 Topic_13 Topic_14  \
0     said       said      trump     said      said     food       us   
1   global    develop      state    state   develop    green    world   
2    would       also         us     meet     water  develop   nation   
3    china         us  agreement    china   project  project    china   
4     year     nation       said     also      also     said  develop   
5  develop      dhabi     global   global      plan   market   global   
6     also      india      world     work       uae     need     also   
7  support  agreement     nation  develop    nation   carbon    india   
8      new      would     action     year  research     year     need   
9    world      trump   american    water     state     also    trump   

  Topic_15     Topic_16 Topic_17 Topic_18 Topic_19  Topic_2    Topic_3  \
0     said        trump     ris

Norm:  _stem Num topics:  20 , Max df:  0.7  Max features:  3000  Alpha:  0.001  Eta:  0.1
   Topic_0  Topic_1 Topic_10  Topic_11  Topic_12     Topic_13 Topic_14  \
0     said     said  develop      said      said  environment  develop   
1      sea     year  project      year    global         said     said   
2    level     also   invest      food     world        trump     also   
3     rise  student     fund     world     china          epa      uae   
4   nation   forest    water       one        us       pruitt   global   
5  develop   global     bank      also  european         also   nation   
6   global    state  sustain       use        eu      protect    world   
7    china     tree    world  research     trade         coal    china   
8   alaska  develop   nation   million       new        state    build   
9    state     food     also    farmer      also       nation    dubai   

   Topic_15 Topic_16  Topic_17 Topic_18   Topic_19    Topic_2 Topic_3  \
0      said     said 

Norm:  _stem Num topics:  20 , Max df:  0.7  Max features:  3000  Alpha:  0.1  Eta:  0.5
  Topic_0  Topic_1 Topic_10 Topic_11 Topic_12  Topic_13 Topic_14 Topic_15  \
0  global       us      per     said  develop    invest     year     said   
1   world    world     cent      uae   nation   develop    would    state   
2   china   global    water    youth    state      fund    green       us   
3    need     also     said      new     plan      risk     make     coal   
4   human    would    plant  sustain    water  investor    power     plan   
5    said    china     year  student  project   sustain     also   nation   
6     one      new     also  develop     also    sector     said     also   
7    also     year      use  project     work     green    elect   global   
8     new  develop     cost     also  sustain       oil      one      new   
9    food    trade      one       al   region     world       us   action   

    Topic_16 Topic_17   Topic_18   Topic_19 Topic_2  Topic_3   

In [9]:
# Second sweep: 5 and 7 topics at a single max_df of 0.5, over two vocabulary sizes
# and the same 3 x 3 alpha/eta grid.
num_topics = [5,7]
max_dfs = [0.5]
max_features = [1000, 3000]
norm_type = [ '_stem']  # suffix of the 'normalized_tokens' column read by topic_loop

alphas = [ 0.001, 0.1, 0.5]
etas = [0.001, 0.1, 0.5]

In [11]:
# NOTE(review): topic_loop expects stop_words as its second argument, which this call left out
# (as written it raised a TypeError for the missing etas argument). The stop-word list behind
# the recorded output is not shown in this notebook; lucem_illud.stop_words_basic is assumed —
# confirm.
topic_loop(news_df, lucem_illud.stop_words_basic, num_topics, max_dfs, max_features, norm_type, alphas, etas)

  diff = np.log(self.expElogbeta)


Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.001
     Topic_0   Topic_1  Topic_2    Topic_3    Topic_4
0    develop      year     year      trump     nation
1      india  research      per         us     carbon
2      china    impact     need      state       year
3         us   develop     cent        say    develop
4       year    region  develop       year         us
5      state     water    water     nation  agreement
6      water    effect      use       time      state
7  agreement      area     plan       like       fund
8     nation     state   report       make      green
9      trump      rise   nation  agreement     report

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.1
   Topic_0    Topic_1    Topic_2    Topic_3      Topic_4
0    china       year      trump      trump        state
1      per       like         us         us       nation
2  develop        say    develop      water         

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.1
   Topic_0    Topic_1    Topic_2  Topic_3  Topic_4
0  nigeria      trump       year    trump    water
1       mr         us      state       us  develop
2    china       year  scientist    state     year
3   nation  agreement     impact   nation     food
4  develop      state       rise    china     need
5       us     carbon       warm  develop  project
6     year       warm      flood     year  sustain
7   report     nation      level   leader      use
8    state        say       like      per   nation
9   accord     accord     nation     cent      per

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.5
     Topic_0  Topic_1    Topic_2  Topic_3  Topic_4
0       year    water      trump  develop    state
1       warm     year         us       us   nation
2       time  develop  agreement     year  develop
3       like   report      state   nation       us
4  scie

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  0.001
    Topic_0 Topic_1    Topic_2    Topic_3  Topic_4 Topic_5  Topic_6
0      year   trump       time      trump  develop    year   nation
1    impact      us       like      state     year   water  develop
2     state    year  scientist         us       us  carbon     year
3     level  nation       year  agreement    india    need      per
4   weather  report      state    develop   nation     say     cent
5    report   state    percent     nation       al   state    china
6   project    make       even     accord    state    like     risk
7    region    time       food       plan    china    fuel     need
8  research      mr     nation     action      uae    make     fund
9      rise    work       need       year    dhabi     use    state

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  0.1
   Topic_0 Topic_1  Topic_2    Topic_3  Topic_4    Topic_5    Topic_6
0  

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  3000  Alpha:  0.5  Eta:  0.1
   Topic_0    Topic_1      Topic_2    Topic_3 Topic_4 Topic_5      Topic_6
0    water      trump        trump      china    year    year         year
1  develop         us        state    develop    like  report      develop
2    state  agreement           us      power    time     per       forest
3   nation      state         year     nation     say   state       nation
4   report       year          say    nigeria   state  nation  environment
5     year     accord      develop      green     use    cent      sustain
6   impact     nation       invest      state    make   could        water
7     plan        say           mr  agreement  carbon   storm         plan
8  weather       warm  environment     invest    need    warm         work
9    flood       make       report    sustain    live    like       action

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  3000  Alpha:  0.5  Eta:  0.

In [12]:
# Third sweep: 3 topics only, adding a stricter max_df of 0.3, a middle vocabulary size
# of 2000, and a much smaller prior value (0.00001) to both the alpha and eta grids.
num_topics = [3]
max_dfs = [0.3, 0.5]
max_features = [1000, 2000, 3000]
norm_type = [ '_stem']  # suffix of the 'normalized_tokens' column read by topic_loop

alphas = [ 0.00001, 0.001, 0.1, 0.5]
etas = [0.00001, 0.001, 0.1, 0.5]

In [13]:
# NOTE(review): topic_loop expects stop_words as its second argument, which this call left out
# (as written it raised a TypeError for the missing etas argument). The stop-word list behind
# the recorded output is not shown in this notebook; lucem_illud.stop_words_basic is assumed —
# confirm.
topic_loop(news_df, lucem_illud.stop_words_basic, num_topics, max_dfs, max_features, norm_type, alphas, etas)

  diff = np.log(self.expElogbeta)


Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  1000  Alpha:  1e-05  Eta:  1e-05
   Topic_0    Topic_1  Topic_2
0  develop       warm      per
1   nation     nation     cent
2    china     impact      say
3     plan  scientist       mr
4    water      human   nation
5      use     report  develop
6   region   research    power
7  project        say     plan
8    power      level   report
9    build       rise   accord

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  1000  Alpha:  1e-05  Eta:  0.001
     Topic_0  Topic_1 Topic_2
0     nation  develop  report
1       warm   nation   water
2    develop   accord     use
3        say    china     say
4  scientist     plan  impact
5      level  support    risk
6       rise    power    warm
7      human   leader   power
8   research     meet  nation
9        per     work  public

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  1000  Alpha:  1e-05  Eta:  0.1
   Topic_0  Topic_1    Topic_2
0   nation    india   

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  2000  Alpha:  0.001  Eta:  1e-05
     Topic_0  Topic_1  Topic_2
0       warm  develop   nation
1     nation      say  develop
2     report     work    china
3  scientist     plan      per
4       risk      use   accord
5      human   nation       mr
6      water     warm   report
7        say     come     cent
8       rise   forest  nigeria
9      level    build    power

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  2000  Alpha:  0.001  Eta:  0.001
   Topic_0    Topic_1  Topic_2
0   nation     nation       mr
1    china     report   invest
2  develop       warm  develop
3    water        say      oil
4   accord    develop  student
5  nigeria     impact   school
6  support  scientist    water
7   leader      human     work
8     work        use  project
9     meet        per   africa

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  2000  Alpha:  0.001  Eta:  0.1
   Topic_0    Topic_1    Topic_2
0  develo

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  3000  Alpha:  0.001  Eta:  0.5
     Topic_0  Topic_1 Topic_2
0       warm  develop  nation
1     report   nation  accord
2  scientist   invest    unit
3        say  sustain      mr
4     nation    china  commit
5      water  nigeria   china
6      human    water    meet
7   research  support  leader
8     impact   impact   power
9      level     plan    work

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  3000  Alpha:  0.1  Eta:  1e-05
   Topic_0  Topic_1    Topic_2
0   nation  develop       warm
1   accord    water     report
2  develop   nation        say
3    power      per     nation
4    china    china  scientist
5  support     plan       rise
6   invest      use      level
7     work  sustain      human
8   commit  product     impact
9       mr   impact    weather

Norm:  _stem Num topics:  3 , Max df:  0.3  Max features:  3000  Alpha:  0.1  Eta:  0.001
    Topic_0    Topic_1  Topic_2
0   develop       warm   

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.1  Eta:  0.1
   Topic_0      Topic_1    Topic_2
0     year        state      trump
1    water         year         us
2   report       nation  agreement
3      per         make      state
4  develop         time       year
5      say           us     nation
6   impact         like      power
7      use      develop         mr
8     like         need     carbon
9  project  environment     accord

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.1  Eta:  0.5
     Topic_0    Topic_1 Topic_2
0      trump    develop    year
1         us     nation   state
2      state      state    like
3  agreement    nigeria  report
4       year  agreement    time
5     nation      india  carbon
6      china       need   water
7     accord       year  nation
8       make       plan     say
9        say     action  impact

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  1

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  2000  Alpha:  0.5  Eta:  1e-05
       Topic_0 Topic_1  Topic_2
0      develop   trump  develop
1         year      us     year
2       invest    year   nation
3       nation   state    water
4        state    warm     need
5  environment    like     plan
6        green  nation     work
7        china  report      per
8        india     say    state
9         need    time      say

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  2000  Alpha:  0.5  Eta:  0.001
   Topic_0    Topic_1   Topic_2
0     year      trump     state
1  develop         us     water
2     need       year      year
3   nation      state   develop
4    china  agreement     trump
5    green     nation    nation
6  nigeria       time  research
7  project     report      warm
8     work        say     level
9   invest       like       use

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  2000  Alpha:  0.5  Eta:  0.1
     Topic_0  Topic_1 Topi

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  3000  Alpha:  0.5  Eta:  0.5
   Topic_0    Topic_1    Topic_2
0  develop       year      trump
1     year      water         us
2     need       warm      state
3    state       time  agreement
4   nation     report     nation
5     plan       like     accord
6  sustain        use      china
7     fund  scientist       year
8  project     nation       make
9     work        say        say



## HERE: search with custom stop words and a fixed random seed

In [12]:
# Sweep over 5, 6 and 7 topics at max_df 0.5, now with extra stop words.
num_topics = [5, 6, 7]
max_dfs = [0.5]
max_features = [1000, 3000]
norm_type = [ '_stem']  # suffix of the 'normalized_tokens' column read by topic_loop

alphas = [0.001, 0.1, 0.5]
etas = [0.001, 0.1, 0.5]

# Extend the package's basic stop-word list with corpus-specific terms.
# NOTE(review): 'climat' and 'chang' look like stemmed forms, but topic_loop hands this list
# to a TF-IDF vectorizer fitted on the raw 'text' column — confirm the stems actually remove
# the intended words.
stop_words = lucem_illud.stop_words_basic +  ['climat', 'chang', 'like', 'year','mr', 'say','could']

In [13]:
# Run the sweep with the custom stop words; the fixed seed makes each LDA model repeatable.
topic_loop(
    df=news_df,
    stop_words=stop_words,
    num_topics=num_topics,
    max_dfs=max_dfs,
    max_features=max_features,
    norm_type=norm_type,
    alphas=alphas,
    etas=etas,
    random_number=1024,
)

  diff = np.log(self.expElogbeta)


Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.001
     Topic_0    Topic_1   Topic_2  Topic_3  Topic_4
0      trump       time   nigeria  develop  develop
1      state       rise     water    china      per
2         us  scientist      food       us     cent
3  agreement      human    nation     work   nation
4     nation    weather   develop    trump     risk
5     report       make       use     need    china
6     accord      level  research   nation   sector
7     carbon      flood     state     make    water
8     action      state      need     meet    state
9       plan         us    africa    power     plan

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.1
     Topic_0    Topic_1   Topic_2  Topic_3  Topic_4
0      trump       time   nigeria  develop  develop
1      state       rise     water    china      per
2         us  scientist      food       us     cent
3  agreement      human    nation    

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.1
   Topic_0    Topic_1  Topic_2  Topic_3  Topic_4
0   nation      trump       us     time    water
1  develop         us    india    state  develop
2  nigeria      state  develop   nation  project
3    state  agreement   nation    trump    state
4    china     carbon    power     warm     plan
5     food       warm     work       us      use
6     need     accord     time    human  sustain
7      per        gas    state   report   nation
8      use       risk    china     make     area
9  sustain     invest   action  weather   farmer

Norm:  _stem Num topics:  5 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.5
   Topic_0    Topic_1  Topic_2  Topic_3  Topic_4
0   nation      trump       us     time    water
1  develop         us    india    state  develop
2  nigeria      state  develop   nation  project
3    state  agreement   nation    trump    state
4    china     carbon    power     warm

Norm:  _stem Num topics:  6 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  0.1
       Topic_0    Topic_1    Topic_2  Topic_3  Topic_4    Topic_5
0        state       rise      water  develop      per      trump
1       report       time       food     work  develop         us
2       carbon    weather        use     need     cent  agreement
3       nation      level   research      uae   nation     nation
4         plan      flood  scientist     make    water      china
5        trump      human     nation   nation     risk      state
6        water  scientist       time    china    state     accord
7  environment       make    nigeria       us     area     leader
8       impact      storm    develop    power  million    develop
9       action      state      state     help   region       meet

Norm:  _stem Num topics:  6 , Max df:  0.5  Max features:  1000  Alpha:  0.5  Eta:  0.5
       Topic_0    Topic_1    Topic_2  Topic_3  Topic_4    Topic_5
0        state       rise      

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  1000  Alpha:  0.001  Eta:  0.001
       Topic_0    Topic_1    Topic_2      Topic_3  Topic_4    Topic_5  Topic_6
0        state       rise      water      develop      per      trump  develop
1       report       time       food         work  develop         us  project
2       carbon    weather    nigeria         need     cent  agreement     time
3       nation  scientist        use          uae    water      china   nation
4         plan      level  scientist        china   nation     nation   africa
5  environment      flood   research         risk     risk      state  support
6       impact      human     nation         make     area     accord    first
7        trump       make     school           us    state     leader     fund
8       action      storm      state       nation  million    develop     need
9        water      state       time  environment   region       meet     work

Norm:  _stem Num topics:  7 , Max df: 

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  3000  Alpha:  0.001  Eta:  0.001
   Topic_0    Topic_1  Topic_2 Topic_3  Topic_4   Topic_5   Topic_6
0   nation      trump    india   trump    water     water   develop
1      per         us       us    time  project     state   nigeria
2    china      state    power  nation  develop    impact    nation
3     cent  agreement   nation    warm     plan      food     china
4  develop     carbon  develop   state      use    farmer     state
5     need     accord     time   human     area  research   support
6     food       warm    state      us  sustain   weather   sustain
7      use        gas   action  report   nation    forest      work
8    state       risk    china    make     need    report      need
9  nigeria     invest      uae   storm    state      make  nigerian

Norm:  _stem Num topics:  7 , Max df:  0.5  Max features:  3000  Alpha:  0.001  Eta:  0.1
   Topic_0    Topic_1  Topic_2 Topic_3  Topic_4   Topic_5   Topic_6
0

In [14]:
# Small hyperparameter grid for topic_loop: each list holds the values to sweep
# for one setting, and one topic table is printed per combination.

# Token normalisation and vocabulary-size settings.
norm_type = ['_stem']
max_dfs = [0.5]
max_features = [1000, 3000]

# Corpus-specific stop words appended to the lucem_illud defaults. The additions
# are spelled as stems ('climat', 'chang') -- presumably so that they match the
# '_stem' tokens; verify against the normalisation step.
stop_words = lucem_illud.stop_words_basic + [
    'climat', 'chang', 'like', 'year', 'mr', 'say', 'could',
]

# Topic-model settings: number of topics plus the alpha / eta values to try.
num_topics = [3, 4]
alphas = [0.1]
etas = [0.1]

In [15]:
# Re-run the sweep on the smaller grid; the fixed seed keeps the printed
# topics repeatable from one run to the next.
topic_loop(
    df=news_df,
    stop_words=stop_words,
    num_topics=num_topics,
    max_dfs=max_dfs,
    max_features=max_features,
    norm_type=norm_type,
    alphas=alphas,
    etas=etas,
    random_number=1024,
)

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  1000  Alpha:  0.1  Eta:  0.1
     Topic_0    Topic_1  Topic_2
0      trump       time  develop
1      state       make    china
2         us  scientist   nation
3     nation       rise     food
4  agreement      human  nigeria
5     report         us     need
6     accord      level    water
7       plan    weather   region
8    develop      state     work
9       need     nation      use

Norm:  _stem Num topics:  3 , Max df:  0.5  Max features:  3000  Alpha:  0.1  Eta:  0.1
   Topic_0    Topic_1   Topic_2
0   nation      trump        us
1  develop         us   develop
2    state      state    nation
3    water       warm      time
4     need  agreement      work
5  nigeria     carbon     state
6      use       make     india
7     food  scientist    report
8     make       time  research
9  sustain     report    action

Norm:  _stem Num topics:  4 , Max df:  0.5  Max features:  1000  Alpha:  0.1  Eta:  0.1
     Topic_0    Top