In [None]:
from gensim.models.coherencemodel import CoherenceModel
from gensim.models import LdaModel
import import_ipynb
import pyLDAvis
import pyLDAvis.gensim  # don't skip this

from sklearn.decomposition import NMF
from sklearn.decomposition import TruncatedSVD

In [10]:
def lda_modelling(corpus, train_lemmatized, dictionary, noof_topics):
    """
    Train an LDA model, print its quality metrics and prepare a pyLDAvis view.

    :param corpus: corpus passed to ``LdaModel`` for training; it is reused for
        the perplexity bound and for the visualisation (presumably bag-of-words
        vectors built from ``dictionary`` - confirm against the caller)
    :param train_lemmatized: texts handed to ``CoherenceModel`` as ``texts``
        for the ``c_v`` coherence computation
    :param dictionary: used as ``id2word`` for the model and passed to the
        coherence model and to pyLDAvis
    :param noof_topics: number of topics to fit

    :return:
        - vis - the object returned by ``pyLDAvis.gensim.prepare``
        - lda_model - the fitted ``LdaModel``
    """
    lda_model = LdaModel(
        corpus,
        id2word=dictionary,
        num_topics=noof_topics,
        random_state=42,
        update_every=1,
        chunksize=100,
        passes=5,
        alpha='auto',
        eta='auto',
        per_word_topics=True,
    )

    # log_perplexity() returns the per-word likelihood bound, not the
    # perplexity itself (perplexity = 2 ** -bound), so a HIGHER printed value
    # means a better fit.
    print('\nPerplexity: ', lda_model.log_perplexity(corpus))

    # Compute Coherence Score
    coherence_model_lda = CoherenceModel(
        model=lda_model,
        texts=train_lemmatized,
        dictionary=dictionary,
        coherence='c_v',
    )
    coherence_lda = coherence_model_lda.get_coherence()
    print('\nCoherence Score: ', coherence_lda)

    # Side effect: switches pyLDAvis to notebook rendering for the session.
    pyLDAvis.enable_notebook()
    vis = pyLDAvis.gensim.prepare(lda_model, corpus, dictionary)

    return vis, lda_model


In [None]:
def select_lda_topics(corpus, train_lemmatized, dictionary, topics_range, **lda_kwargs):
    """
    Train one LDA model per candidate topic count and score each with c_v coherence.

    :param corpus: corpus passed to ``LdaModel`` for every candidate
    :param train_lemmatized: texts handed to ``CoherenceModel`` as ``texts``
    :param dictionary: used as ``id2word`` for the models and passed to the
        coherence model
    :param topics_range: iterable of topic counts to try, in order
    :param lda_kwargs: optional extra keyword arguments forwarded to
        ``LdaModel`` (e.g. ``chunksize=100, passes=5``) so the sweep can be run
        with the same training settings as the final model. They override the
        defaults used here; ``num_topics`` is always taken from
        ``topics_range``. With no extra arguments the behaviour is unchanged.

    :return:
        - model_list - the fitted ``LdaModel`` for each entry of ``topics_range``
        - coherence_values - the c_v coherence of each model, in the same order
    """
    # Settings shared by every candidate; caller-supplied values take precedence.
    lda_params = {
        'id2word': dictionary,
        'random_state': 42,
        'alpha': 'auto',
        'eta': 'auto',
        'per_word_topics': True,
    }
    lda_params.update(lda_kwargs)

    coherence_values = []
    model_list = []

    for k in topics_range:
        # num_topics is applied last so the swept value always wins.
        ldamodel = LdaModel(corpus, **dict(lda_params, num_topics=k))
        model_list.append(ldamodel)

        coherencemodel = CoherenceModel(
            model=ldamodel,
            texts=train_lemmatized,
            dictionary=dictionary,
            coherence='c_v',
        )
        coherence_values.append(coherencemodel.get_coherence())

    return model_list, coherence_values


In [None]:
def lsa_modelling(vect_text, nooftopics):
    """
    Fit a TruncatedSVD (LSA) model and project the input onto its components.

    :param vect_text: vectorised documents passed to ``fit_transform``
        (presumably a TF-IDF or count matrix - confirm against the caller)
    :param nooftopics: number of SVD components to keep

    :return:
        - the fitted ``TruncatedSVD`` instance
        - the result of ``fit_transform`` on ``vect_text``
    """
    svd_settings = {
        'n_components': nooftopics,
        'n_iter': 10,
        'random_state': 42,
    }
    svd = TruncatedSVD(**svd_settings)
    doc_topic_weights = svd.fit_transform(vect_text)
    return svd, doc_topic_weights

In [None]:
def nmf_modelling(vect_text, nooftopics):
    """
    Fit an NMF topic model on the vectorised documents.

    :param vect_text: vectorised documents passed to ``fit``
        (presumably a non-negative TF-IDF or count matrix - confirm against
        the caller)
    :param nooftopics: number of NMF components to learn

    :return: the fitted ``NMF`` instance
    """
    nmf_settings = {
        'n_components': nooftopics,
        'random_state': 42,
    }
    factorizer = NMF(**nmf_settings)
    factorizer.fit(vect_text)
    return factorizer