# Phrase Modelling Part II - Building the Models

In [24]:
import os
from gensim import corpora, models, utils

import time

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)

## Load data

Load dictionaries

In [25]:
# Artifacts are read from the `tmp/` folder under the current working directory.
path = os.getcwd() + '/tmp/'

# One gensim Dictionary per category, loaded in the same order as the names on the left.
sanctity_dict, degradation_dict, fairness_dict, cheating_dict = [
    corpora.Dictionary.load(path + filename)
    for filename in (
        'sanctity_dict.dict',
        'degradation_dict.dict',
        'fairness_dict.dict',
        'cheating_dict.dict',
    )
]

Load corpora

In [26]:
# Artifacts are read from the `tmp/` folder under the current working directory.
path = os.getcwd() + '/tmp/'

# One Matrix Market corpus per category, opened in the same order as the names on the left.
sanctity_corpus, degradation_corpus, fairness_corpus, cheating_corpus = [
    corpora.MmCorpus(path + filename)
    for filename in (
        'sanctity_corpus.mm',
        'degradation_corpus.mm',
        'fairness_corpus.mm',
        'cheating_corpus.mm',
    )
]

## Build models


Note: Each model can take ~15 min to train. Training all 4 models takes about an hour.

SANCTITY

In [27]:
# Train the SANCTITY topic model and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
sanctity_model = models.ldamodel.LdaModel(
    corpus=sanctity_corpus,
    id2word=sanctity_dict,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')

Elapsed: 690.0934567451477 seconds


DEGRADATION

In [28]:
# Train the DEGRADATION topic model and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
degradation_model = models.ldamodel.LdaModel(
    corpus=degradation_corpus,
    id2word=degradation_dict,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 1018.5998075008392 seconds


FAIRNESS

In [29]:
# Train the FAIRNESS topic model and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
fairness_model = models.ldamodel.LdaModel(
    corpus=fairness_corpus,
    id2word=fairness_dict,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 771.3949632644653 seconds


CHEATING

In [30]:
# Train the CHEATING topic model and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
cheating_model = models.ldamodel.LdaModel(
    corpus=cheating_corpus,
    id2word=cheating_dict,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 912.7801024913788 seconds


In [32]:
# Trained models are written to the `tmp/` folder under the current working directory.
path = os.getcwd() + '/tmp/'

# Persist each trained LDA model under its own file name, in the original order.
for trained_model, filename in (
    (sanctity_model, 'sanctity_lda_model'),
    (degradation_model, 'degradation_lda_model'),
    (fairness_model, 'fairness_lda_model'),
    (cheating_model, 'cheating_lda_model'),
):
    trained_model.save(path + filename)

## Load data 2 (filtered set)

Load dictionaries

In [33]:
# Filtered artifacts are read from the `tmp_filter/` folder under the current working directory.
path = os.getcwd() + '/tmp_filter/'

# One filtered gensim Dictionary per category, loaded in the same order as the names on the left.
sanctity_dict2, degradation_dict2, fairness_dict2, cheating_dict2 = [
    corpora.Dictionary.load(path + filename)
    for filename in (
        'sanctity_dict_filter.dict',
        'degradation_dict_filter.dict',
        'fairness_dict_filter.dict',
        'cheating_dict_filter.dict',
    )
]

Load corpora

In [34]:
# Filtered artifacts are read from the `tmp_filter/` folder under the current working directory.
path = os.getcwd() + '/tmp_filter/'

# One filtered Matrix Market corpus per category, opened in the same order as the names on the left.
sanctity_corpus2, degradation_corpus2, fairness_corpus2, cheating_corpus2 = [
    corpora.MmCorpus(path + filename)
    for filename in (
        'sanctity_corpus_filter.mm',
        'degradation_corpus_filter.mm',
        'fairness_corpus_filter.mm',
        'cheating_corpus_filter.mm',
    )
]

## Build models 2 (with filtered set)


SANCTITY

In [35]:
# Train the SANCTITY topic model on the filtered set and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
sanctity_model2 = models.ldamodel.LdaModel(
    corpus=sanctity_corpus2,
    id2word=sanctity_dict2,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')

Elapsed: 186.24144625663757 seconds


DEGRADATION

In [36]:
# Train the DEGRADATION topic model on the filtered set and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
degradation_model2 = models.ldamodel.LdaModel(
    corpus=degradation_corpus2,
    id2word=degradation_dict2,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 267.1609296798706 seconds


FAIRNESS

In [37]:
# Train the FAIRNESS topic model on the filtered set and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
fairness_model2 = models.ldamodel.LdaModel(
    corpus=fairness_corpus2,
    id2word=fairness_dict2,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 215.07920598983765 seconds


CHEATING

In [38]:
# Train the CHEATING topic model on the filtered set and report how long training took.
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by a system clock adjustment during the multi-minute run.
start = time.perf_counter()

# Build LDA model: 4 topics, 10 passes over the corpus, random_state pinned to 100
# (gensim documents random_state as the seed used for reproducibility).
cheating_model2 = models.ldamodel.LdaModel(
    corpus=cheating_corpus2,
    id2word=cheating_dict2,
    num_topics=4,
    random_state=100,
    chunksize=100,
    passes=10,
    alpha='auto',
    per_word_topics=True,
    eval_every=None,
)

end = time.perf_counter()
print("Elapsed:", end - start, 'seconds')


Elapsed: 249.31983375549316 seconds


In [39]:
# Filtered-set models are written to the `tmp_filter/` folder under the current working directory.
path = os.getcwd() + '/tmp_filter/'

# Persist each filtered-set LDA model under its own file name, in the original order.
for trained_model, filename in (
    (sanctity_model2, 'sanctity_lda_model_filter'),
    (degradation_model2, 'degradation_lda_model_filter'),
    (fairness_model2, 'fairness_lda_model_filter'),
    (cheating_model2, 'cheating_lda_model_filter'),
):
    trained_model.save(path + filename)

In [6]:


# NOTE(review): `warnings` is already imported and this same filter is already installed
# in the notebook's first cell; the pyLDAvis imports would conventionally live there too.
import warnings
# Ignore DeprecationWarning before importing pyLDAvis.
warnings.filterwarnings("ignore",category=DeprecationWarning)

# pyLDAvis and its gensim adapter, used by the visualization cells below.
import pyLDAvis
import pyLDAvis.gensim_models

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes

In [12]:
# Switch pyLDAvis to notebook display mode before any visualization is shown.
pyLDAvis.enable_notebook()

# Prepare the visualization data for the unfiltered DEGRADATION model.
vis = pyLDAvis.gensim_models.prepare(
    topic_model=degradation_model,
    corpus=degradation_corpus,
    dictionary=degradation_dict,
)

  and should_run_async(code)


In [13]:
# Show `vis` (prepared from the unfiltered degradation model) as this cell's output.
vis

  and should_run_async(code)


In [23]:
# Prepare the visualization data for the filtered-set SANCTITY model.
vis2 = pyLDAvis.gensim_models.prepare(
    topic_model=sanctity_model2,
    corpus=sanctity_corpus2,
    dictionary=sanctity_dict2,
)

In [24]:
# Show `vis2` (prepared from the filtered-set sanctity model) as this cell's output.
vis2

In [20]:
# Prepare the visualization data for the filtered-set DEGRADATION model.
vis3 = pyLDAvis.gensim_models.prepare(
    topic_model=degradation_model2,
    corpus=degradation_corpus2,
    dictionary=degradation_dict2,
)

  and should_run_async(code)


In [21]:
# Show `vis3` (prepared from the filtered-set degradation model) as this cell's output.
vis3

  and should_run_async(code)


In [None]:
# NOTE(review): this rebinds `vis3` (assigned from the filtered degradation model in an
# earlier cell) using the same sanctity inputs that already produced `vis2`. The cell is
# unexecuted (In [None]) — confirm which model was intended, or delete the cell.
vis3 = pyLDAvis.gensim_models.prepare(
    topic_model=sanctity_model2,
    corpus=sanctity_corpus2,
    dictionary=sanctity_dict2,
)