# Load LDA Model

In [1]:
from gensim.models import LdaModel

# Load the saved LDA model from disk
LDA_MODEL_PATH = 'lda_model/lda_model'
lda_model = LdaModel.load(LDA_MODEL_PATH)

# Read Input Text

In [2]:
from gensim.corpora.dictionary import Dictionary
from lda_helpers import read_lda_input  # Package with helpers

# Each entry of title_texts is indexed as [0] and [1] in this notebook:
# [0] is written out later as the anime title, [1] is the text handed to Dictionary
# (presumably a list of tokens -- confirm against lda_helpers.read_lda_input).
title_texts = read_lda_input('lda_input/lda_input.jl', title=True)
texts = [entry[1] for entry in title_texts]

# Build the vocabulary, then convert every text to its bag-of-words form
id2word = Dictionary(texts)
corpus = list(map(id2word.doc2bow, texts))

ModuleNotFoundError: No module named 'lda_helpers'

# Visualize Topics

In [None]:
import pyLDAvis

# Newer pyLDAvis releases ship the gensim adapter as `pyLDAvis.gensim_models`;
# older releases call it `pyLDAvis.gensim`. Try the new name first and fall back,
# so this cell runs against either version.
try:
    import pyLDAvis.gensim_models as pyLDAvis_gensim
except ImportError:
    import pyLDAvis.gensim as pyLDAvis_gensim

# Render the visualization inline in the notebook
pyLDAvis.enable_notebook()

# sort_topics=False is passed so that topic numbering is not re-ordered by pyLDAvis
# (NOTE(review): the panel may still show 1-based topic numbers -- confirm when
# matching topics to the entries of genre_names).
LDAvis_display = pyLDAvis_gensim.prepare(lda_model, corpus, id2word, sort_topics=False)
LDAvis_display

# Assign Genre Names

In [None]:
# Human-readable labels for the LDA topics; a label's position in the list is used
# as its 'LDA Genre ID' in the output cells below.
# The labels have to be assigned by hand here because the topics did not come out
# in the order originally intended in 'lda_seed.py'.
genre_names = ['Adventure', 'Sports', 'Sci-Fi', 'Mystery', 'Slice of Life', 'School']

# Output LDA Genre Results

In [None]:
import json
from os import makedirs, mkdir  # mkdir stays imported in case another cell relies on it

# Create the output directory if it is missing. Unlike a bare mkdir(), this does
# not raise FileExistsError when the notebook is re-run and the directory already
# exists, so the cell is safe under "Restart Kernel -> Run All".
makedirs('lda_output', exist_ok=True)

## Genre Names

In [None]:
# Write the ID -> name mapping as JSON Lines: one JSON object per line,
# where the ID is the label's position in genre_names.
with open('lda_output/genre_names.jl', 'w') as out_file:
    out_file.writelines(
        '{}\n'.format(json.dumps({
            'LDA Genre ID': genre_id,
            'LDA Genre Name': label
        }))
        for genre_id, label in enumerate(genre_names)
    )

## Word Distribution of each Genre (Top 50 Words by Weight)

In [None]:
# For each labelled genre, write its 50 top words (as returned by show_topic)
# as JSON Lines: one {genre, word, weight} object per line.
with open('lda_output/genre_word_weights.jl', 'w') as out_file:
    for genre_id in range(len(genre_names)):
        top_words = lda_model.show_topic(genre_id, topn=50)
        out_file.writelines(
            '{}\n'.format(json.dumps({
                'LDA Genre ID': genre_id,
                'Word': term,
                # float() presumably converts a NumPy scalar that json cannot
                # serialize -- confirm against gensim's return type
                'Word Weight': float(weight)
            }))
            for term, weight in top_words
        )

## Genre Breakdown of each Anime

In [None]:
# For every document in the corpus, write its per-genre weights as JSON Lines:
# one {title, genre, weight} object per line.
with open('lda_output/anime_genre_weights.jl', 'w') as out_file:
    for doc_index, doc_bow in enumerate(corpus):
        # corpus was built in the same order as title_texts, so the index lines up
        anime_title = title_texts[doc_index][0]
        # minimum_probability=0 requests low-weight genres as well, instead of
        # having them filtered out by the model's default threshold
        genre_mix = lda_model.get_document_topics(doc_bow, minimum_probability=0)
        out_file.writelines(
            '{}\n'.format(json.dumps({
                'Anime Title': anime_title,
                'LDA Genre ID': topic_id,
                'LDA Genre Weight': float(weight)
            }))
            for topic_id, weight in genre_mix
        )