In [1]:
# Model hyperparameters shared by the cells below.
#
# k     -- number of LDA genres; also drives the range(k) export loops.
# r     -- forwarded to get_lda_model (NOTE(review): meaning not visible
#          here, presumably a random seed -- confirm in lda_helpers).
# alpha -- forwarded to get_lda_model (presumably the document-topic prior;
# eta      and the topic-word prior -- confirm in lda_helpers).

k, r = 4, 1
alpha = eta = None

In [2]:
# Read input text

from gensim.corpora.dictionary import Dictionary
from lda_helpers import read_lda_input  # Package with helpers

# Load the input records; each record is indexed as [0] = title, [1] = text.
title_texts = read_lda_input('lda_input.jl', title=True)

# Keep only the text part of every record (titles are looked up again when
# the per-anime output is written).
texts = [entry[1] for entry in title_texts]

# Build the gensim vocabulary from the texts, then convert every text to its
# bag-of-words representation using that vocabulary.
id2word = Dictionary(texts)
corpus = list(map(id2word.doc2bow, texts))

In [3]:
# Run model, and save

from lda_helpers import get_lda_model

# Obtain the LDA model from the project helper, handing it the bag-of-words
# corpus, the vocabulary and the hyperparameters from the first cell.
lda_model = get_lda_model(
    corpus,
    id2word,
    k,
    r,
    alpha,
    eta,
)

In [4]:
# Persist the model under lda_output/.
# Create the output directory first: on a fresh checkout (or after a clean-up)
# it may not exist yet, and writing into a missing directory raises.
# exist_ok=True makes this a no-op when the directory is already there.
import os

os.makedirs('lda_output', exist_ok=True)
lda_model.save('lda_output/lda_model')

In [5]:
# Write Genre Names to file

import json

# Human-readable label for each LDA genre; the list position is the genre ID.
k_names = ['Potato', 'Lettuce', 'Cabbage', 'Broccoli']

# The labels are maintained by hand, so make sure they still match k.
# Without this check a larger k fails with a bare IndexError and a smaller k
# silently drops labels from the output file.
if len(k_names) != k:
    raise ValueError(
        'Expected {} genre names (k), but k_names has {}'.format(k, len(k_names))
    )

# Write one JSON object per line (JSON Lines), one line per genre.
with open('lda_output/genre_names.jl', 'w') as f:
    for i, name in enumerate(k_names):
        record = {
            'LDA Genre ID': i,
            'LDA Genre Name': name
        }
        line = json.dumps(record)
        f.write('{}\n'.format(line))

In [6]:
# Export the top words of every genre as JSON Lines:
# one JSON object per (genre, word) pair, one object per line.

with open('lda_output/genre_word_weights.jl', 'w') as out_file:
    for genre_id in range(k):
        # show_topic yields (word, weight) pairs; only the 30 top entries
        # per genre are requested.
        top_words = lda_model.show_topic(genre_id, topn=30)
        out_file.writelines(
            json.dumps({
                'LDA Genre ID': genre_id,
                'Word': term,
                # float() converts the weight to a plain Python float for json.
                'Weight': float(term_weight),
            }) + '\n'
            for term, term_weight in top_words
        )

In [7]:
# Export the genre mixture of every anime as JSON Lines:
# one JSON object per (title, genre) pair, one object per line.

with open('lda_output/anime_genre_weights.jl', 'w') as out_file:
    for doc_index, doc_bow in enumerate(corpus):
        # corpus is index-aligned with title_texts; element 0 is the title.
        doc_title = title_texts[doc_index][0]
        # NOTE(review): minimum_probability=0 is presumably meant to keep
        # every genre in the result -- confirm against the gensim docs.
        genre_mix = lda_model.get_document_topics(doc_bow, minimum_probability=0)
        out_file.writelines(
            json.dumps({
                'Title': doc_title,
                'LDA Genre ID': topic_id,
                # float() converts the weight to a plain Python float for json.
                'LDA Genre Weight': float(topic_weight),
            }) + '\n'
            for topic_id, topic_weight in genre_mix
        )