In [None]:
import numpy as np

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn the preprocessed documents into a TF-IDF document-term matrix.
# Vocabulary pruning: ignore terms found in more than 95% of the documents or
# in fewer than 2 documents, drop English stop words, and keep at most 1000
# terms.
vectorizer_options = {
    "max_df": 0.95,
    "min_df": 2,
    "max_features": 1000,
    "stop_words": "english",
}
tfidf_vectorizer = TfidfVectorizer(**vectorizer_options)

# `df` is not defined in this cell -- presumably created earlier in the
# notebook with a "preprocess" column of cleaned text (TODO confirm).
documents = np.array(df["preprocess"])
tfidf = tfidf_vectorizer.fit_transform(documents)

# Vocabulary terms, index-aligned with the columns of `tfidf`.
tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()

In [None]:
# Fit a 15-topic LDA model on the document-term matrix.
# random_state is pinned so that Restart & Run All reproduces the same topics;
# without it the variational initialisation differs on every run.
# NOTE(review): scikit-learn's topic-extraction example feeds LDA raw term
# counts (CountVectorizer) rather than TF-IDF weights -- confirm that TF-IDF
# input is intended here.
lda = LatentDirichletAllocation(n_components=15, random_state=42)
lda.fit(tfidf)

In [None]:
import matplotlib.pyplot as plt

# Plot the highest-weighted words of every LDA topic, one bar-chart panel per
# topic, laid out on a grid that is 5 panels wide.
n_top_words = 20

n_topics = len(lda.components_)
n_cols = 5
n_rows = -(-n_topics // n_cols)  # ceiling division; 15 topics -> 3 rows

# squeeze=False always returns a 2-D array of Axes, so flatten() is safe
# regardless of the grid shape.
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(18, 5 * n_rows), sharex=True, squeeze=False
)
axes = axes.flatten()

for topic_idx, topic in enumerate(lda.components_):
    # argsort is ascending, so this reversed slice takes the indices of the
    # n_top_words largest weights, biggest first.
    top_features_ind = topic.argsort()[: -n_top_words - 1 : -1]
    top_features = [tfidf_feature_names[i] for i in top_features_ind]
    weights = topic[top_features_ind]

    ax = axes[topic_idx]
    ax.barh(top_features, weights, height=0.5)
    ax.set_title(f"Topic {topic_idx + 1}", fontdict={"fontsize": 15})
    ax.invert_yaxis()  # put the heaviest word at the top of the panel
    ax.tick_params(axis="both", which="major", labelsize=15)
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)

# Hide any grid cells that have no topic to show (none when the topic count
# is a multiple of n_cols).
for unused_ax in axes[n_topics:]:
    unused_ax.set_visible(False)

# The layout must be adjusted BEFORE the figure is rendered; calling
# tight_layout() after plt.show() has no effect on the displayed output.
fig.tight_layout()
plt.show()