In [1]:
import gensim
from gensim import corpora
from gensim.models import LsiModel
from pprint import pprint

# Toy corpus: nine short titles used as the input documents
documents = [
    "Human machine interface for lab abc computer applications",
    "A survey of user opinion of computer system response time",
    "The EPS user interface management system",
    "System and human system engineering testing of EPS",
    "Relation of user perceived response time to error measurement",
    "The generation of random binary unordered trees",
    "The intersection graph of paths in trees",
    "Graph minors IV Widths of trees and well quasi ordering",
    "Graph minors A survey",
]

# Lowercase each title and split it on whitespace; no stop-word or
# punctuation filtering is applied, so every word is kept as a token.
texts = [title.lower().split() for title in documents]

# Build the token dictionary from the tokenized documents
dictionary = corpora.Dictionary(texts)

# Convert every tokenized document into its bag-of-words form
corpus = list(map(dictionary.doc2bow, texts))

# How many latent topics the model should extract
num_topics = 2

# Fit the LSI model on the bag-of-words corpus; id2word lets the model
# report topics using the original words.
lsi_model = LsiModel(corpus=corpus, num_topics=num_topics, id2word=dictionary)

# Show the extracted topics
topics = lsi_model.print_topics(num_topics=num_topics)
pprint(topics)

# Project the bag-of-words corpus into the LSI topic space
corpus_lsi = lsi_model[corpus]


def _weight_magnitude(topic_weight):
    """Return the absolute value of the weight in a (topic, weight) pair."""
    return abs(topic_weight[1])


# Report every topic weight per document, followed by the dominant topic
for doc_index, doc in enumerate(corpus_lsi):
    print(f"Document {doc_index}:")
    for topic_num, weight in doc:
        print(f"  Topic {topic_num}: {weight:.3f}")
    # The dominant topic is the one with the largest weight in magnitude, so
    # a strongly negative weight can win over a smaller positive one.
    dominant_topic = max(doc, key=_weight_magnitude)
    print(f"  Dominant topic: {dominant_topic[0]} with weight {dominant_topic[1]:.3f}")


[(0,
  '0.615*"of" + 0.353*"system" + 0.255*"user" + 0.209*"time" + '
  '0.209*"response" + 0.188*"trees" + 0.161*"the" + 0.161*"and" + '
  '0.160*"graph" + 0.158*"a"'),
 (1,
  '0.404*"trees" + 0.362*"graph" + -0.282*"system" + 0.241*"minors" + '
  '-0.226*"user" + 0.195*"ordering" + 0.195*"widths" + 0.195*"quasi" + '
  '0.195*"well" + 0.195*"iv"')]
Document 0:
  Topic 0: 0.421
  Topic 1: -0.748
  Dominant topic: 1 with weight -0.748
Document 1:
  Topic 0: 2.854
  Topic 1: -1.011
  Dominant topic: 0 with weight 2.854
Document 2:
  Topic 0: 1.017
  Topic 1: -0.670
  Dominant topic: 0 with weight 1.017
Document 3:
  Topic 0: 1.898
  Topic 1: -0.742
  Dominant topic: 0 with weight 1.898
Document 4:
  Topic 0: 1.673
  Topic 1: -0.859
  Dominant topic: 0 with weight 1.673
Document 5:
  Topic 0: 1.182
  Topic 1: 0.987
  Dominant topic: 0 with weight 1.182
Document 6:
  Topic 0: 1.304
  Topic 1: 1.362
  Dominant topic: 1 with weight 1.362
Document 7:
  Topic 0: 1.589
  Topic 1: 2.197
  Domina