In [58]:
from gensim.models import HdpModel, LdaModel
from gensim.corpora import Dictionary
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [59]:
paragraph = """ We discussed quantum deformations of D=4 Lorentz and Poincare algebras. In
the case of Poincare algebra it is shown that almost all classical r-matrices
of S. Zakrzewski classification correspond to twisted deformations of Abelian
and Jordanian types. A part of twists corresponding to the r-matrices of
Zakrzewski classification are given in explicit form.
"""

In [60]:
tokenized_paragraph = paragraph.lower().split()

In [61]:
dictionary = Dictionary([tokenized_paragraph])

In [62]:
bow_corpus = [dictionary.doc2bow(tokenized_paragraph)]

In [63]:
hdp_model = HdpModel(corpus=bow_corpus, id2word=dictionary)
hdp_topics = hdp_model.show_topics()

In [64]:
lda_model = LdaModel(corpus=bow_corpus, id2word=dictionary, num_topics=5)
lda_topics = lda_model.show_topics()

In [65]:
def remove_stop_words(text):
    # Download the stop words if not already downloaded
    # nltk.download('stopwords')
    
    # Get the list of stop words
    stop_words = set(stopwords.words('english'))
    
    # Tokenize the input text
    tokens = word_tokenize(text)
    
    # Remove stop words from the tokenized text
    filtered_tokens = [token for token in tokens if token.lower() not in stop_words]
    
    # Reconstruct the text without stop words
    filtered_text = ' '.join(filtered_tokens)
    
    return filtered_text

In [66]:
def remove_numbers_dots_plus(string):
    pattern = r"[0-9.+*,]"
    cleaned_string = re.sub(pattern, "", string)
    cleaned_string= remove_stop_words(cleaned_string)
    return cleaned_string

In [67]:
print("HDP Topics:")
for val, topic in enumerate(hdp_topics):
    topic_words = topic[1]
    sentence= remove_numbers_dots_plus(topic_words)
    print(sentence)
    if val==4:
        break

HDP Topics:
classification corresponding almost r-matrices lorentz poincare correspond d= twisted part form quantum explicit
types deformations algebras jordanian given case twists explicit abelian zakrzewski part algebra shown
discussed part corresponding r-matrices poincare lorentz classical given deformations types abelian zakrzewski
almost quantum given algebras correspond zakrzewski deformations r-matrices part case twisted
poincare zakrzewski jordanian classification types given corresponding deformations twisted r-matrices case shown d=


In [68]:
print("LDA Topics:")
for topic in lda_topics:
    topic_words = topic[1]
    sentence= remove_numbers_dots_plus(topic_words)
    print(sentence)

LDA Topics:
`` '' `` classification '' `` '' `` '' `` zakrzewski '' `` poincare '' `` '' `` r-matrices '' `` '' `` deformations ''
`` '' `` zakrzewski '' `` '' `` deformations '' `` r-matrices '' `` '' `` poincare '' `` '' `` classification '' `` ''
`` '' `` '' `` deformations '' `` '' `` poincare '' `` r-matrices '' `` '' `` '' `` classification '' `` zakrzewski ''
`` '' `` deformations '' `` zakrzewski '' `` r-matrices '' `` classification '' `` '' `` '' `` '' `` poincare '' `` ''
`` '' `` '' `` poincare '' `` '' `` r-matrices '' `` '' `` deformations '' `` '' `` classification '' `` zakrzewski ''
