In [1]:
from nltk.tokenize import RegexpTokenizer
from stop_words import get_stop_words
from nltk.stem.porter import PorterStemmer
from gensim import corpora, models
import gensim
import pandas as pd

In [2]:
# Tokenizer that keeps runs of word characters (\w+) and drops everything between them
tokenizer = RegexpTokenizer(pattern=r'\w+')

In [3]:
# English stop words; tokens found in this collection are discarded during preprocessing
en_stop = get_stop_words(language='en')

In [4]:
# Porter stemmer instance; its .stem() method is applied to every surviving
# token in the preprocessing loop below.
p_stemmer = PorterStemmer()

In [5]:
# Read the reviews file with the Python parsing engine and keep only the
# 'Review_Detail' column as a Series of documents.
reviews_frame = pd.read_csv('Reviews_1.csv', engine='python')
doc_set = reviews_frame['Review_Detail']

In [6]:
# Eyeball ten randomly chosen reviews as a sanity check on the load
doc_set.sample(n=10)

7      fingerprint unlock is also good, if you have f...
97     i charged the phone completely out of the box ...
255                             its not worth that price
402    This phone has one of the best solid built qua...
442    timely delivery...good products...affordable p...
405    Gifted to my mom. To my surprise, it was faste...
44     no issues with call clarity and now the best part
25     fingerprint sensor, face unlock are exciting f...
411    Good & fast service but not satisfied with pay...
144                               brightness is very low
Name: Review_Detail, dtype: object

In [7]:
# Preprocess every review into a list of stemmed tokens.
# `texts` ends up with exactly one entry per element of `doc_set`, in order.
texts = []

# Membership tests against a set are O(1); checking the raw stop-word list
# would rescan it for every single token.
stop_word_set = set(en_stop)

# loop through document list
for review in doc_set:

    # Empty CSV cells arrive as float NaN, which has no .lower(); treat them
    # as empty text so `texts` stays aligned with `doc_set`. Any other
    # non-string value is converted to text before lower-casing.
    raw = '' if pd.isna(review) else str(review).lower()
    tokens = tokenizer.tokenize(raw)

    # remove stop words from tokens
    stopped_tokens = [token for token in tokens if token not in stop_word_set]

    # stem tokens
    stemmed_tokens = [p_stemmer.stem(token) for token in stopped_tokens]

    # add tokens to list
    texts.append(stemmed_tokens)

In [8]:
# Build the id <-> term mapping from the tokenized documents
dictionary = corpora.Dictionary(texts)

# Convert each tokenized document into its bag-of-words (term id, count) form
corpus = [dictionary.doc2bow(text) for text in texts]

# Fit a 3-topic LDA model with 20 passes over the corpus.
# LDA training is randomized; fixing random_state makes the topics
# reproducible across Restart & Run All instead of changing on every run.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus=corpus,
    num_topics=3,
    id2word=dictionary,
    passes=20,
    random_state=42,
)

In [9]:
# Show the ten highest-weighted terms for every topic the model was trained with
ldamodel.print_topics(num_topics=ldamodel.num_topics, num_words=10)

[(0,
  '0.035*"good" + 0.034*"phone" + 0.017*"product" + 0.012*"qualiti" + 0.011*"display" + 0.011*"camera" + 0.009*"use" + 0.009*"s" + 0.009*"5" + 0.008*"one"'),
 (1,
  '0.033*"phone" + 0.013*"qualiti" + 0.012*"t" + 0.012*"one" + 0.012*"amazon" + 0.010*"price" + 0.008*"oneplu" + 0.008*"use" + 0.008*"replac" + 0.007*"work"'),
 (2,
  '0.020*"phone" + 0.014*"camera" + 0.014*"batteri" + 0.010*"10" + 0.009*"time" + 0.009*"screen" + 0.009*"amazon" + 0.008*"work" + 0.008*"s" + 0.008*"charg"')]