## Ali's code

It seems that the way this link computes the weight of each topic in each document is not optimal: it does not report a weight when that weight is very small, but in big models these small weights can also become important. Instead, you can use the following simple approach to get the weights.
To get the topic weights of the ith document, call the get_document_topics method of the topic model object and pass it the ith element of M1 (or of tfidf_M1 if you use tf-idf), as shown below. Set minimum_probability=0 so that near-zero entries are not removed:
lda_model.get_document_topics(M1[i],minimum_probability=0)
Let us see this in the complete example we had before:

The code below gives the following output (the topics and the weights of the words in each topic):

In [1]:
# Toy corpus: five short sentences about security and machine learning.
corpus = [
    "cryptography can be used for preventing data leakage in computer security",
    "supervised learning and unsupervised learning are the two main groups of methods in machine learning",
    "while in supervised learning we have access to the target variable in unsupervised learning we do not have such a variable",
    "there are some methods in security for reducing the risk of information leakage like authentication and cryptography",
    "topic modeling in an unsupervised machine learning model and therefore we do not have target variables",
]

# Words that clean_doc() drops from every document.
stop_words = [
    "can", "be", "for", "two", "the", "for", "we", "in", "not", "do", "are",
    "to", "an", "there", "some", "have", "a", "and", "of", "like", "while",
    "therefore", "such",
]


def clean_doc(doc):
    """Lower-case ``doc``, split it on whitespace and drop every stop word.

    Returns the surviving words joined back together with single spaces.
    """
    kept = []
    for token in doc.lower().split():
        if token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)

import gensim
from gensim import corpora

# Remove stop words and tokenise in a single pass: one list of words per
# document. clean_doc() returns a space-joined string, hence the split().
corpus_clean = [clean_doc(doc).split() for doc in corpus]

# Build the token <-> id mapping, then encode every document as a
# bag-of-words vector (see gensim's Dictionary.doc2bow documentation).
dictionary = corpora.Dictionary(corpus_clean)
M1 = [dictionary.doc2bow(tokens) for tokens in corpus_clean]

# Train a two-topic LDA model on the raw bag-of-words corpus.
Lda = gensim.models.ldamodel.LdaModel
lda_model = Lda(M1, num_topics=2, id2word=dictionary, passes=5, random_state=0)

# Print each topic as an (id, "weight*word + ...") tuple, ten words per topic.
topics = lda_model.print_topics(num_topics=5, num_words=10)
for topic in topics:
    print(topic)

(0, '0.084*"learning" + 0.071*"cryptography" + 0.071*"security" + 0.071*"leakage" + 0.070*"methods" + 0.043*"computer" + 0.043*"data" + 0.043*"used" + 0.042*"preventing" + 0.042*"authentication"')
(1, '0.136*"learning" + 0.093*"unsupervised" + 0.082*"target" + 0.080*"variable" + 0.058*"machine" + 0.057*"supervised" + 0.050*"topic" + 0.050*"variables" + 0.050*"modeling" + 0.050*"model"')


Now, the following call gives us the weight of the two topics in the first document:

In [2]:
# Topic weights of the first document; minimum_probability=0 keeps even the
# near-zero weights in the returned list.
lda_model.get_document_topics(
    M1[0],
    minimum_probability=0,
)

[(0, 0.9348568), (1, 0.06514323)]

And here is the complete working code with tf-idf.
Note that, as expected, the weights are different here because the model has changed.


In [6]:
# Same five-sentence toy corpus as in the bag-of-words example.
corpus = [
    "cryptography can be used for preventing data leakage in computer security",
    "supervised learning and unsupervised learning are the two main groups of methods in machine learning",
    "while in supervised learning we have access to the target variable in unsupervised learning we do not have such a variable",
    "there are some methods in security for reducing the risk of information leakage like authentication and cryptography",
    "topic modeling in an unsupervised machine learning model and therefore we do not have target variables",
]

# Words filtered out of every document by clean_doc().
stop_words = [
    "can", "be", "for", "two", "the", "for", "we", "in", "not", "do", "are",
    "to", "an", "there", "some", "have", "a", "and", "of", "like", "while",
    "therefore", "such",
]


def clean_doc(doc):
    """Return ``doc`` lower-cased and stripped of stop words.

    The text is split on whitespace and the remaining words are re-joined
    with single spaces.
    """
    words = doc.lower().split()
    return " ".join(w for w in words if w not in stop_words)

import gensim
from gensim import corpora

# Remove stop words and tokenise in a single pass: one list of words per
# document. clean_doc() returns a space-joined string, hence the split().
corpus_clean = [clean_doc(doc).split() for doc in corpus]

# Build the token <-> id mapping, then encode every document as a
# bag-of-words vector (see gensim's Dictionary.doc2bow documentation).
dictionary = corpora.Dictionary(corpus_clean)
M1 = [dictionary.doc2bow(tokens) for tokens in corpus_clean]

# Re-weight the bag-of-words corpus with tf-idf before topic modelling.
tfidf_model = gensim.models.TfidfModel(M1)
tfidf_M1 = tfidf_model[M1]

# Train a two-topic LDA model on the tf-idf corpus.
Lda = gensim.models.ldamodel.LdaModel
lda_model = Lda(tfidf_M1, num_topics=2, id2word=dictionary, passes=5, random_state=0)

# Print each topic as an (id, "weight*word + ...") tuple, ten words per topic.
topics = lda_model.print_topics(num_topics=5, num_words=10)
for topic in topics:
    print(topic)

# Topic weights of the first document; minimum_probability=0 keeps even the
# near-zero weights. Kept as the final expression so the notebook shows it.
lda_model.get_document_topics(tfidf_M1[0], minimum_probability=0)

(0, '0.058*"variable" + 0.055*"learning" + 0.046*"methods" + 0.045*"main" + 0.045*"cryptography" + 0.045*"groups" + 0.045*"security" + 0.045*"leakage" + 0.045*"supervised" + 0.043*"computer"')
(1, '0.060*"topic" + 0.059*"variables" + 0.059*"modeling" + 0.059*"model" + 0.050*"target" + 0.049*"machine" + 0.043*"learning" + 0.042*"unsupervised" + 0.035*"variable" + 0.035*"supervised"')


[(0, 0.83430034), (1, 0.16569969)]

# Our code

In [60]:
def readFile(filename):
    """Return the lines of ``abstracts/<filename>`` as a list of strings.

    Args:
        filename: Name of a file inside the ``abstracts/`` directory, which
            is resolved relative to the current working directory.

    Returns:
        A list with one string per line of the file, line terminators
        removed.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # The context manager guarantees the handle is closed, even when
    # read() raises; a bare ``file.close`` attribute access would not.
    with open("abstracts/" + filename, 'r') as file:
        return file.read().splitlines()

  and should_run_async(code)


In [61]:
import nltk
import gensim
from gensim import corpora
#nltk.download('stopwords')      # If this code block complains, comment out this line
from nltk.corpus import stopwords



  and should_run_async(code)


In [62]:
def clean_doc(doc):
    """Return ``doc`` lower-cased with stop words removed.

    The text is split on whitespace, every token found in the module-level
    ``stop_words`` collection is discarded, and the remaining tokens are
    joined back together with single spaces.
    """
    tokens = doc.lower().split()
    return " ".join(filter(lambda token: token not in stop_words, tokens))

  and should_run_async(code)


In [63]:
# Gather the abstracts of every conference year from 2012 through 2019
# (range's upper bound is exclusive). readFile() returns one string per line
# of the file, and each of those lines becomes one corpus entry.
corpus = []
for year in range(2012, 2020):
    corpus.extend(readFile('usenix' + str(year) + 'Abstracts.txt'))

  and should_run_async(code)


In [64]:
# English stop-word list from NLTK; clean_doc() reads this module-level name.
stop_words = stopwords.words('english')

# Remove stop words and tokenise in a single pass: one list of words per
# document. clean_doc() returns a space-joined string, hence the split().
corpus_clean = [clean_doc(doc).split() for doc in corpus]

# Build the token <-> id mapping, then encode every document as a
# bag-of-words vector (see gensim's Dictionary.doc2bow documentation).
# No document is indexed by a fixed position here, so this also works when
# the corpus holds fewer than three documents.
dictionary = corpora.Dictionary(corpus_clean)
M1 = [dictionary.doc2bow(tokens) for tokens in corpus_clean]

# Re-weight the bag-of-words corpus with tf-idf before topic modelling.
tfidf_model = gensim.models.TfidfModel(M1)
tfidf_M1 = tfidf_model[M1]



  and should_run_async(code)


In [65]:
# Fit a four-topic LDA model on the tf-idf-weighted corpus.
Lda = gensim.models.ldamodel.LdaModel
lda_model = Lda(
    tfidf_M1,
    num_topics=4,
    id2word=dictionary,
    passes=5,
    random_state=0,
)

  and should_run_async(code)


### All topics

In [66]:
# Ask for up to five topics, described by their ten highest-weighted words,
# and print each one as an (id, description) tuple on its own line.
topics = lda_model.print_topics(num_topics=5, num_words=10)
for topic_id, description in topics:
    print((topic_id, description))


(0, '0.001*"attacks" + 0.000*"attack" + 0.000*"security" + 0.000*"data" + 0.000*"cache" + 0.000*"memory" + 0.000*"privacy" + 0.000*"system" + 0.000*"user" + 0.000*"network"')
(1, '0.001*"security" + 0.001*"attacks" + 0.001*"memory" + 0.000*"apps" + 0.000*"web" + 0.000*"network" + 0.000*"attack" + 0.000*"data" + 0.000*"analysis" + 0.000*"code"')
(2, '0.001*"malware" + 0.001*"security" + 0.001*"privacy" + 0.000*"password" + 0.000*"network" + 0.000*"data" + 0.000*"use" + 0.000*"devices" + 0.000*"students" + 0.000*"users"')
(3, '0.001*"data" + 0.001*"attacks" + 0.001*"security" + 0.000*"users" + 0.000*"web" + 0.000*"code" + 0.000*"user" + 0.000*"vulnerabilities" + 0.000*"system" + 0.000*"attack"')


  and should_run_async(code)


### Visualisation of topics

In [67]:
# Build a pyLDAvis visualisation from the fitted LDA model, the tf-idf corpus
# and the dictionary.
# NOTE(review): newer pyLDAvis releases appear to expose this module as
# `pyLDAvis.gensim_models` — confirm against the installed version.
import pyLDAvis.gensim
pyLDAvis.enable_notebook()  # presumably enables inline rendering in the notebook — verify
vis = pyLDAvis.gensim.prepare(lda_model, tfidf_M1, dictionary)
vis  # last expression of the cell, so the notebook displays it

  and should_run_async(code)


### Topics per document

In [68]:
# Topic weights of the first document in the tf-idf corpus;
# minimum_probability=0 keeps even the near-zero weights in the result.
lda_model.get_document_topics(
    tfidf_M1[0],
    minimum_probability=0,
)

  and should_run_async(code)


[(0, 0.9191931), (1, 0.02662838), (2, 0.026896829), (3, 0.027281724)]