# ADMAGD - Results

## Importing libraries

In [1]:
import numpy as np
import pandas as pd

## Retrieve Model

In [2]:
# Base name (no directory, no extension) of the serialized model file.
model_file_name = "admagd_model"

In [3]:
# Path stem (no extension) of the saved model; the loader cells append ".pkl" / ".joblib".
# NOTE(review): the directory name contains a space after the underscore
# ("trained_ model") — confirm it matches the on-disk folder name.
model_path = f"trained_ model/{model_file_name}"

### Pickle

In [4]:
import pickle

Load the model from a file

In [5]:
# Deserialize the model object from "<model_path>.pkl".
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# model files that come from a trusted source.
with open(f"{model_path}.pkl", 'rb') as f:
    loaded_model = pickle.load(f)

### joblib

In [6]:
# from joblib import load

Load the model from a file

In [7]:
# loaded_model_joblib = load(f"{model_path}.joblib")

## Results

### Extract words for each topic

In [8]:
# Turn the word-topic matrix stored on the loaded model into per-row
# distributions (every row divided by its own total). The next cell indexes
# the result as word_topic_dist[topic_index].
word_topic_matrix = loaded_model.word_topic_matrix

# Row totals as a column so they broadcast across each row. Cast to float so
# the epsilon below is not truncated to 0 if the matrix holds integer counts.
word_topic_sum = word_topic_matrix.sum(axis=1)[:, np.newaxis].astype(float)

# A row with no mass would otherwise give 0/0 = NaN, and argsort orders NaN
# last, so NaN entries would be reported as the "top" words. Dividing by a tiny
# epsilon yields an all-zero row instead, matching the zero-sum handling used
# for the author-topic matrix in this notebook.
word_topic_sum[word_topic_sum == 0] = 1e-10

word_topic_dist = word_topic_matrix / word_topic_sum

In [9]:
# Print the N_TOP_WORDS highest-weighted words of every topic.
N_TOP_WORDS = 20
for i in range(loaded_model.num_topics):
    # argsort is ascending: reverse it, then keep the leading N_TOP_WORDS ids.
    top_words_idx = word_topic_dist[i].argsort()[::-1][:N_TOP_WORDS]

    # Map each word id back to its string through the model's vocabulary.
    top_words = []
    for idx in top_words_idx:
        top_words.append(loaded_model.id2word[idx])

    print(f"Topic {i + 1}: {', '.join(top_words)} \n")

Topic 1: like, just, use, know, apr, distribution, university, say, dod, good, make, work, thing, need, time, new, usa, want, look, year 

Topic 2: university, israel, say, know, just, apr, like, state, israeli, right, year, use, jew, arab, make, time, want, world, way, jewish 

Topic 3: use, university, know, like, just, need, work, thanks, problem, want, computer, good, distribution, time, help, run, try, replyto, drive, apr 

Topic 4: university, use, like, know, just, work, time, need, new, distribution, thanks, say, good, look, want, make, try, usa, problem, question 

Topic 5: say, just, government, use, like, state, make, know, gun, right, time, university, way, distribution, apr, good, day, thing, law, want 

Topic 6: university, use, know, distribution, thanks, email, like, computer, look, just, apr, new, work, usa, want, science, problem, time, help, need 

Topic 7: university, know, just, like, use, good, say, time, apr, distribution, want, new, look, car, make, year, comput

### Visualize the author-topic distribution

In [10]:
# Normalize the author_topic_matrix row-wise to get each author's topic
# distribution, then collect the top N topic indices per author.

# Row totals as a column so they broadcast across each row. Cast to float:
# on an integer count matrix the totals would be integers, and assigning the
# epsilon below would be truncated to 0, leaving the division by zero in place.
author_topic_sum = loaded_model.author_topic_matrix.sum(axis=1)[:, np.newaxis].astype(float)

# Replace zero sums with a small epsilon so empty rows become all-zero rows
# instead of NaN.
epsilon = 1e-10
author_topic_sum[author_topic_sum == 0] = epsilon

# Element-wise division (each row divided by its own total).
author_topic_dist = loaded_model.author_topic_matrix / author_topic_sum

# Collect the indices of the N_TOP_TOPICS largest entries for each author,
# highest first. Rows are looked up by the author's position in
# loaded_model.authors.
# NOTE: when a row has fewer than N_TOP_TOPICS distinct non-zero entries, the
# remaining slots are decided by argsort's ordering of tied values, not by a
# meaningful ranking.
N_TOP_TOPICS = 2
top_topics_list = []
for i, author in enumerate(loaded_model.authors):
    top_topics_idx = author_topic_dist[i].argsort()[-N_TOP_TOPICS:][::-1]
    top_topics_list.append(top_topics_idx)
    # Debug aid (disabled):
    # print(f"Author {i+1} => {author} : Topic IDs {top_topics_idx} \n")

In [11]:
# One row per author, paired with that author's entry from top_topics_list.
# NOTE: the topic ids stored here are 0-based argsort indices, whereas the
# top-words printout labels topics 1-based ("Topic {i + 1}"); add 1 when
# cross-referencing the two.
top_topics_of_authors_df = pd.DataFrame({'authors': loaded_model.authors, 'topics': top_topics_list})
# Bare last expression so the notebook renders the frame.
top_topics_of_authors_df

Unnamed: 0,authors,topics
0,Mamatha Devineni Ratnam,"[10, 19]"
1,mblawson@midway.ecn.uoknor.edu (Matthew B Lawson),"[5, 19]"
2,hilmi-er@dsv.su.se (Hilmi Eren),"[1, 14]"
3,guyd@austin.ibm.com (Guy Dawson),"[2, 15]"
4,Alexander Samuel McDiarmid,"[2, 5]"
...,...,...
8529,pcarmack@gimp.kpc.com (Phil Carmack),"[5, 19]"
8530,gt5735a@prism.gatech.EDU (Mark Devaney),"[0, 18]"
8531,pkeenan@s.psych.uiuc.edu (Patricia Keenan),"[10, 19]"
8532,CCMB,"[0, 18]"
