# Top2Vec Analysis

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from top2vec import Top2Vec

## Loading the trained model

In [27]:
# Name of the trained model variant to analyse; it is interpolated into the
# model file name below. Swap the active line to load the other variant.
#model_name = "tokens_learn_universal-sentence-encoder"
model_name = "lowercase_learn_doc2vec"

# Directory that holds the saved model files. Defaults to the original "E:/"
# location, but can be overridden with the TOP2VEC_MODEL_DIR environment
# variable so the notebook is not tied to one machine's drive layout.
model_dir = os.environ.get("TOP2VEC_MODEL_DIR", "E:/")
model_path = os.path.join(model_dir, f"top2vec_{model_name}.model")
model = Top2Vec.load(model_path)

### Get Topic Sizes

In [28]:
# Report how many topics the loaded model contains.
num_topics = model.get_num_topics()
print(f"Number of topics: {num_topics}")

# Topic sizes and the matching topic numbers, exactly as returned by the model.
topic_sizes, topic_nums = model.get_topic_sizes()
print(f"Topic sizes: {topic_sizes}")
print(f"Topic nums: {topic_nums}")


Number of topics: 293
Topic sizes: [19477  8936  8318  7694  6551  6108  6056  5364  4957  4093  3576  3475
  3429  3356  3224  3198  3103  3054  2923  2851  2549  2363  2145  2078
  2006  1950  1938  1902  1747  1721  1681  1678  1676  1507  1451  1373
  1360  1329  1296  1281  1193  1190  1180  1069  1063  1053  1017  1009
  1005  1001   996   985   936   918   911   895   891   880   866   819
   807   802   801   799   799   797   760   701   699   699   696   683
   655   650   648   636   634   625   595   593   591   581   575   569
   567   564   556   552   545   545   541   536   534   528   518   505
   503   502   493   485   469   462   458   431   431   425   420   417
   417   410   397   394   381   366   362   358   356   356   356   339
   331   322   319   318   317   314   309   306   302   301   296   294
   289   287   287   281   277   267   260   259   248   247   247   244
   243   242   237   236   233   230   230   227   227   225   225   224
   221   220   2

### Get Topics

In [29]:
# Request 30 topics from the model: their words, the per-word scores and the
# topic numbers the model assigns to them.
topic_words, word_scores, topic_nums = model.get_topics(30)

# Label every row with the topic number reported by the model rather than the
# loop position, so the label stays correct even if the two ever differ.
# Only the first 10 words of each topic are shown to keep the output short.
for topic_num, words in zip(topic_nums, topic_words):
    print(f"Topic {topic_num}: {words[:10]}")

Topic 0: ['yoga' 'yogas' 'yogawithamit' 'yogatx' 'yogavcommunity' 'exercises'
 'workouts' 'workout' 'stretching' 'excercises']
Topic 1: ['workout' 'workouts' 'exercise' 'ejercicios' 'excercises' 'exercises'
 'excercise' 'exercising' 'pilates' 'exercices']
Topic 2: ['goodmorning' 'thankful' 'thankyou' 'obrigada' 'grazie' 'спасибо'
 'gracias' 'morning' 'thank' 'day']
Topic 3: ['mujhe' 'abhi' 'hai' 'bhi' 'hoga' 'hii' 'dekha' 'kya' 'yoga' 'fat']
Topic 4: ['yoga' 'yogas' 'yogawithamit' 'yogavcommunity' 'yogatx' 'exercises'
 'abrazo' 'exercise' 'meditative' 'inspiring']
Topic 5: ['vids' 'videos' 'thankyou' 'video' 'видео' 'thx' 'vid' 'gracias' 'grazie'
 'спасибо']
Topic 6: ['class' 'classes' 'clases' 'clase' 'thankyou' 'awesome' 'cours' 'courses'
 'lesson' 'tutorial']
Topic 7: ['practice' 'practiced' 'practising' 'practicing' 'practise' 'practica'
 'practised' 'practises' 'practices' 'pratice']
Topic 8: ['day' 'todays' 'morning' 'goodmorning' 'today' 'hoy' 'mornings'
 'afternoon' 'tomorrow' 

### Search Topics

In [38]:
# Seed keyword lists, one per theme, used as search terms against the model.
interest = [
    "fun",
    "like",
    "interesting",
    "exciting",
    "happy",
]
competence = [
    "challenge",
    "skill",
    "improve",
    "learn",
]
appearance = [
    "attractive",
    "weight",
    "appearance",
]
fitness = [
    "fit",
    "body",
    "healthy",
    "physical",
    "energy",
    "exercise",
]
social = [
    "social",
    "friend",
    "other",
    "people",
]

In [39]:
# Run the same keyword search for every theme in one loop instead of repeating
# the cell body five times. The list order fixes the printed numbering
# ("Topic 1" .. "Topic 5") and means the result variables hold the values for
# the last theme (social) once the cell finishes, as before.
keyword_groups = [
    ("interest", interest),
    ("competence", competence),
    ("appearance", appearance),
    ("fitness", fitness),
    ("social", social),
]

for group_index, (group_name, keywords) in enumerate(keyword_groups, start=1):
    print(f"Topic {group_index}: {group_name}")

    # Ask the model for 5 topics matching this theme's keyword list.
    topic_words, word_scores, topic_scores, topic_nums = model.search_topics(
        keywords=keywords, num_topics=5
    )

    # One entry per returned topic: its number, its score for this query,
    # and its first 10 words, followed by a blank separator line.
    for i, topic in enumerate(topic_nums):
        print(f"Topic: {topic} | Score: {topic_scores[i]}")
        print(f"Keywords: {topic_words[i][:10]}")
        print()



Topic 1: interest
Topic: 64 | Score: 0.3544735269099829
Keywords: ['merci' 'gracias' 'danke' 'obrigada' 'спасибо' 'grazie' 'thankyou'
 'thanks' 'thank' 'thx']

Topic: 39 | Score: 0.2910992796044205
Keywords: ['funny' 'hilarious' 'laughing' 'smile' 'laughed' 'laughter' 'laughs'
 'giggle' 'hahaha' 'smiling']

Topic: 250 | Score: 0.26098371436097545
Keywords: ['sylvie' 'obrigada' 'спасибо' 'gracias' 'grazie' 'merci' 'danke'
 'thankyou' 'thank' 'thanks']

Topic: 175 | Score: 0.2580773521462656
Keywords: ['gratitude' 'thankful' 'grateful' 'dank' 'thanking' 'спасибо' 'joyous'
 'thank' 'thx' 'joyful']

Topic: 34 | Score: 0.25804505786427057
Keywords: ['wonderful' 'congrats' 'fantastic' 'awesome' 'thankful' 'congratulations'
 'goodmorning' 'flow' 'amazing' 'gratefull']

Topic 2: competence
Topic: 7 | Score: 0.19887707506724006
Keywords: ['practice' 'practiced' 'practising' 'practicing' 'practise' 'practica'
 'practised' 'practises' 'practices' 'pratice']

Topic: 50 | Score: 0.1920880943336898
