# *Libraries*

In [1]:
!pip install sentence_transformers --quiet
!pip install bertopic --quiet

In [2]:
import pandas as pd
import numpy as np

In [3]:
from IPython.display import clear_output
from tqdm import tqdm
from bertopic import BERTopic

clear_output()

# *Load All Models*

In [4]:
# Root of the Kaggle input dataset that holds the saved per-part models.
BASE_DIR: str = "/kaggle/input/topic-modelling-satdat"

# Both values are substituted into the "cat_{NO_URUT}_part_{i}" directory names:
# NO_URUT selects the category, JUMLAH_PART is the number of parts to load (1..JUMLAH_PART).
NO_URUT: int = 3
JUMLAH_PART: int = 21

In [5]:
# Load every saved partial model (parts 1..JUMLAH_PART of category NO_URUT),
# in part order, with a tqdm progress bar over the parts.
list_of_model = [
    BERTopic.load(f"{BASE_DIR}/cat_{NO_URUT}_part_{i}/saved_model")
    for i in tqdm(range(1, JUMLAH_PART + 1))
]

  0%|          | 0/21 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/235k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

100%|██████████| 21/21 [00:31<00:00,  1.49s/it]


In [6]:
# Combine all per-part models into a single model.
# 0.92 is the `min_similarity` threshold passed to BERTopic; see the
# merge_models documentation for how it decides which topics are merged.
merged_model = BERTopic.merge_models(
    list_of_model,
    min_similarity=0.92,
)

# *Results and Custom Labels*

In [7]:
# Alias: the cells below refer to the merged model as `topic_model`.
topic_model = merged_model

In [8]:
# Display the per-topic summary table of the merged model
# (topic id, document count, name, keyword representations).
# NOTE(review): in the displayed output the numeric prefix of `Name` does not
# always match `Topic` (e.g. Topic 4 is named "11_rakyat b3rsama_...") —
# presumably names are carried over from the source models when merging; confirm.
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,Representative_Docs
0,-1,99537,-1_kpu_malaysia_ratusan ribu_ribu,"[kpu, malaysia, ratusan ribu, ribu, ratusan, j...","[ratusan ribu, kpu sibuk, dislepatch, bansos k...",
1,0,295692,0_hebat_indonesia_rakyat_program,"[hebat, indonesia, rakyat, program, masyarakat...","[m3nang, internet gratis, generasi muda, hebat...",
2,1,1335,1_happy new_year_new year_new,"[happy new, year, new year, new, happy, ekonom...","[year indonesia, new year, happy new, year vis...",
3,2,1206,1_int3rnet_gratiskan int3rnet_gratiskan_progra...,"[int3rnet, gratiskan int3rnet, gratiskan, prog...","[gratiskan int3rnet, int3rnet semoga, int3rnet...",
4,3,18001,3_sehat_dukung_mantap_gaspol,"[sehat, dukung, mantap, gaspol, menang, semang...","[sehat bu, sehat atikoh, bu atikoh, bu atiqoh,...",
5,4,1691,11_rakyat b3rsama_b3rsama_b3rsama rakyat_rakyat,"[rakyat b3rsama, b3rsama, b3rsama rakyat, raky...","[rakyat b3rsama, b3rsama rakyat, b3rsama mendu...",
6,5,212,14_psi_jawa timur_timur_rakyat jawa,"[psi, jawa timur, timur, rakyat jawa, jawa, ti...","[memikat hati, gurem psi, kehilangan arah, psi...",
7,6,651,1_b3rkorban_keamanan_aparat_anggota polri,"[b3rkorban, keamanan, aparat, anggota polri, t...","[b3rkorban indonesia, b3rkorban kepentingan, b...",
8,7,1190,1_selatan_kerjasama_internasional_selatan selatan,"[selatan, kerjasama, internasional, selatan se...","[tingkat internasional, peluang indonesia, mem...",
9,8,909,2_posyandu_insentif_kesehatan_kader posyandu,"[posyandu, insentif, kesehatan, kader posyandu...","[kader posyandu, posyandu garda, posyandu indo...",


In [9]:
# Persist the topic summary table as CSV; the DataFrame index is not written.
info_df = topic_model.get_topic_info()
info_df.to_csv(path_or_buf='topic-information.csv', index=False)

In [10]:
# Build one custom label per topic from its first three KeyBERT keywords,
# joined with " | ", then register the labels on the model.
#
# Each value of topic_aspects_["KeyBERT"] is read as a sequence of pairs whose
# first element is the keyword; only that first element is used.
# Slicing before extracting the keyword avoids transposing the whole list and,
# unlike indexing the transposed list, does not raise IndexError when a topic
# has no keywords (its label then becomes an empty string).
#
# NOTE(review): the variable is named `pos_...` although the labels come from
# the "KeyBERT" aspect; the name is kept in case other cells rely on it.
pos_topic_labels = {
    topic: " | ".join(pair[0] for pair in values[:3])
    for topic, values in topic_model.topic_aspects_["KeyBERT"].items()
}
topic_model.set_topic_labels(pos_topic_labels)

# *Visualize Topics*

In [11]:
# Render BERTopic's bar chart visualization. Best-effort: a failure is reported
# rather than stopping the notebook.
# NOTE(review): unlike visualize_topics / visualize_hierarchy in this notebook,
# this call does not pass custom_labels=True — confirm whether that is intended.
try:
    fig = topic_model.visualize_barchart()
    fig.show()
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate; the cause is printed so the failure can be diagnosed.
    print(f"Gagal: {exc!r}")

In [12]:
# Render BERTopic's topic visualization using the custom labels set on the model.
# Best-effort: a failure is reported rather than stopping the notebook.
try:
    fig = topic_model.visualize_topics(custom_labels=True)
    fig.show()
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate; the cause is printed so the failure can be diagnosed.
    print(f"Gagal: {exc!r}")

In [13]:
# Render BERTopic's topic hierarchy visualization using the custom labels set on
# the model. Best-effort: a failure is reported rather than stopping the notebook.
try:
    fig = topic_model.visualize_hierarchy(custom_labels=True)
    fig.show()
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate; the cause is printed so the failure can be diagnosed.
    print(f"Gagal: {exc!r}")

In [14]:
# Render BERTopic's heatmap visualization. Best-effort: a failure is reported
# rather than stopping the notebook.
# NOTE(review): unlike visualize_topics / visualize_hierarchy in this notebook,
# this call does not pass custom_labels=True — confirm whether that is intended.
try:
    fig = topic_model.visualize_heatmap()
    fig.show()
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate; the cause is printed so the failure can be diagnosed.
    print(f"Gagal: {exc!r}")