In [None]:
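# Install the bitermplus and pandas Python packages into the notebook environment.
# NOTE(review): the earlier note said bitermplus needs Java (usually preinstalled on
# Colab); its core appears to be a Cython extension, so Java should not be required — confirm.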
!pip install bitermplus pandas

import bitermplus as btm
import pandas as pd
import numpy as np

In [None]:
def get_raw_url(github_url):
    """Convert a GitHub "blob" page URL into its raw-content URL.

    The URL is rewritten only when it contains both ``github.com`` and
    ``/blob/``; any other string (including an already-raw URL) is returned
    unchanged.

    Args:
        github_url: URL string, e.g.
            ``https://github.com/user/repo/blob/main/data.csv``.

    Returns:
        The ``raw.githubusercontent.com`` equivalent, e.g.
        ``https://raw.githubusercontent.com/user/repo/main/data.csv``,
        or ``github_url`` itself when it is not a blob URL.
    """
    if 'github.com' in github_url and '/blob/' in github_url:
        # Replace only the first occurrence of each marker: in the standard
        # layout the first "/blob/" separates the repo from the branch, and a
        # later "blob" directory inside the file path must be kept intact.
        raw_url = github_url.replace('github.com', 'raw.githubusercontent.com', 1)
        return raw_url.replace('/blob/', '/', 1)
    return github_url

# === CONFIG URL ===
# GitHub page URL of the CSV file; get_raw_url() turns it into a raw-content URL.
url_file_github = "https://github.com/USERNAME_KAMU/NAMA_REPO/blob/main/data.csv"

# Number of topics to learn. Kept in one place so the model's T and its alpha
# prior (50 / T, the usual BTM heuristic) cannot drift apart.
NUM_TOPICS = 3

try:
    print("⏳ Loading data...")
    df = pd.read_csv(get_raw_url(url_file_github))
    # Use the first column whose name looks like a text field; fall back to the
    # first column. str() guards against non-string column labels.
    col_text = next(
        (c for c in df.columns if str(c).lower() in ['text', 'tweet', 'content', 'review']),
        df.columns[0],
    )

    # Collect the documents as a list of strings. Missing cells are dropped
    # first; otherwise astype(str) would turn them into the literal token "nan".
    # NOTE: rows of p_zd below therefore correspond to the non-missing rows only.
    texts = df[col_text].dropna().astype(str).tolist()

    # 1. BTM-specific preprocessing
    print("⏳ Vectorizing...")
    # X is the term-frequency matrix, vocabulary is the list of words.
    X, vocabulary, vocab_dict = btm.get_words_freqs(texts)

    # Convert the documents to word-id vectors, then to biterms.
    docs_vec = btm.get_vectorized_docs(texts, vocabulary)
    biterms = btm.get_biterms(docs_vec)

    # 2. Train the BTM model
    print("⏳ Training BTM Model...")
    model = btm.BTM(
        X,
        vocabulary,
        seed=12321,
        T=NUM_TOPICS,
        M=20,
        alpha=50 / NUM_TOPICS,
        beta=0.01,
    )
    model.fit(biterms, iterations=20)

    # 3. Show the results
    print("\n=== HASIL TOPIK BTM ===")
    p_zd = model.transform(docs_vec)  # Topic probabilities per document
    # get_top_topic_words() has no `topics_num` parameter (it accepts
    # `topics_idx`, a sequence of topic indices); omitting it returns the top
    # words for every topic of the model.
    top_words = btm.get_top_topic_words(model, words_num=10)

    print(top_words)

except Exception as e:
    # Top-level notebook boundary: report the failure instead of a traceback.
    print(f"Error: {e}")