In [None]:
# ================================
# 1) Load the cleaned data
# ================================
df = pd.read_csv("tokopedia_reviews_clean.csv")

# Use the 'sentiment' label (neg/neu/pos), which was derived from the rating.
# Rows missing text or label are dropped and duplicate review texts are collapsed.
df = df.dropna(subset=["text","sentiment"]).drop_duplicates(subset=["text"]).reset_index(drop=True)

# Small subset for the connection test (saves credits and time).
# The sample size is capped at len(df): sampling without replacement raises
# ValueError when more rows are requested than exist.
df_small = df.sample(min(60, len(df)), random_state=42).reset_index(drop=True)
df_small["text"] = df_small["text"].astype(str)

df_small.head(3)


In [None]:
# ================================
# 2) Helper: Prompt & parsing output
# ================================
# The three label strings the classifier is asked to emit.
# NOTE(review): SENTINEL is not referenced in the cells visible here — confirm
# whether it is used elsewhere before relying on or removing it.
SENTINEL = {"neg","neu","pos"}

# Few-shot instruction block that build_prompt() places in front of every
# review: task description, per-label guidelines, and one worked example for
# each of the three labels.
FEW_SHOT = """You are a sentiment classifier for Indonesian e-commerce product reviews.
Output ONLY one of: neg, neu, pos (lowercase).
Guidelines:
- neg: keluhan, kecewa, rusak, telat, tidak sesuai.
- neu: biasa saja, netral, antara puas dan tidak puas.
- pos: puas, bagus, sesuai, cepat, rekomendasi.

Contoh:
Review: "Barangnya jelek, kardus penyok, penjual slow response."
Label: neg

Review: "Biasa aja sih, sesuai harga."
Label: neu

Review: "Mantap, barang ori, pengiriman cepat. Recommended!"
Label: pos
"""

def build_prompt(review_text: str) -> str:
    """Return the few-shot preamble followed by the review to classify.

    The review is wrapped in double quotes and the prompt ends with an open
    ``Label:`` slot for the model to complete.
    """
    query = '\n\nReview: "{}"\nLabel:'.format(review_text)
    return FEW_SHOT + query

def normalize_label(model_text: str) -> str:
    """Map raw model output to one of ``"neg"``, ``"neu"`` or ``"pos"``.

    The first whole word in the (case-insensitive) output that is a label is
    used. Both the short forms (neg/neu/pos) and the full English/Indonesian
    words (negative/negatif, neutral/netral, positive/positif) are accepted.

    Args:
        model_text: Text returned by the model.

    Returns:
        The detected label, or ``"neu"`` when the input is not a string or
        contains no label word.
    """
    if not isinstance(model_text, str):
        return "neu"
    text = model_text.strip().lower()
    # Word boundaries prevent false hits inside unrelated words such as
    # "suppose"/"possible" (-> pos) or "negara" (-> neg). Each alternative is
    # a named group, so the matching group's name is the label itself.
    m = re.search(
        r"\b(?:(?P<neg>neg(?:ative|atif)?)"
        r"|(?P<neu>neu(?:tral)?|netral)"
        r"|(?P<pos>pos(?:itive|itif)?))\b",
        text,
    )
    return m.lastgroup if m else "neu"


In [None]:
# ================================
# 3) Single-call test (sanity check)
# ================================
test_text = "Barangnya bagus, pengiriman cepat dan sesuai deskripsi."
out = replicate.run(MODEL_SLUG, input={"prompt": build_prompt(test_text)})
# The output may be a plain string, a list of tokens, or a lazy iterator of
# chunks. Join any non-string iterable so that RAW shows the actual text and
# the parser does not fall back to its default for a non-str value.
if not isinstance(out, (str, bytes, dict)) and hasattr(out, "__iter__"):
    out = "".join(str(chunk) for chunk in out)
print("RAW:", out)
print("PARSED:", normalize_label(out))


In [None]:
# ================================
# 4) Batch inference (sequential requests + retry)
# NOTE: Replicate enforces rate limits, so reviews are sent one request at a time and failed calls are retried with a growing delay.
# ================================
def granite_predict_labels(texts, model_slug=MODEL_SLUG, max_retries=3, sleep_base=2.0):
    """Classify each review text with the model, one request per text.

    Args:
        texts: Iterable of review texts.
        model_slug: Model identifier passed to ``replicate.run``.
        max_retries: Number of retries after the first failed attempt.
        sleep_base: Base delay in seconds; the wait before retry ``k`` is
            ``sleep_base * k``.

    Returns:
        List of labels ("neg"/"neu"/"pos"), one per input text, in input
        order. A text whose request keeps failing gets "neu" and a
        ``RuntimeWarning`` is issued so the fallback is visible in the output.
    """
    import warnings  # local import so this cell does not rely on extra top-level imports

    preds = []
    for idx, t in enumerate(tqdm(texts)):
        attempt = 0
        while True:
            try:
                out = replicate.run(model_slug, input={"prompt": build_prompt(t)})
                # Output may be a string, a list of tokens, or a lazy iterator
                # of chunks; join every non-string iterable so the parser
                # receives text instead of defaulting to "neu".
                if not isinstance(out, (str, bytes, dict)) and hasattr(out, "__iter__"):
                    out = "".join(str(chunk) for chunk in out)
                preds.append(normalize_label(out))
                break
            except Exception as e:
                attempt += 1
                if attempt > max_retries:
                    # Fallback after repeated errors: label as 'neu', but say
                    # so, because silent fallbacks distort the metrics.
                    warnings.warn(
                        f"granite_predict_labels: item {idx} failed after "
                        f"{attempt} attempts ({e!r}); falling back to 'neu'",
                        RuntimeWarning,
                    )
                    preds.append("neu")
                    break
                time.sleep(sleep_base * attempt)
    return preds

# Gold labels and model inputs for the small sanity-check sample.
y_true = df_small["sentiment"].tolist()
texts = df_small["text"].tolist()

# Run the classifier and keep its predictions next to the gold labels.
y_pred = granite_predict_labels(texts)
df_small["granite_pred"] = y_pred

# Preview the first rows, then report the sample-level metrics.
print(df_small.head(10)[["text","sentiment","granite_pred"]])
macro_f1_line = ("\nMacro-F1 (sample):", f1_score(y_true, y_pred, average="macro"))
print(*macro_f1_line)
report_line = ("\nReport:\n", classification_report(y_true, y_pred, digits=4))
print(*report_line)


In [None]:
# ================================
# 5) Larger-scale evaluation
# ================================
N = 1000
df_eval = df.sample(min(N, len(df)), random_state=123).reset_index(drop=True)
y_true_eval = df_eval["sentiment"].tolist()
pred_eval = granite_predict_labels(df_eval["text"].tolist())

# Persist the predictions before computing any metric: they cost up to N API
# calls, and an error in the reporting code must not discard them.
df_eval["granite_pred"] = pred_eval
df_eval.to_csv("granite_predictions_eval.csv", index=False)

print("Macro-F1 (N={}):".format(len(df_eval)), f1_score(y_true_eval, pred_eval, average="macro"))
print(classification_report(y_true_eval, pred_eval, digits=4))


In [None]:
# ================================
# 6) Summarization (Insight per-Produk / per-Kategori)
# For one category/product, take up to 120 reviews and ask Granite to summarise the main complaints and praise
# ================================
def summarize_reviews(reviews, model_slug=MODEL_SLUG, max_chars=6000):
    """Ask the model for a short complaints/positives/recommendation summary.

    Args:
        reviews: Iterable of review texts; each item is converted with
            ``str()`` before being placed in the prompt.
        model_slug: Model identifier passed to ``replicate.run``.
        max_chars: Upper bound on the size of the bulleted review block.

    Returns:
        The model's answer with surrounding whitespace stripped, or ``""``
        when there is nothing to summarise (no request is sent in that case).
    """
    # Build the bullet list review by review and stop before the budget is
    # exceeded, so the last bullet is never cut in the middle of a sentence.
    bullets = []
    used = 0
    for review in reviews:
        bullet = "\n- " + str(review)
        if used + len(bullet) > max_chars:
            if not bullets:
                # A single review larger than the whole budget: hard-truncate
                # it rather than sending an empty block.
                bullets.append(bullet[:max_chars])
            break
        bullets.append(bullet)
        used += len(bullet)
    block = "".join(bullets)
    if not block:
        return ""
    prompt = f"""
You are an analyst. Read these Indonesian e-commerce reviews (bullet points) and produce:
1) Top 3 complaints (singkat, poin-poin)
2) Top 3 positives (singkat, poin-poin)
3) One actionable recommendation

Output in Indonesian, concise.

Reviews:
{block}
"""
    out = replicate.run(model_slug, input={"prompt": prompt})
    # Output may be a string, a list of tokens, or a lazy iterator of chunks;
    # join any non-string iterable before calling str methods on it.
    if not isinstance(out, (str, bytes, dict)) and hasattr(out, "__iter__"):
        out = "".join(str(chunk) for chunk in out)
    return out.strip()

# Example: summary for the most frequent category
cat = df["category"].value_counts().index[0]
sample_reviews = df.loc[df["category"]==cat, "text"].astype(str).head(120).tolist()
summary_cat = summarize_reviews(sample_reviews)
print(f"== Ringkasan kategori: {cat} ==")
print(summary_cat)

# Category names are free text and may contain path separators or other
# characters that are invalid in file names (e.g. "A/B"); replace those with
# "_" so that open() does not fail or write into an unintended directory.
safe_cat = str(cat).translate(str.maketrans({ch: "_" for ch in '\\/:*?"<>|'}))
with open(f"summary_category_{safe_cat}.txt","w", encoding="utf-8") as f:
    f.write(summary_cat)
