<a href="https://colab.research.google.com/github/nyukkiw/analysis-sentiment/blob/main/AS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pandas vaderSentiment googletrans==4.0.0-rc1


Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... done
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-p

In [None]:
import time
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from googletrans import Translator



In [None]:
# ====== SETTINGS ======
INPUT_CSV  = "myPertamina.csv"          # input file name
OUTPUT_CSV = "myPertamina_labeled.csv"  # output file name
TEXT_COL   = "content"                  # name of the text column in the CSV
CHUNK_SIZE = 500                        # number of rows per translation batch
MAX_ROWS   = 500                        # sampling cap; raise it (e.g. 10000) to process more rows

# ====== LOAD DATA ======
print("Membaca CSV...")
df = pd.read_csv(INPUT_CSV)

# Validate the schema first so a wrong file fails before any further work.
if TEXT_COL not in df.columns:
    raise ValueError(f"Kolom '{TEXT_COL}' tidak ada di CSV. Kolom yang tersedia: {list(df.columns)}")

# Capture the size *before* sampling: once `df` is replaced by the sample,
# len(df) equals MAX_ROWS and the message would report the wrong original size.
total_rows = len(df)
if total_rows > MAX_ROWS:
    # Fixed random_state keeps the sampled subset identical across runs.
    df = df.sample(n=MAX_ROWS, random_state=42).reset_index(drop=True)
    print(f"DISAMPLING: hanya memakai {MAX_ROWS} baris dari {total_rows} data awal.")

# Shared instances used by the helper functions defined in later cells.
analyzer = SentimentIntensityAnalyzer()
translator = Translator()



Membaca CSV...
DISAMPLING: hanya memakai 500 baris dari 500 data awal.


In [None]:
# ====== FUNGSI BANTU ======
def translate_text(text, src="id", dest="en", max_retries=3, retry_delay=2):
    """
    Translate a single text using the module-level ``translator``.

    Missing values (as detected by ``pd.isna``) and blank strings return ""
    without calling the translator. Any exception raised by the translator is
    logged and the call is retried; after ``max_retries`` failed attempts the
    function gives up and returns "".

    Args:
        text: Value to translate; non-string values are converted with ``str``.
        src: Source language code passed to the translator.
        dest: Destination language code passed to the translator.
        max_retries: Total number of translation attempts before giving up.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        The translated text, or "" for blank input or permanent failure
        (the two cases are not distinguishable from the return value).
    """
    text = "" if pd.isna(text) else str(text)
    if not text.strip():
        return ""

    for attempt in range(max_retries):
        try:
            result = translator.translate(text, src=src, dest=dest)
            return result.text
        except Exception as e:
            # Deliberately broad: this is a best-effort call and one bad row
            # must not abort the whole run.
            print(f"[WARN] Gagal translate (percobaan {attempt+1}): {e}")
            # Only wait when another attempt follows; sleeping after the last
            # failure would just delay the fallback below.
            if attempt + 1 < max_retries:
                time.sleep(retry_delay)

    print("[ERROR] Gagal translate permanen, isi dikosongkan.")
    return ""


def vader_compound(text_en):
    """
    Return the VADER compound score of an English text.

    Anything that is not a string, and any empty or whitespace-only string,
    scores 0.0 without consulting the analyzer.
    """
    has_text = isinstance(text_en, str) and bool(text_en.strip())
    return analyzer.polarity_scores(text_en)["compound"] if has_text else 0.0


def label_from_compound(c):
    """
    Map a compound score to a sentiment label.

    Scores of 0.05 and above are "positive", scores of -0.05 and below are
    "negative", and everything else is "neutral".
    """
    if c <= -0.05:
        return "negative"
    if c >= 0.05:
        return "positive"
    return "neutral"



In [None]:

# ====== STEP 1: TRANSLATE CHUNK BY CHUNK ======
# Rows are translated one at a time; chunks only group the progress output.
english_texts = []

n = len(df)
print(f"Total baris: {n}")

for start in range(0, n, CHUNK_SIZE):
    end = min(n, start + CHUNK_SIZE)
    print(f"Mentranslate baris {start} s.d. {end-1} ...")

    batch = df[TEXT_COL].iloc[start:end].tolist()
    batch_en = []

    for i, t in enumerate(batch):
        batch_en.append(translate_text(t))

        # Small progress report after every 100 rows of the current chunk.
        if not (i + 1) % 100:
            print(f"  -> {i+1} dari {len(batch)} dalam chunk ini selesai")

    english_texts += batch_en



Total baris: 500
Mentranslate baris 0 s.d. 499 ...
  -> 100 dari 500 dalam chunk ini selesai
  -> 200 dari 500 dalam chunk ini selesai
  -> 300 dari 500 dalam chunk ini selesai
  -> 400 dari 500 dalam chunk ini selesai
  -> 500 dari 500 dalam chunk ini selesai


In [None]:
# Attach the translations to the frame as a new column.
df["content_en"] = english_texts

# ====== STEP 2: COMPUTE VADER SCORE & LABEL ======
print("Menghitung skor VADER...")
compound_scores = df["content_en"].map(vader_compound)
df["vader_compound"] = compound_scores
df["sentiment_vader"] = compound_scores.map(label_from_compound)

# ====== STEP 3: SAVE TO A NEW FILE ======
df.to_csv(OUTPUT_CSV, index=False)
print(f"Selesai! Hasil disimpan ke '{OUTPUT_CSV}'")
preview_columns = [TEXT_COL, "content_en", "vader_compound", "sentiment_vader"]
print(df[preview_columns].head())


Menghitung skor VADER...
Selesai! Hasil disimpan ke 'myPertamina_labeled.csv'
                                             content  \
0                                      Sukses selalu   
1  Aplikasi nya ribet banget tida rekomen datang ...   
2                           Nik password tidak benar   
3                                     siiiiiiipppppp   
4  Pele ini apk susah masuk sedikit pin salah pad...   

                                          content_en  vader_compound  \
0                           Good luck for the future          0.7096   
1  The application is really complicated, don't r...         -0.4164   
2                        Nik password is not correct          0.0000   
3                                     siiiiiiipppppp          0.0000   
4  Pele this apk is difficult to enter, the pin i...         -0.6808   

  sentiment_vader  
0        positive  
1        negative  
2         neutral  
3         neutral  
4        negative  


In [None]:
# Start of the sentiment analysis: reload the labeled reviews from disk
# and preview the first rows.
labeled_path = "myPertamina_labeled.csv"
df = pd.read_csv(labeled_path)
df.head()



Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,content_en,vader_compound,sentiment_vader
0,b01f2b10-6378-4f94-b44a-f246243c2ec7,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Sukses selalu,5,0,4.3.3,2024-09-16 16:38:42,,,4.3.3,Good luck for the future,0.7096,positive
1,30c1d8a0-6668-4b73-ba12-0995b19d1792,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Aplikasi nya ribet banget tida rekomen datang ...,1,0,4.6.5,2024-12-23 15:24:58,"Hai, Sobat Ikhsan. Mimin informasikan pendafta...",2024-12-25 10:13:59,4.6.5,"The application is really complicated, don't r...",-0.4164,negative
2,38a8ebfc-1e43-4a2c-b65b-73868cd6eb7d,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Nik password tidak benar,1,0,4.4.1,2024-10-08 12:21:22,,,4.4.1,Nik password is not correct,0.0,neutral
3,6ac527cf-6c09-4356-96ed-e91f0d42c033,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,siiiiiiipppppp,5,0,4.6.5,2025-01-20 06:39:17,"Hai, Sobat Hafid. Terima kasih telah menggunak...",2025-01-30 09:13:19,4.6.5,siiiiiiipppppp,0.0,neutral
4,3084b4da-74ff-449d-9619-d3004a698855,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Pele ini apk susah masuk sedikit pin salah pad...,1,0,4.3.3,2024-07-18 11:42:56,,,4.3.3,"Pele this apk is difficult to enter, the pin i...",-0.6808,negative


In [None]:
# Number of reviews per VADER sentiment label.
df.loc[:, "sentiment_vader"].value_counts()


Unnamed: 0_level_0,count
sentiment_vader,Unnamed: 1_level_1
positive,225
negative,172
neutral,103


In [None]:
# The ten reviews with the highest compound score (most positive first).
(
    df[["content", "vader_compound"]]
    .sort_values("vader_compound", ascending=False)
    .head(10)
)


Unnamed: 0,content,vader_compound
352,Aplikasinya bagus ga ribet cuman kekurangannya...,0.9182
193,Aplikasinya dalam 2 bulan terakhir ini mengece...,0.9039
149,"1.Pendaftaran rumit,2.nopol sudah terdaftar se...",0.9002
261,😍😍😍,0.8402
21,Aplikasi ini salah satu upaya pemerintah untuk...,0.8278
6,"benar2 mantap gan,makasih",0.8204
355,Perusahaan elit app sulit.. Kaya yg ga punya d...,0.8176
119,please pemerintah yang terhormat.. permudahlah...,0.802
211,"Download aplikasinya, ikuti programnya, menang...",0.7964
477,"Aplikasi yg mantap,, beli bensin jadi mudah",0.7906


In [None]:
# The ten reviews with the lowest compound score (most negative first).
(
    df[["content", "vader_compound"]]
    .sort_values("vader_compound", ascending=True)
    .head(10)
)


Unnamed: 0,content,vader_compound
312,Dulu daftar pakai aplikasi mypertamina Verifik...,-0.9626
374,"aplikasi tolol, emang aplikasi dari pemerintah...",-0.9532
43,Setelah update apk nama saya berubah jadi DANA...,-0.9287
391,"Ribet banget sehh,rakyat mau beli bensin aja s...",-0.9225
448,"Ribet... Slalu bermasalah, kuota BBM bs hbs se...",-0.8922
128,daftar barcode tapi Verifikasi di tolak terus ...,-0.872
155,"Pendaftaran sangat sulit, Kasian bagi para ora...",-0.8687
426,"Saya bingung dengan app my pertamina, tujuan d...",-0.8607
56,"Aplikasi jelek,kurang matang servernys,lambat ...",-0.8455
231,Mau terima kode otp aja ngk kekirim2 jnjokk Te...,-0.8295
