In [12]:
!pip install google-play-scraper pandas



In [13]:
import pandas as pd
from google_play_scraper import Sort, reviews

# Google Play package id passed to the scraper below.
app_id_wattpad = 'wp.wattpad'

# Columns removed from both exported datasets (original and case-folded).
COLUMNS_TO_DROP = [
    'reviewId',
    'userImage',
    'thumbsUpCount',
    'appVersion',
]

# =================================================================
# === SCRAPE DATA & CONVERT TO DATAFRAME ===
# =================================================================

print("Memulai scrapping data Wattpad...")
# Request the 1000 newest reviews with no score filter; the second return
# value is not needed in this cell, so it is discarded.
result, _ = reviews(
    app_id_wattpad,
    lang='id',
    country='id',
    sort=Sort.NEWEST,
    count=1000,
    filter_score_with=None
)

df_reviews = pd.DataFrame(result)

# An empty result produces a DataFrame without any columns, which would
# otherwise write an empty CSV and then fail later with an opaque
# KeyError on 'content'. Fail early, before anything is written to disk.
if df_reviews.empty:
    raise ValueError(
        f"Tidak ada ulasan yang didapat untuk aplikasi '{app_id_wattpad}'."
    )

print(f"✅ Data berhasil di-scrapping: {len(df_reviews)} ulasan.")

# =================================================================
# === BUILD THE ORIGINAL DATASET ===
# =================================================================

# errors='ignore' keeps the drop from failing if a listed column is absent.
df_original_clean = df_reviews.drop(columns=COLUMNS_TO_DROP, errors='ignore')

dataset_ori = 'wattpad_reviews_original.csv'
df_original_clean.to_csv(dataset_ori, index=False)
# Report success only after the file has actually been written.
print("✅ Dataset Original dibuat dan disimpan.")

# =================================================================
# === BUILD THE FINAL DATASET (Case Folding) ===
# =================================================================

# Lower-cased copy of the review text; added to df_reviews itself so the
# column stays available on that frame after this cell.
df_reviews['content_clean'] = df_reviews['content'].str.lower()
print("✅ Case Folding pada 'content' selesai.")

# The final dataset keeps only the lower-cased text, so the raw 'content'
# column is dropped together with the shared column list.
columns_to_drop_final = COLUMNS_TO_DROP + ['content']
df_reviews_clean = df_reviews.drop(columns=columns_to_drop_final, errors='ignore')

print("✅ Dataset Final (Case Folding) dibuat.")
print(f"Kolom tersisa: {df_reviews_clean.columns.tolist()}")

dataset_casefloding = 'wattpad_reviews_case_folded.csv'
df_reviews_clean.to_csv(dataset_casefloding, index=False)
print(f"✅ Data final disimpan sebagai: {dataset_casefloding}")

# =================================================================
# === PREVIEW DATA ===
# =================================================================

print("\n--- Contoh Data Original (Kolom Bersih) ---")
print(df_original_clean[['userName', 'score', 'content']].head())

print("\n--- Contoh Data Final (Case Folding & Kolom Bersih) ---")
print(df_reviews_clean[['userName', 'score', 'at', 'content_clean']].head())

Memulai scrapping data Wattpad...
✅ Data berhasil di-scrapping: 1000 ulasan.
✅ Dataset Original dibuat dan disimpan.
✅ Case Folding pada 'content' selesai.
✅ Dataset Final (Case Folding) dibuat.
Kolom tersisa: ['userName', 'score', 'reviewCreatedVersion', 'at', 'replyContent', 'repliedAt', 'content_clean']
✅ Data final disimpan sebagai: wattpad_reviews_case_folded.csv

--- Contoh Data Original (Kolom Bersih) ---
             userName  score  \
0        Nur kholifah      3   
1          Ridestiana      2   
2  Gracesiella Amanda      1   
3             E L I O      1   
4            Dian a.l      1   

                                             content  
0            ya ampun sering banget ngga bisa dibuka  
1  kok sekarang malah gak bisa offline sih padaha...  
2  Gw gak masalah sih ini mau di duitin terus tap...  
3  ini kenapa sekarang gak bisa naruh link di inf...  
4  Ni aplikasi ada masalah apa sih? sinyal gua ba...  

--- Contoh Data Final (Case Folding & Kolom Bersih) ---
    