In [1]:
import random
import re
import time
import warnings
from urllib.parse import quote, urlparse

import pandas as pd
from googletrans import Translator
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from textblob import TextBlob
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
def _slug_parts(link):
    """Return ``(shop_slug, product_slug)`` parsed from a Tokopedia product URL.

    Either element is ``None`` when the link is missing, is not hosted on
    ``www.tokopedia.com``, or lacks the corresponding path segment. Query
    strings and fragments are ignored.
    """
    if not link:
        return None, None
    parsed = urlparse(link)
    if parsed.netloc != "www.tokopedia.com":
        return None, None
    segments = [part for part in parsed.path.split("/") if part]
    shop_slug = segments[0] if segments else None
    product_slug = segments[1] if len(segments) > 1 else None
    return shop_slug, product_slug


def _text_or_none(card, selector):
    """Return the text of the first descendant of ``card`` matching ``selector``, or ``None`` if absent."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except NoSuchElementException:
        return None


def scrape_tokopedia(query, max_products=4, wait_seconds=5):
    """Scrape the first few product cards from a Tokopedia search.

    Args:
        query: Free-text search phrase; it is percent-encoded before being
            placed in the URL.
        max_products: Maximum number of product cards to read (default 4).
        wait_seconds: Fixed delay after loading the page before cards are
            read (default 5).

    Returns:
        A list of dicts with the keys ``'Nama Produk'``, ``'Harga'``,
        ``'Toko'``, ``'Gambar'`` and ``'Link'``. Fields that cannot be read
        from the card fall back to values derived from the link, or to the
        placeholders ``"Unknown"``, the query itself, ``"Image"`` and
        ``"Rp. -"``.
    """
    options = Options()
    options.add_argument("--disable-gpu")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    results = []
    try:
        # quote() also escapes characters such as '&' and '#', which a plain
        # space replacement would let through and corrupt the query string.
        url = f"https://www.tokopedia.com/search?st=product&q={quote(query, safe='')}"
        driver.get(url)
        time.sleep(wait_seconds)

        # Take only the first `max_products` product cards.
        cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-theme="default"]')[:max_products]
        for card in cards:
            try:
                link = card.get_attribute('href')
                shop_slug, product_slug = _slug_parts(link)

                # Empty text is treated like a missing element so the
                # link-derived fallback is used instead of a blank value.
                toko = _text_or_none(card, 'span[class*="T0rpy-LEw"]')
                if not toko:
                    toko = shop_slug.replace("-", " ") if shop_slug else "Unknown"

                nama_produk = _text_or_none(card, 'span[class*="0T8-iGxMp"]')
                if not nama_produk:
                    # Derive the name from the URL's own product slug rather
                    # than from `toko`, which may be a display name that does
                    # not match the slug in the link.
                    nama_produk = product_slug.replace("-", " ") if product_slug else query

                try:
                    image = card.find_element(By.TAG_NAME, 'img').get_attribute('src')
                except NoSuchElementException:
                    image = "Image"

                harga = _text_or_none(card, 'div[class*="67d6E1xDK"]') or "Rp. -"

                results.append({
                    'Nama Produk': nama_produk,
                    'Harga': harga,
                    'Toko': toko,
                    'Gambar': image,
                    'Link': link
                })
            except Exception as e:
                # Best effort: report the failing card and keep going.
                print("Error parsing card:", e)
                continue
    finally:
        # Always release the browser, even if navigation or parsing raised.
        driver.quit()

    return results

In [12]:
def ambil_ulasan_dari_link(url, nama_produk, max_reviews=5):
    """Open a product page and return a random sample of its review texts.

    Args:
        url: Product page URL. A missing or empty value returns ``[]``
            without starting a browser.
        nama_produk: Product name copied into every returned record.
        max_reviews: Upper bound on the number of reviews sampled (default 5).

    Returns:
        A list of at most ``max_reviews`` dicts with the keys
        ``'Nama Produk'``, ``'Ulasan'`` and ``'Link Produk'``. The list is
        empty when the review tab or the review feed does not appear within
        10 seconds.
    """
    if not url:
        return []

    options = Options()
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(3)

        # Click the close button if it is present (presumably a pop-up
        # overlay); its absence is not an error.
        try:
            close_button = driver.find_element(
                By.CSS_SELECTOR,
                "div.css-kl3gjj.e1nc1fa20 > article > div > div.css-11hzwo5 > button")
            close_button.click()
        except NoSuchElementException:
            pass

        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(2)

        try:
            tab_ulasan_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((
                    By.CSS_SELECTOR,
                    "#header-main-wrapper > div.css-1mbdz04.e90swyx6 > div > nav > div > div.css-1kx9443-unf-tab-wrapper.ehv0kkf0 > div > button:nth-child(2)"))
            )
            tab_ulasan_button.click()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "review-feed"))
            )
        except TimeoutException:
            return []

        reviews = []
        for article in driver.find_elements(By.CSS_SELECTOR, "#review-feed > article"):
            try:
                span = article.find_element(
                    By.CSS_SELECTOR, "p.css-cvmev1-unf-heading.e1qvo2ff8 > span")
            except NoSuchElementException:
                # Articles without the review-text span are skipped.
                continue
            reviews.append({
                'Nama Produk': nama_produk,
                'Ulasan': span.text,
                'Link Produk': url
            })
    finally:
        # Single exit point for the browser: runs on success, on the early
        # timeout return, and on any unexpected exception.
        driver.quit()

    return random.sample(reviews, min(max_reviews, len(reviews)))


In [4]:
def analisis_sentimen(teks):
    """Classify the sentiment of an Indonesian text.

    The text is translated from Indonesian to English and scored with
    TextBlob. If translation raises, a warning is emitted and the
    untranslated text is scored instead.

    Args:
        teks: Review text. Non-string or blank input is reported as neutral
            without contacting the translation service.

    Returns:
        A ``(label, polarity)`` tuple where ``label`` is ``'positive'`` for
        polarity > 0, ``'neutral'`` for polarity == 0 and ``'negative'``
        otherwise.
    """
    if not isinstance(teks, str) or not teks.strip():
        return 'neutral', 0.0

    translator = Translator()
    try:
        teks_terjemahan = translator.translate(teks, src='id', dest='en').text
    except Exception as exc:
        # Keep the best-effort fallback, but make it visible: scoring
        # untranslated text silently would skew the results.
        warnings.warn(f"Translation failed, scoring untranslated text: {exc!r}")
        teks_terjemahan = teks

    polarity = TextBlob(teks_terjemahan).sentiment.polarity
    if polarity > 0:
        return 'positive', polarity
    elif polarity == 0:
        return 'neutral', polarity
    else:
        return 'negative', polarity

In [5]:
def label_ke_skor(polarity):
    """Map a sentiment polarity to a score on a 1-10 scale.

    Args:
        polarity: Sentiment polarity; the bands below assume the range
            [-1, 1].

    Returns:
        * ``7 + 3 * polarity`` (7.6 to 10) when polarity > 0.2
        * ``4 + 3 * polarity`` (1 to 3.4) when polarity < -0.2
        * ``5`` otherwise

        Computed scores are rounded to two decimals.
    """
    if polarity > 0.2:
        return round(7 + (polarity * 3), 2)
    elif polarity < -0.2:
        # The offset is 4 so that the negative band mirrors the positive one
        # and ends at 1 for polarity -1; an offset of 1 would produce scores
        # below zero on a scale reported as "/10".
        return round(4 + (polarity * 3), 2)
    else:
        return 5


In [13]:
# Search for products, then gather and score reviews for each of them.
query = "sunscreen azarine"
produk = scrape_tokopedia(query)
df_produk = pd.DataFrame(produk)

# Collect reviews from every product that has a usable link.
all_reviews = []
for item in produk:
    # A card's href can be missing; skip it rather than sending an empty
    # URL to the review scraper.
    if not item.get('Link'):
        continue
    ulasan = ambil_ulasan_dari_link(item['Link'], item['Nama Produk'])
    all_reviews.extend(ulasan)

df_ulasan = pd.DataFrame(all_reviews)

# Sentiment analysis: each review yields a (label, polarity) pair, which is
# unzipped into two columns before the polarity is mapped to a score.
if not df_ulasan.empty:
    df_ulasan["Sentimen"], df_ulasan["Polaritas"] = zip(*df_ulasan["Ulasan"].apply(analisis_sentimen))
    df_ulasan["Skor"] = df_ulasan["Polaritas"].apply(label_ke_skor)


In [14]:
# === Show results ===
# option_context lifts the row/column display limits only inside this block
# and restores the previous settings afterwards, even if printing raises.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print("\n==== Data Produk ====\n")
    print(df_produk)

    print("\n==== Data Ulasan + Analisis ====\n")
    print(df_ulasan)


==== Data Produk ====

                                         Nama Produk     Harga  \
0  Azarine Sunscreen Gel Hydrasoothe SPF45 Tube 5...  Rp65.000   
1  Azarine Sunscreen Serum HydraMax C SPF50 Tube ...  Rp65.000   
2  Azarine Facial Cleanser C White Brightening Tu...  Rp35.000   
3      Azarine Hydrasoothe Sunscreen Gel SPF 45 50ml  Rp66.000   

                      Toko                                             Gambar  \
0                  farmaku  https://images.tokopedia.net/img/cache/200-squ...   
1                  farmaku  https://images.tokopedia.net/img/cache/200-squ...   
2                  farmaku  https://images.tokopedia.net/img/cache/200-squ...   
3  Guardian Official Store  https://images.tokopedia.net/img/cache/200-squ...   

                                                Link  
0  https://www.tokopedia.com/farmaku/azarine-suns...  
1  https://www.tokopedia.com/farmaku/azarine-suns...  
2  https://www.tokopedia.com/farmaku/azarine-faci...  
3  https://www.toko

In [15]:
# Rata-rata skor & kesimpulan
# Average review score and the verdict derived from it:
# >= 7 is "Worth It!", >= 4 is "Considerable", anything lower is "Not Worth".
if not df_ulasan.empty:
    rata_rata = round(df_ulasan["Skor"].mean(), 2)

    if rata_rata >= 7:
        kesimpulan = "✅ Worth It!"
    elif rata_rata >= 4:
        kesimpulan = "⚠️ Considerable"
    else:
        kesimpulan = "❌ Not Worth"

    print(f"\nRata-rata skor ulasan: {rata_rata}/10")
    print(f"Kesimpulan: {kesimpulan}")


Rata-rata skor ulasan: 6.9/10
Kesimpulan: ⚠️ Considerable
