In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

def rating_to_text(rating):
    """Return the Indonesian star label for a rating.

    Ratings equal to 1 through 5 map to "Satu bintang" ... "Lima bintang";
    every other value yields "Rating tidak valid".
    """
    star_labels = (
        "Satu bintang",
        "Dua bintang",
        "Tiga bintang",
        "Empat bintang",
        "Lima bintang",
    )
    # Equality comparison (rather than a dict lookup) keeps values such as
    # 3.0 matching and lets unhashable inputs fall through to the default.
    for stars, label in enumerate(star_labels, start=1):
        if rating == stars:
            return label
    return "Rating tidak valid"

def _stripped_text(card, tag, css_class):
    """Return the stripped text of the first matching child, or None."""
    node = card.find(tag, class_=css_class)
    return node.text.strip() if node is not None else None


def extract_reviews(url, timeout=10):
    """Download one review page and return the reviews found on it.

    Args:
        url: Address of the review page to fetch.
        timeout: Seconds ``requests`` waits for the server before giving
            up. Without a timeout a stalled connection blocks forever.

    Returns:
        A list with one ``[rating, content, date, username]`` entry per
        ``div.c-reviews-item`` card on the page; an empty list when the
        page contains no such card.

        * ``rating`` is the number of star spans inside the rating
          element, or 0 when that element is absent.
        * ``content``, ``date`` and ``username`` are stripped text, or
          ``None`` when the corresponding element is missing. ``date`` is
          also ``None`` when its text contains nothing shaped like
          ``"<digits> <word>, <digits>"``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    cards = soup.find_all('div', class_='c-reviews-item')

    data = []

    for card in cards:
        # A card without a rating element is recorded as zero stars instead
        # of aborting the whole page with an AttributeError.
        rating_element = card.find('div', class_='c-rating__fg c-rating__fg--default')
        if rating_element is None:
            rating = 0
        else:
            rating = len(rating_element.find_all(
                'span', class_='c-rating__unit c-icon c-icon--medium c-icon--star'))

        content = _stripped_text(card, 'h4', 'c-reviews-item__title u-mrgn-top--1')
        username = _stripped_text(card, 'div', 'c-reviews-item__foot')

        # Keep only the "<day> <month>, <year>" part of the date line.
        date = _stripped_text(card, 'p', 'c-reviews-item__date u-fg--ash')
        if date is not None:
            date_match = re.search(r'\d+\s\w+,\s\d+', date)
            date = date_match.group() if date_match else None

        data.append([rating, content, date, username])

    return data

def scrape_all_reviews(url, max_pages=None):
    """Collect the reviews from every page of a paginated review listing.

    Pages are requested as ``page=1``, ``page=2``, ... through
    ``extract_reviews`` until a page yields no reviews.

    Args:
        url: Base address of the listing, with or without a query string.
        max_pages: Optional upper bound on the number of pages fetched.
            ``None`` (the default) means no fixed limit.

    Returns:
        A single list holding the review rows of all fetched pages, in
        page order.
    """
    # Appending "&page=N" is only valid when a query string already
    # exists; otherwise the parameter has to be introduced with "?".
    separator = '&' if '?' in url else '?'

    all_reviews_data = []
    previous_page = None

    page_number = 1
    while max_pages is None or page_number <= max_pages:
        current_url = f"{url}{separator}page={page_number}"
        reviews_data = extract_reviews(current_url)

        # Stop once a page has no more reviews.
        if not reviews_data:
            break

        # Safety net: if the server answers an out-of-range page number by
        # repeating the previous page (not verified for this site), the
        # loop would otherwise never terminate.
        if reviews_data == previous_page:
            break

        all_reviews_data.extend(reviews_data)
        previous_page = reviews_data
        page_number += 1

    return all_reviews_data

def clean_and_save_reviews(data, filename='reviews.csv'):
    """Turn scraped review rows into a table and write it to a CSV file.

    Args:
        data: Rows of ``[rating, content, date, username]``.
        filename: Path of the CSV file to write.

    The table gains a ``rating_text`` column produced by
    ``rating_to_text``, its ``date`` column is parsed with the
    ``'%d %b, %Y'`` format, and it is saved without the index column. A
    confirmation message is printed afterwards.
    """
    column_names = ['rating', 'content', 'date', 'username']
    reviews = pd.DataFrame(data, columns=column_names)

    # Add a human-readable label next to each numeric rating.
    reviews['rating_text'] = reviews['rating'].apply(rating_to_text)

    # Convert the date strings into datetime values.
    # NOTE(review): '%b' is an abbreviated month name; whether non-English
    # abbreviations parse depends on the runtime locale - confirm.
    parsed_dates = pd.to_datetime(reviews['date'], format='%d %b, %Y')
    reviews['date'] = parsed_dates

    # Write the result to disk.
    reviews.to_csv(filename, index=False)
    print(f"Ulasan disimpan ke dalam {filename}")

# Script entry point: scrape every review page of one product listing and
# store the result as a CSV file.
if __name__ == "__main__":
    url = "https://www.bukalapak.com/p/fashion-pria/kemeja/ltxt5u-jual-kemeja-batik-kemeja-kerja-kemeja-kantor-kemeja-formal-kemeja-seragam-batik-pekalongan-murah?from=homepage&source=fvt&panel=3&type=recommendation"
    
    # Scrape all reviews, page by page.
    all_reviews_data = scrape_all_reviews(url)
    
    # Clean the collected rows and save them (default file: reviews.csv).
    clean_and_save_reviews(all_reviews_data)
