IMPORT LIBRARY

In [1]:
import math

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

INISIALISASI URL DASAR

In [2]:
# Base URL of the product category listing; the scraping cell appends a
# `?page_num=N` query string to it to address individual result pages.
baseUrl = "https://sandbox.oxylabs.io/products/category/pc"
# Starting page number.
# NOTE(review): the scraping loop rebinds `page_num` from its own range(), so
# this initial value does not appear to be read anywhere — confirm it is needed.
page_num = 1

KLASIFIKASI HARGA

In [None]:
def classify_price(price_str, cheap_below=80, mid_max=89):
    """Classify a scraped price string into a price class.

    The string may carry a '$' or '€' symbol and may use either ',' or '.'
    as the decimal separator. When both separators are present, the one that
    occurs last is taken as the decimal mark and the other as a thousands
    separator (so '1.299,99 €' and '$1,299.99' both parse as 1299.99). When
    only ',' is present it is treated as a decimal comma.

    Args:
        price_str: Raw price text, e.g. '91,99 €' or '$79.99'.
        cheap_below: Prices strictly below this value are 'Murah'.
        mid_max: Prices from `cheap_below` up to and including this value
            are 'Sedang'; anything above is 'Mahal'.

    Returns:
        'Murah', 'Sedang' or 'Mahal', or 'Unknown' when `price_str` is not a
        string or does not contain a finite number.
    """
    # NOTE(review): with the default thresholds a price strictly between 89
    # and 90 (e.g. 89.99) is classified 'Mahal' — confirm this is intended.
    if not isinstance(price_str, str):
        return 'Unknown'

    cleaned = price_str.replace('$', '').replace('€', '').strip()
    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            # European style: '.' groups thousands, ',' is the decimal mark.
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            # US style: ',' groups thousands, '.' is the decimal mark.
            cleaned = cleaned.replace(',', '')
    else:
        cleaned = cleaned.replace(',', '.')

    try:
        price = float(cleaned)
    except ValueError:
        return 'Unknown'
    # float() accepts 'nan' and 'inf'; neither is a usable price.
    if not math.isfinite(price):
        return 'Unknown'

    if price < cheap_below:
        return 'Murah'
    if price <= mid_max:
        return 'Sedang'
    return 'Mahal'

SCRAPING WEBSITE

In [5]:
# Seconds to wait for the server before giving up; requests applies no
# timeout unless one is passed explicitly.
REQUEST_TIMEOUT = 10
# Last listing page to request (pages are numbered from 1).
LAST_PAGE = 3


def _text_or_none(parent, tag_name, css_class):
    """Return the stripped text of the first matching descendant, or None if absent."""
    node = parent.find(tag_name, class_=css_class)
    return node.get_text(strip=True) if node is not None else None


# Collected rows: (name, tags, raw price text, price class).
result = []
for page_num in range(1, LAST_PAGE + 1):
    url = f"{baseUrl}?page_num={page_num}"
    print(f"Scraping page: {url}")

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Connection errors and timeouts end the crawl instead of crashing the cell.
        print(f"Request failed: {exc}")
        break
    if response.status_code != 200:
        print("Failed to retrieve the page or no more pages available.")
        break

    # Parse the page and collect every product card on it.
    soup = BeautifulSoup(response.text, 'html.parser')
    produk = soup.find_all("div", class_="product-card css-e8at8d eag3qlw10")

    if not produk:
        print("No more data found, ending the scraping process.")
        break

    for item in produk:
        # A card missing its title or price yields None for that field
        # rather than raising AttributeError on `.get_text()`.
        nameGames = _text_or_none(item, "h4", "title css-7u5e79 eag3qlw7")
        tags = item.find('p', class_="category css-8fdgzc eag3qlw9")
        if tags:
            # Join the text of each child node of the category paragraph.
            tags = " | ".join([tag.get_text() for tag in tags])
        price = _text_or_none(item, "div", "price-wrapper css-li4v8k eag3qlw4")
        price_class = classify_price(price)
        result.append((nameGames, tags, price, price_class))

Scraping page: https://sandbox.oxylabs.io/products/category/pc?page_num=1
Scraping page: https://sandbox.oxylabs.io/products/category/pc?page_num=2
Scraping page: https://sandbox.oxylabs.io/products/category/pc?page_num=3


STATISTIK

In [6]:
# One row per scraped product.
df = pd.DataFrame(result, columns=['Name', 'Tags', 'Price', 'Price_Class'])

# Share of each price class, in percent.
total = len(df)
persen = df['Price_Class'].value_counts(normalize=True) * 100

stat_df = pd.DataFrame({
    'Kelas Harga': persen.index,
    'Persentase (%)': persen.values
})


def parse_price(price_str):
    """Convert a scraped price string to a float, or return None if it cannot be parsed.

    Strips '$' and '€'. When both ',' and '.' occur, the last one is taken as
    the decimal mark and the other as a thousands separator; a lone ',' is
    treated as a decimal comma. Non-string input and non-finite results
    ('nan', 'inf') yield None.
    """
    if not isinstance(price_str, str):
        return None
    cleaned = price_str.replace('$', '').replace('€', '').strip()
    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    else:
        cleaned = cleaned.replace(',', '.')
    try:
        value = float(cleaned)
    except ValueError:
        return None
    return value if math.isfinite(value) else None


# Average price over the rows whose price could be parsed. to_numeric keeps
# the column float-typed even when every value is None.
df['Parsed_Price'] = pd.to_numeric(df['Price'].apply(parse_price), errors='coerce')
avg_price = df['Parsed_Price'].dropna().mean()

# The mean of an empty selection is NaN (which is truthy), and a genuine
# average of 0.0 is falsy, so test for NaN explicitly instead of truthiness.
avg_value = avg_price if pd.notna(avg_price) else 'Tidak dapat dihitung'
# NOTE(review): the average price is stored in the percentage column of the
# statistics table — confirm this layout is intended.
avg_row = pd.DataFrame({'Kelas Harga': ['Rata-rata'], 'Persentase (%)': [avg_value]})
stat_df = pd.concat([stat_df, avg_row], ignore_index=True)

VISUALISASI DATA

In [10]:
# Visualisation

# One fixed colour per price class, so a class keeps its colour regardless of
# how frequent it is in a given run (value_counts() orders by frequency).
CLASS_COLORS = {
    'Murah': '#8fd3f4',
    'Sedang': '#f4b183',
    'Mahal': '#a9d18e',
    'Unknown': '#c0c0c0',
}
FALLBACK_COLOR = '#c0c0c0'

class_counts = df['Price_Class'].value_counts()
class_colors = [CLASS_COLORS.get(label, FALLBACK_COLOR) for label in class_counts.index]

# pie chart
fig, ax = plt.subplots(figsize=(6, 6))
class_counts.plot.pie(ax=ax, autopct='%1.1f%%', colors=class_colors)
ax.set_title('Distribusi Kelas Harga Game')
ax.set_ylabel('')  # drop the axis label pandas puts on pie charts
fig.tight_layout()
fig.savefig('pieChart.png')
plt.close(fig)

# bar chart
fig, ax = plt.subplots(figsize=(6, 4))
class_counts.plot.bar(ax=ax, color=class_colors)
ax.set_title('Distribusi Kelas Harga Game')
ax.set_xlabel('Kelas Harga')
ax.set_ylabel('Jumlah Game')
fig.tight_layout()
fig.savefig('barChart.png')
plt.close(fig)

EXPORT KE EXCEL

In [11]:
# Export to Excel (sheet 'sheet1': data, 'statistik': statistics, 'visualisasi': charts)
with pd.ExcelWriter('dataHasilScraping.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='sheet1', index=False)
    stat_df.to_excel(writer, sheet_name='statistik', index=False)

    # add_worksheet() registers the new sheet with the workbook itself.
    workbook = writer.book
    worksheet = workbook.add_worksheet('visualisasi')

    # The pie chart comes from a 6-inch-tall figure, which covers roughly 29
    # default-height rows; anchor the bar chart below it so they don't overlap.
    worksheet.insert_image('B2', 'pieChart.png')
    worksheet.insert_image('B34', 'barChart.png')