In [1]:
import re
import json
import requests
import pandas as pd


In [2]:

def parse_shopee_url(url):
    """
    Extracts shop_id and item_id from a Shopee product URL.

    Recognised URL shapes, tried in this order:
    1. Slug form whose path segment ends in ``i.<shop_id>.<item_id>``
       (e.g. ``.../Some-Product-i.123.456?foo=bar``).
    2. ``/product/<shop_id>/<item_id>``.
    3. Any ``i.<digits>.<digits>`` occurrence (the original, loose rule),
       kept as a last resort so previously accepted URLs still parse.

    Parameters:
    - url (str): Shopee product URL.

    Returns:
    - tuple: (shop_id, item_id) as strings of digits if successful;
      otherwise (None, None), after printing a diagnostic message.
    """
    patterns = (
        # Anchored to the end of a path segment so that dotted numbers inside
        # the product name (e.g. "Wifi.5.0") are not mistaken for the IDs.
        r'i\.(\d+)\.(\d+)(?=[/?#]|$)',
        r'/product/(\d+)/(\d+)',
        r'i\.(\d+)\.(\d+)',
    )

    # Non-string input (None, numbers, ...) is reported like any other
    # unparseable URL instead of raising TypeError inside re.search.
    if isinstance(url, str):
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                return match[1], match[2]

    print("Invalid URL format. Could not extract shop_id and item_id.")
    return None, None

def fetch_reviews(shop_id, item_id, limit=50, offset=0, timeout=10, headers=None):
    """
    Fetches reviews from Shopee API for a specified shop and item.

    Every failure mode (network error, non-200 status, non-JSON body) is
    reported with a printed message and a ``None`` return, so callers only
    need to check for ``None``.

    Parameters:
    - shop_id (str): Shop ID extracted from the product URL.
    - item_id (str): Item ID extracted from the product URL.
    - limit (int): Number of reviews to fetch per API call (default: 50).
    - offset (int): Offset for pagination (default: 0).
    - timeout (float | tuple): Seconds to wait for the server before giving
      up (default: 10). Without a timeout the request could block forever.
    - headers (dict | None): Optional HTTP headers to send with the request
      (e.g. a browser-like User-Agent or cookies). Default: None.

    Returns:
    - dict: JSON data of the product reviews, or None on any failure.
    """
    ratings_url = 'https://shopee.co.id/api/v2/item/get_ratings'
    # Let requests build and encode the query string; key order mirrors the
    # original hand-written URL.
    query = {
        'filter': 0,
        'flag': 1,
        'itemid': item_id,
        'limit': limit,
        'offset': offset,
        'shopid': shop_id,
        'type': 0,
    }

    try:
        response = requests.get(ratings_url, params=query, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Covers connection errors, timeouts, invalid URLs, etc.
        print(f"Failed to fetch data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data: Status code {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response is not guaranteed to carry a JSON body.
        print(f"Failed to fetch data: response was not valid JSON ({exc})")
        return None

def reviews_to_dataframe(review_data):
    """
    Converts Shopee review JSON data to a pandas DataFrame.

    If the payload is a dict containing a list at ``data -> ratings``, that
    list is used so the result has one row per review. Any other non-empty
    payload is passed to ``pd.DataFrame`` unchanged (the original behaviour).
    Payloads pandas cannot turn into a frame (for example a dict holding only
    scalar values) yield an empty DataFrame instead of raising.

    Parameters:
    - review_data (dict): JSON data of the reviews.

    Returns:
    - DataFrame: DataFrame containing the reviews; empty if there is nothing
      to convert.
    """
    if not review_data:
        print("No data to convert to DataFrame.")
        return pd.DataFrame()

    # NOTE(review): assumes the get_ratings response nests the review list
    # under data.ratings — confirm against a live response.
    if isinstance(review_data, dict):
        payload = review_data.get("data")
        if isinstance(payload, dict) and isinstance(payload.get("ratings"), list):
            return pd.DataFrame(payload["ratings"])

    try:
        return pd.DataFrame(review_data)
    except ValueError as exc:
        # e.g. "If using all scalar values, you must pass an index" for an
        # error-only payload.
        print(f"No data to convert to DataFrame. ({exc})")
        return pd.DataFrame()

def main(url, limit=50, offset=0):
    """
    Run the full pipeline: URL -> (shop_id, item_id) -> API response -> DataFrame.

    Parameters:
    - url (str): Shopee product URL.
    - limit (int): Number of reviews per API call.
    - offset (int): Starting offset for reviews.

    Returns:
    - DataFrame: DataFrame with the product reviews; an empty DataFrame when
      the IDs cannot be extracted from the URL.
    """
    shop, item = parse_shopee_url(url)

    # Guard clause: without both IDs there is nothing to request.
    if not (shop and item):
        return pd.DataFrame()

    raw_reviews = fetch_reviews(shop, item, limit, offset)
    return reviews_to_dataframe(raw_reviews)



In [3]:

# Example usage:
# The product URL below ends its path with `i.51925611.20282046522`, the
# `i.<shop_id>.<item_id>` pattern that parse_shopee_url() looks for; the
# `?xptdk=...` query string is not part of that pattern.
url = 'https://shopee.co.id/Official-Xiaomi-Redmi-12-(8GB-128GB)-(8GB-256GB)-MediaTek-Helio-G88-5000-mAh-18W-90Hz-FHD-50MP-Triple-Kamera-AI-i.51925611.20282046522?xptdk=27a3d8c0-b7e0-405c-817d-5f91385649ca'
# main() uses its defaults here (limit=50, offset=0) and performs a live HTTP
# request, so the result depends on network access and the server's response.
df = main(url)
# Print the first rows of the result.
print(df.head())

Failed to fetch data: Status code 403
No data to convert to DataFrame.
Empty DataFrame
Columns: []
Index: []
