In [10]:
import csv
import json
import re
from datetime import datetime, timezone

import requests

# Function to extract shop ID and item ID from the URL
def extract_shop_item_ids(url):
    """Return the ``(shop_id, item_id)`` pair embedded in a product URL.

    The IDs are read from an ``i.<shop_id>.<item_id>`` token. Only the part
    of the URL before any ``?`` or ``#`` is searched, and when that part
    contains several such tokens the last one wins, so a product name like
    ``Wifi.2.4GHz-...-i.111.222`` yields ``('111', '222')`` rather than
    ``('2', '4')``.

    Args:
        url: Product page URL as a string.

    Returns:
        A tuple of two digit strings: ``(shop_id, item_id)``.

    Raises:
        SystemExit: With the message ``"Invalid URL"`` when no ID token is
            found. Raised directly instead of calling the ``site``-provided
            ``exit()`` helper, so it also works when ``site`` is not loaded
            and terminates with a non-zero status.
    """
    # Drop the query string / fragment so tracking parameters cannot match.
    path = re.split(r'[?#]', url, maxsplit=1)[0]

    # Every (shop_id, item_id) candidate in order of appearance.
    candidates = re.findall(r'i\.(\d+)\.(\d+)', path)
    if not candidates:
        raise SystemExit("Invalid URL")

    # The real ID token trails the product name, so take the last candidate.
    return candidates[-1]

# URL to scrape
url = 'https://shopee.com.my/Sony-Bone-Conduction-Bluetooth-Earphone-Earring-Wireless-Ear-Clip-Earbuds-Sound-Earcuffs-Sport-Headset-With-Mic-i.48959264.20773737808?sp_atk=b253076a-d05e-47a0-8175-34a80703b02b&xptdk=b253076a-d05e-47a0-8175-34a80703b02b'

# Extract shop ID and item ID from the URL
shop_id, item_id = extract_shop_item_ids(url)

# Paging configuration: how many reviews to request per call and how many
# calls to make at most (3 pages of 6 reviews).
REVIEWS_PER_PAGE = 6
MAX_PAGES = 3

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely.
REQUEST_TIMEOUT = 10

# Endpoint that serves the ratings of a single item
RATINGS_ENDPOINT = 'https://shopee.com.my/api/v2/item/get_ratings'

# Set initial offset and initialize list to store reviews
offset = 0
reviews_data = []

for _ in range(MAX_PAGES):
    # Query parameters for the current page (same keys and order as the
    # hand-built query string this replaces).
    query = {
        'filter': 0,
        'flag': 1,
        'itemid': item_id,
        'limit': REVIEWS_PER_PAGE,
        'offset': offset,
        'shopid': shop_id,
        'type': 0,
    }

    # Fetch reviews data from the API; fail loudly on HTTP errors instead of
    # trying to parse an error page as JSON.
    response = requests.get(RATINGS_ENDPOINT, params=query, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    # NOTE(review): presumably 'data' or 'ratings' can be null/missing when
    # the API has nothing to return -- treat that as an empty page rather
    # than crashing on a None subscript.
    ratings = (data.get('data') or {}).get('ratings') or []
    if not ratings:
        print("API response contained no review data; stopping.")
        break

    # Extract reviews from the data and add to reviews_data list
    for rating in ratings:
        reviewer_name = rating['author_username']

        # Extract date only (UTC), using a timezone-aware conversion because
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        timestamp = rating['ctime']
        review_date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d')

        # Combine all lines of comment into one string; a missing/null
        # comment becomes an empty string.
        comment_text = rating.get('comment') or ''
        review_comment = ' '.join(line.strip() for line in comment_text.split('\n'))

        reviews_data.append([reviewer_name, review_date, review_comment])

    # A short page means there are no more reviews to fetch
    if len(ratings) < REVIEWS_PER_PAGE:
        break

    # Increment offset for next page of reviews
    offset += REVIEWS_PER_PAGE

# Save reviews to a CSV file

# Name of the CSV file
csv_file = 'shopee_reviews.csv'
with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Reviewer Name', 'Review Date', 'Review Comment'])
    writer.writerows(reviews_data)

print("Scraping and saving complete!")


Scraping and saving complete!
