In [4]:
import urllib.parse
import json
from pathlib import Path

def parse_carousell_url(url):
    """Extract the search parameters from a Carousell URL.

    Two URL layouts are recognised:

    * ``/categories/<category>/?search=<query>&...`` -- the category is the
      second path segment and the query is the ``search`` parameter.
    * anything else -- there is no category; the query is the second path
      segment when there is one, otherwise the ``search`` parameter.

    Query-string parameters other than ``search``, ``sort_by``,
    ``price_start``, ``price_end`` and ``tab`` are ignored.

    Args:
        url: The URL string to parse.

    Returns:
        A dict with the keys ``category``, ``query``, ``sort_by`` (int,
        3 when the parameter is absent), ``price_start`` and ``price_end``
        (int or None) and ``tab`` (str or None).

    Raises:
        ValueError: If ``sort_by``, ``price_start`` or ``price_end`` is
            present but is not an integer.
    """
    parsed_url = urllib.parse.urlparse(url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    path_parts = parsed_url.path.strip('/').split('/')
    search_param = query_params.get('search', [None])[0]

    if path_parts[0] == 'categories':
        category = path_parts[1] if len(path_parts) > 1 else None
        search_query = search_param
    else:
        category = None
        if len(path_parts) > 1:
            # parse_qs decodes query-string values, but urlparse leaves the
            # path percent-encoded; decode it so both sources of the query
            # yield plain text (and re-encoding it later does not double up).
            search_query = urllib.parse.unquote(path_parts[1])
        else:
            # No query in the path: fall back to ?search=..., which is the
            # form build_url produces for a search without a category.
            search_query = search_param

    sort_by = query_params.get('sort_by', ['3'])[0]  # Default to 3 if not present

    price_start = query_params.get('price_start', [None])[0]
    price_end = query_params.get('price_end', [None])[0]

    tab = query_params.get('tab', [None])[0]

    return {
        "category": category,
        "query": search_query,
        "sort_by": int(sort_by),
        "price_start": int(price_start) if price_start else None,
        "price_end": int(price_end) if price_end else None,
        "tab": tab
    }

def build_url(search_item, base_url):
    """Build a Carousell search URL from a parsed search item.

    Args:
        search_item: A mapping that may contain the keys ``category``,
            ``query``, ``price_start``, ``price_end``, ``sort_by`` and
            ``tab``. Missing keys are treated the same as ``None``.
        base_url: The site root, e.g. ``"https://www.carousell.sg/"``.
            A missing trailing slash is added.

    Returns:
        The URL string. The path is ``categories/<category>/`` under
        ``base_url`` when a category is set, otherwise ``base_url`` itself.
        Parameters are emitted in the order search, price_start, price_end,
        sort_by, tab, with spaces encoded as ``%20``.
    """
    query = search_item.get('query')
    price_start = search_item.get('price_start')
    price_end = search_item.get('price_end')
    sort_by = search_item.get('sort_by')
    tab = search_item.get('tab')
    category = search_item.get('category')

    params = {}

    if query:
        params['search'] = query
    # Prices and sort order are compared against None so that 0 is kept.
    if price_start is not None:
        params['price_start'] = price_start
    if price_end is not None:
        params['price_end'] = price_end
    if sort_by is not None:
        params['sort_by'] = sort_by
    # The 'all' tab is never written into the URL.
    if tab and tab != 'all':
        params['tab'] = tab

    # Without this, "https://host" + "categories/..." would fuse the path
    # onto the host name.
    if base_url and not base_url.endswith('/'):
        base_url += '/'

    url = f"{base_url}categories/{category}/" if category else base_url

    if params:
        # quote_via=quote encodes spaces as %20 rather than the default '+'.
        encoded_params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
        url += f"?{encoded_params}"

    return url

def test_url(url, base_url):
    """Print a round-trip report for one URL.

    The URL is parsed with parse_carousell_url, rebuilt with build_url
    against ``base_url``, and the parsed fields, the rebuilt URL and an
    exact-match verdict are written to stdout.
    """
    separator = "-" * 50
    print(f"\nTesting URL: {url}")
    print(separator)

    # Step 1: break the URL down into its search fields.
    fields = parse_carousell_url(url)
    print("Parsed Data:")
    for name in fields:
        print(f"  {name}: {fields[name]}")

    # Step 2: assemble a URL again from those fields.
    round_tripped = build_url(fields, base_url)
    print("\nRebuilt URL:")
    print(round_tripped)

    # Step 3: report whether the round trip reproduced the input exactly.
    print("\nVerification:")
    verdict = (
        "✅ Original URL and rebuilt URL match exactly."
        if round_tripped == url
        else "⚠️ URLs don't match exactly. Please verify if the differences are acceptable."
    )
    print(verdict)

    print("\n")

def main():
    """Run the sample URLs through test_url, then prompt for more.

    After the built-in samples, URLs are read from standard input until
    the user enters 'q' (case-insensitive) or input ends. A URL that
    cannot be parsed is reported and the prompt continues.
    """
    base_url = "https://www.carousell.sg/"

    test_urls = [
        "https://www.carousell.sg/categories/ipad-5716/?price_end=350&price_start=150&search=ipad%209th%20gen&searchId=h9efHL&sort_by=3&tab=marketplace",
        "https://www.carousell.sg/categories/5704/?search=apple%20pencil%20gen%201&searchId=vywyq7&sort_by=3",
        # Add more test URLs here
    ]

    for url in test_urls:
        test_url(url, base_url)

    while True:
        try:
            user_url = input("Enter a Carousell URL to test (or 'q' to quit): ").strip()
        except EOFError:
            # Standard input was closed; there is nothing more to read.
            break
        if user_url.lower() == 'q':
            break
        try:
            test_url(user_url, base_url)
        except ValueError as exc:
            # Raised for non-integer sort_by/price values or a malformed
            # host; one bad entry should not end the whole session.
            print(f"⚠️ Could not parse URL: {exc}")

# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Testing URL: https://www.carousell.sg/categories/ipad-5716/?price_end=350&price_start=150&search=ipad%209th%20gen&searchId=h9efHL&sort_by=3&tab=marketplace
--------------------------------------------------
Parsed Data:
  category: ipad-5716
  query: ipad 9th gen
  sort_by: 3
  price_start: 150
  price_end: 350
  tab: marketplace

Rebuilt URL:
https://www.carousell.sg/categories/ipad-5716/?search=ipad%209th%20gen&price_start=150&price_end=350&sort_by=3&tab=marketplace

Verification:
⚠️ URLs don't match exactly. Please verify if the differences are acceptable.



Testing URL: https://www.carousell.sg/categories/5704/?addRecent=false&canChangeKeyword=false&includeSuggestions=false&search=apple%20pencil%20gen%201&searchId=GegnGI&t-search_query_source=ss_dropdown
--------------------------------------------------
Parsed Data:
  category: 5704
  query: apple pencil gen 1
  sort_by: 3
  price_start: None
  price_end: None
  tab: None

Rebuilt URL:
https://www.carousell.sg/categories/5704/?s