In [None]:
import argparse
import sys
from incidecoder_scraper import IncideCoderScraper

def parse_arguments(argv=None):
    """Parse and validate the command-line options of the scraper script.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behaviour; passing a list makes the function usable
            from tests or other Python code.

    Returns:
        argparse.Namespace with the attributes ``categories``, ``max_pages``,
        ``max_products``, ``output_csv``, ``output_json``, ``load_json``,
        ``delay_min`` and ``delay_max``.

    Raises:
        SystemExit: Raised by argparse (exit status 2) when the arguments
            are malformed, or when the delay range is negative or inverted.
    """
    parser = argparse.ArgumentParser(
        description='Scrape cosmetic products from IncideCoder.com')

    parser.add_argument('--categories', nargs='+', default=[
        'moisturizer', 'serum', 'cleanser', 'mask'
    ], help='List of product categories to scrape')

    parser.add_argument('--max-pages', type=int, default=5,
                        help='Maximum number of pages to scrape per category')

    parser.add_argument('--max-products', type=int, default=100,
                        help='Maximum number of products to scrape per category')

    parser.add_argument('--output-csv', default='incidecoder_products.csv',
                        help='Filename for CSV output')

    parser.add_argument('--output-json', default='incidecoder_products.json',
                        help='Filename for JSON output')

    parser.add_argument('--load-json', default=None,
                        help='Load previous results from JSON file')

    parser.add_argument('--delay-min', type=float, default=1.0,
                        help='Minimum delay between requests (seconds)')

    parser.add_argument('--delay-max', type=float, default=3.0,
                        help='Maximum delay between requests (seconds)')

    args = parser.parse_args(argv)

    # Fail fast with a usage message instead of handing the scraper a delay
    # range that makes no sense as a pair of wait times.
    if args.delay_min < 0 or args.delay_max < 0:
        parser.error('--delay-min and --delay-max must be non-negative')
    if args.delay_min > args.delay_max:
        parser.error('--delay-min must not be greater than --delay-max')

    return args


def _resolve_category(category):
    """Return ``(name, url)`` for a bare category name or a full category URL."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    if not category.startswith('http'):
        return category, f"https://incidecoder.com/products/product-type/{category}"

    # Take the last non-empty path segment so that a trailing slash, a query
    # string or a fragment cannot produce an empty or filename-hostile name.
    segments = [part for part in urlsplit(category).path.split('/') if part]
    name = segments[-1] if segments else 'category'
    return name, category


def main():
    """Run the scraper over every requested category and save the results.

    Reads the options via ``parse_arguments()``, optionally preloads earlier
    results from ``--load-json``, scrapes each category in turn, writes an
    ``incidecoder_<category>.json`` file after each one, and finally writes
    the CSV and JSON output files named on the command line.

    Each ``--categories`` entry may be a bare category name or a full URL
    starting with ``http``; for a URL the category name is taken from its
    last path segment.
    """
    args = parse_arguments()

    # Create scraper instance with specified delay
    scraper = IncideCoderScraper(delay_range=(args.delay_min, args.delay_max))

    # Load previous results if specified
    if args.load_json:
        scraper.load_from_json(args.load_json)
        print(f"Loaded {len(scraper.products)} products from {args.load_json}")

    # Process each category
    for category in args.categories:
        category_name, category_url = _resolve_category(category)

        print(f"\nScraping category: {category_name}")

        # Scrape the category
        scraper.scrape_category(
            category_url,
            max_pages=args.max_pages,
            max_products=args.max_products
        )

        # Save after every category so an aborted run keeps earlier work.
        # NOTE(review): presumably save_to_json writes everything collected
        # so far, not just this category -- confirm against the scraper.
        intermediate_json = f"incidecoder_{category_name}.json"
        scraper.save_to_json(intermediate_json)
        print(f"Saved intermediate results to {intermediate_json}")

    # Save final results
    scraper.save_to_csv(args.output_csv)
    scraper.save_to_json(args.output_json)
    print(f"Saved final results to {args.output_csv} and {args.output_json}")


# Script entry point: run the scraper and turn Ctrl-C into a clean exit
# instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nScraping interrupted by user. Exiting...")
        # 130 (128 + SIGINT) is the conventional status for an interrupted
        # process; exiting 0 would report an aborted scrape as a success.
        sys.exit(130)