# Saving From Web with Browser

This notebook saves files from the Web by automating a real Chrome browser with Playwright.

In [None]:
import asyncio
import getpass
import logging
import platform
import nest_asyncio
from pathlib import Path
from playwright.async_api import async_playwright
from playwright_stealth import stealth_async

# ---------------------------------------------------------------------------
# Configuration
#
# Everything the automation needs is defined here at module level:
#   user_profile_name   - folder name of the Chrome profile to use
#   chrome_path         - Chrome executable for the current OS
#   user_data_dir       - profile folder handed to Playwright
#   local_download_path - directory where downloads are saved
#   url                 - page that hosts the download link
#   retry_interval      - seconds to wait between checks for the saved file
#   max_retries         - how many times to check for the saved file
# ---------------------------------------------------------------------------

# You may wish to change this from default. This script gives you the time & instructions to do so.
user_profile_name = "Person 1"  # You can change this to any folder name you prefer

# Apply nest_asyncio to run in Jupyter
nest_asyncio.apply()

# Set up logging to capture and silence errors
logging.basicConfig(level=logging.INFO)
logging.getLogger('asyncio').setLevel(logging.CRITICAL)

# User data folder configuration
username = getpass.getuser()
system = platform.system()

# Ask the OS for the real home directory instead of rebuilding it from the
# login name: the home folder is not always "<prefix>/<username>" (root on
# Linux, relocated profiles, Windows profile folders that differ from the
# login name, installs on another drive).
home_dir = str(Path.home())

# Common paths (only the one matching the current OS is actually used below)
windows_base_path = f"{home_dir}\\AppData\\Local\\Google\\Chrome\\User Data"
macos_base_path = f"{home_dir}/Library/Application Support/Google/Chrome"
linux_base_path = f"{home_dir}/.config/google-chrome"

if system == "Windows":
    chrome_path = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
    user_data_dir = f"{windows_base_path}\\{user_profile_name}"
    local_download_path = f"{home_dir}\\Downloads"
elif system == "Darwin":  # macOS
    chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
    user_data_dir = f"{macos_base_path}/{user_profile_name}"
    local_download_path = f"{home_dir}/Downloads"
elif system == "Linux":  # Ubuntu/Debian
    chrome_path = "/usr/bin/google-chrome"
    user_data_dir = f"{linux_base_path}/{user_profile_name}"
    local_download_path = f"{home_dir}/Downloads"
else:
    # RuntimeError is still an Exception, so existing `except Exception`
    # handlers keep working; the message now names the offending platform.
    raise RuntimeError(f"Unsupported operating system: {system!r}")

# Page that hosts the sample CSV download links.
url = "https://wsform.com/knowledgebase/sample-csv-files/"
# Seconds to wait between checks for the saved file, and how many checks to make.
retry_interval = 10
max_retries = 10

async def automation():
    """Download ``industry.csv`` from ``url`` with a persistent Chrome profile.

    Steps:
      1. Launch Chrome (``chrome_path``) in headed mode with the persistent
         profile at ``user_data_dir``.
      2. Open a new page, apply ``stealth_async`` to it and navigate to ``url``.
      3. Click the ``industry.csv`` link and wait for the download event.
      4. Save the download into ``local_download_path`` under its suggested
         file name. A failure to save is logged with its traceback and does
         not abort the run.
      5. Poll up to ``max_retries`` times, ``retry_interval`` seconds apart,
         until the saved file exists on disk.
      6. Wait a further 5 seconds, then close the browser context.

    All configuration is read from module-level names; the function takes no
    arguments and returns ``None``. Errors raised while launching, navigating,
    clicking or waiting for the download propagate to the caller, but the
    browser context is always closed first.
    """
    async with async_playwright() as plwt:
        context = await plwt.chromium.launch_persistent_context(
            user_data_dir,
            slow_mo=50,
            executable_path=chrome_path,
            accept_downloads=True,
            downloads_path=local_download_path,
            ignore_default_args=["--enable-automation"],
            headless=False,
        )

        # From here on the browser is running: guarantee it is closed even if
        # navigation, the click or the download raises.
        try:
            page = await context.new_page()

            # Apply stealth
            await stealth_async(page)

            await page.goto(url)
            logging.info(f"Navigated to {url}")

            async with page.expect_download() as download_info:
                await page.click('text=industry.csv')
                logging.info("Clicked on the download link for industry.csv")
                download = await download_info.value

            # Resolve the target path *before* the try block so that both the
            # error message and the existence check below can always use it.
            suggested_filename = download.suggested_filename
            the_file = Path(local_download_path) / suggested_filename

            try:
                # save_as() waits for the download to finish if necessary, so
                # no separate wait on download.path() is needed.
                await download.save_as(the_file)
                logging.info(f"Saved {suggested_filename} to {local_download_path}")
            except Exception:
                # Best effort: record the cause and carry on to the check below.
                logging.exception(f"Could not save {suggested_filename} to {local_download_path}")

            # Make sure it's downloaded, because async downloads can be interrupted by proceeding ahead too fast.
            for _ in range(max_retries):
                if the_file.exists():
                    # File found, exit the loop
                    logging.info(f"File '{the_file}' found.")
                    break
                # File not found, wait a bit and try again
                logging.error(f"File '{the_file}' not found. Retrying...")
                await asyncio.sleep(retry_interval)

            await asyncio.sleep(5)  # Lots of time given to set up persistent login.
        finally:
            await context.close()

# Script entry point: run the automation() coroutine to completion, bracketed
# by progress messages on stdout.
# asyncio.run() is used directly; the nest_asyncio.apply() call made earlier in
# this file is, per its own comment, what allows this to run inside Jupyter.
print("Starting browser automation...")
asyncio.run(automation())
print("Done!")