In [25]:
from urllib.request import urlopen


def download_url(url):
    with urlopen(url, timeout=3) as connection:
        return connection.read(), url
    
    
def save_file(url, data, path):
    filename = path / f"{url.split('/')[-1]}"
    with open(str(filename), 'wb') as file:
        file.write(data)
    print(filename)

def download_and_save(url, path):
    data, url = download_url(url)
    save_file(url, data, path)

In [26]:
# python concurrency API docs
urls = [
    'https://docs.python.org/3/library/concurrency.html',
    'https://docs.python.org/3/library/concurrent.html',
    'https://docs.python.org/3/library/concurrent.futures.html',
    'https://docs.python.org/3/library/threading.html',
    'https://docs.python.org/3/library/multiprocessing.html',
    'https://docs.python.org/3/library/multiprocessing.shared_memory.html',
    'https://docs.python.org/3/library/subprocess.html',
    'https://docs.python.org/3/library/queue.html',
    'https://docs.python.org/3/library/sched.html',
    'https://docs.python.org/3/library/contextvars.html'
]

# 1. Iterates

In [27]:
from pathlib import Path
from tqdm import tqdm

path = Path('docs')
path.mkdir(parents=True, exist_ok=True)
for url in tqdm(urls):
    download_and_save(url, path)

 20%|██        | 2/10 [00:00<00:01,  5.53it/s]

docs/concurrency.html
docs/concurrent.html


 30%|███       | 3/10 [00:00<00:01,  5.14it/s]

docs/concurrent.futures.html


 40%|████      | 4/10 [00:00<00:01,  4.68it/s]

docs/threading.html


 50%|█████     | 5/10 [00:01<00:01,  3.88it/s]

docs/multiprocessing.html


 60%|██████    | 6/10 [00:01<00:00,  4.17it/s]

docs/multiprocessing.shared_memory.html


 80%|████████  | 8/10 [00:02<00:00,  3.18it/s]

docs/subprocess.html
docs/queue.html


100%|██████████| 10/10 [00:02<00:00,  3.94it/s]

docs/sched.html
docs/contextvars.html





# 2. Concurrently With submit()

In [28]:
from concurrent.futures import ThreadPoolExecutor


n_threads = len(urls)
with ThreadPoolExecutor(n_threads) as executor:
    _ = [executor.submit(download_and_save, url, path) for url in tqdm(urls)]

100%|██████████| 10/10 [00:00<00:00, 885.51it/s]


docs/sched.html
docs/contextvars.html
docs/queue.html
docs/multiprocessing.shared_memory.html
docs/concurrent.futures.html
docs/concurrent.html
docs/subprocess.html
docs/threading.html
docs/concurrency.html
docs/multiprocessing.html


In [29]:
with ThreadPoolExecutor(n_threads) as executor:
    # download each url and save as a local file
    futures = [executor.submit(download_url, url) for url in urls]
    # process each result as it is available
    for future in futures:
        # get the downloaded url data
        data, url = future.result()
        # check for no data
        if data is None:
            print(f'>Error downloading {url}')
            continue
        # save the data to a local file
        outpath = save_file(url, data, path)
        # report progress
        print(f'>Saved {url} to {outpath}')

docs/concurrency.html
>Saved https://docs.python.org/3/library/concurrency.html to None
docs/concurrent.html
>Saved https://docs.python.org/3/library/concurrent.html to None
docs/concurrent.futures.html
>Saved https://docs.python.org/3/library/concurrent.futures.html to None
docs/threading.html
>Saved https://docs.python.org/3/library/threading.html to None
docs/multiprocessing.html
>Saved https://docs.python.org/3/library/multiprocessing.html to None
docs/multiprocessing.shared_memory.html
>Saved https://docs.python.org/3/library/multiprocessing.shared_memory.html to None
docs/subprocess.html
>Saved https://docs.python.org/3/library/subprocess.html to None
docs/queue.html
>Saved https://docs.python.org/3/library/queue.html to None
docs/sched.html
>Saved https://docs.python.org/3/library/sched.html to None
docs/contextvars.html
>Saved https://docs.python.org/3/library/contextvars.html to None
