# Synchronous version

In [1]:
import requests
import time
from tqdm import tqdm
 
def download_all_sites(sites):
    def download_site(url, session):
        with session.get(url) as response:
            length = len(response.content)
    #         print(f"Read {len(response.content)} from {url}")
    with requests.Session() as session:
        for url in tqdm(sites):
            download_site(url, session)

In [1]:
sites = [
    "https://www.jython.org",
    "http://olympus.realpython.org/dice",
] * 80

In [2]:
start_time = time.time()
download_all_sites(sites)
duration = time.time() - start_time
print(f"Downloaded {len(sites)} in {duration} seconds")

100%|██████████| 160/160 [00:20<00:00,  7.86it/s]

Downloaded 160 in 20.370038986206055 seconds





# Threading version

In [3]:
import concurrent
import threading

thread_local = threading.local()

def download_all_sites_threading(sites):
    def get_session():
        if not hasattr(thread_local, "session"):
            thread_local.session = requests.Session()
        return thread_local.session
    def download_site(url):
        session = get_session()
        with session.get(url) as response:
            length = len(response.content)
#             print(f"Read {len(response.content)} from {url}")
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(download_site, sites)

In [4]:
start_time = time.time()
download_all_sites_threading(sites)
duration = time.time() - start_time
print(f"Downloaded {len(sites)} in {duration} seconds")

Downloaded 160 in 4.3464741706848145 seconds


# Asyncio version

In [2]:
import asyncio
import aiohttp
import time
from tqdm import tqdm

In [3]:
async def download_site(session, url):
    async with session.get(url) as response:
        length = len(response.content)
#             print(f"Read {len(response.content)} from {url}")

async def download_all_sites_asyncio(sites):    
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in tqdm(sites):
            task = asyncio.ensure_future(download_site(session, url))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exceptions=True)

start_time = time.time()
asyncio.get_event_loop().run_until_complete(download_all_sites_asyncio(sites))
duration = time.time() - start_time
print(f"Downloaded {len(sites)} sites in {duration} seconds")

RuntimeError: This event loop is already running

  0%|          | 0/160 [00:00<?, ?it/s]


# uvloop version