In [17]:
# https://realpython.com/python-concurrency/
# https://realpython.com/intro-to-python-threading/

# Requests Library Documentation: https://2.python-requests.org/en/master/
# Session Object Documentation: https://2.python-requests.org/en/master/user/advanced/#id1

# Resources:
# https://github.com/realpython/materials/tree/master/concurrency-overview
# https://github.com/realpython/materials/tree/master/intro-to-threading

import requests
import time


def download_site(url, session):
    with session.get(url) as response:
        print(f"Read {len(response.content)} from {url}")


def download_all_sites(sites):
    with requests.Session() as session:
        for url in sites:
            download_site(url, session)


if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 10
    start_time = time.time()
    download_all_sites(sites)
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} in {duration} seconds")

Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Downloaded 20 in 2.351557970046997 seconds


In [7]:
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 4
    
    print(sites)

['https://www.jython.org', 'http://olympus.realpython.org/dice', 'https://www.jython.org', 'http://olympus.realpython.org/dice', 'https://www.jython.org', 'http://olympus.realpython.org/dice', 'https://www.jython.org', 'http://olympus.realpython.org/dice']


In [16]:
# Threading version

import concurrent.futures
import requests
import threading
import time


thread_local = threading.local()


def get_session():
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    return thread_local.session


def download_site(url):
    session = get_session()
    with session.get(url) as response:
        print(f"Read {len(response.content)} from {url}")


def download_all_sites(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        executor.map(download_site, sites)


if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 10
    start_time = time.time()
    download_all_sites(sites)
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} in {duration} seconds")

Read 10267 from https://www.jython.org
Read 10267 from https://www.jython.org
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/diceRead 273 from http://olympus.realpython.org/diceRead 273 from http://olympus.realpython.org/dice

Read 273 from http://olympus.realpython.org/dice

Read 10267 from https://www.jython.org
Read 10267 from https://www.jython.org
Read 10267 from https://www.jython.org
Read 10267 from https://www.jython.orgRead 10267 from https://www.jython.org

Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/diceRead 273 from http://olympus.realpython.org/dice

Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 10267 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Downloaded 20 in 0.7502586841583252 seconds


In [18]:
# Race Condition
# https://realpython.com/python-concurrency/

# Race conditions are an entire class of subtle bugs that can and frequently do happen in multi-threaded code. 
# Race conditions happen because the programmer has not sufficiently protected data accesses to 
# prevent threads from interfering with each other. 
# You need to take extra steps when writing threaded code to ensure things are thread-safe.
# What’s going on here is that the operating system is controlling when your thread runs and when it gets 
# swapped out to let another thread run. This thread swapping can occur at any point, 
# even while doing sub-steps of a Python statement. 
# As a quick example, look at this function:

import concurrent.futures


counter = 0


def increment_counter(fake_value):
    global counter
    for _ in range(100):
        counter += 1


if __name__ == "__main__":
    fake_data = [x for x in range(5000)]
    counter = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=5000) as executor:
        executor.map(increment_counter, fake_data)
        
# This code is quite similar to the structure you used in the threading example above. 
# The difference is that each of the threads is accessing the same global variable counter and incrementing it. 
# Counter is not protected in any way, so it is not thread-safe.

# In order to increment counter, each of the threads needs to read the current value, add one to it, 
# and the save that value back to the variable. That happens in this line: counter += 1.

# Because the operating system knows nothing about your code and can swap threads at any point in the execution, 
# it’s possible for this swap to happen after a thread has read the value but before it has had the chance to write it back. 
# If the new code that is running modifies counter as well, then the first thread has a stale copy of the data and trouble 
# will ensue.

# As you can imagine, hitting this exact situation is fairly rare. 
# You can run this program thousands of times and never see the problem. 
# That’s what makes this type of problem quite difficult to debug as it can be quite hard to reproduce and 
# can cause random-looking errors to show up.

# As a further example, I want to remind you that requests.Session() is not thread-safe. 
# This means that there are places where the type of interaction described above could happen if multiple threads 
# use the same Session. I bring this up not to cast aspersions on requests but rather to point 
# out that these are difficult problems to resolve.

In [20]:
fake_data = [x for x in range(20)]

print(fake_data)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]


In [7]:
# asyncio
# https://realpython.com/python-concurrency/
# https://stackoverflow.com/questions/49005651/how-does-asyncio-actually-work/51116910#51116910

import asyncio
import time
import aiohttp

# Fix to use an event_loop inside another event_loop as jupyter already uses an event_loop
import nest_asyncio
nest_asyncio.apply()


async def download_site(session, url):
    async with session.get(url) as response:
        print("Read {0} from {1}".format(response.content_length, url))


async def download_all_sites(sites):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in sites:
            task = asyncio.ensure_future(download_site(session, url))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exceptions=True)


if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 10
    start_time = time.time()
    asyncio.run(download_all_sites(sites))
    # asyncio.get_event_loop().run_until_complete(download_all_sites(sites))
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} sites in {duration} seconds")

Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 3549 from https://www.jython.org
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Read 273 from http://olympus.realpython.org/dice
Downloaded 20 sites in 0.8111109733581543 seconds


In [4]:
if __name__ == "__main__":
    print("Hola")

Hola
