# Rules of thumb

Max processes: on the order of tens

Max threads: on the order of hundreds

Async methods: on the order of thousands

**You can't create infinite numbers of threads and processes**



## Preferred technique -- use concurrent.futures for "pooling"


[See the official docs](https://docs.python.org/3/library/concurrent.futures.html)

### Executor

An object which executes code for you, asynchronously

### Future

The status of an execution: its return value, its exception if there is one, etc.  Similar to a promise.



In [None]:
import concurrent.futures

def my_function(n):
    """Return a short message with ``n`` formatted into it."""
    message = f"This string has {n} in it"
    return message

# Submit a single call to a pool of two threads.
# The with statement will ensure the executor is cleaned up when we are done
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    # submit() schedules my_function(123) and returns a Future for that call
    my_future = executor.submit(my_function, 123)
    # Prints the Future object itself, not the value the call produced
    print(my_future)
    # result() waits for the call to finish and returns its return value
    print(my_future.result())

In [None]:
# Use processes instead of threads
# NOTE(review): the concurrent.futures docs say ProcessPoolExecutor requires an
# importable __main__ module and does not work in the interactive interpreter;
# confirm this cell runs in your notebook environment.

# The with statement will ensure the executor is cleaned up when we are done
with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
    # submit() schedules my_function(123) and returns a Future for that call
    my_future = executor.submit(my_function, 123)
    print(my_future)
    # result() waits for the call to finish and returns its return value
    print(my_future.result())

In [None]:
# Handling exceptions
def my_function(n):
    """Always fail, so the code below has an exception to handle.

    Raises:
        ValueError: unconditionally, whatever ``n`` is.
    """
    raise ValueError()


with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    my_future = executor.submit(my_function, 123)
    # The exception raised inside the worker is stored on the Future; nothing
    # is raised in this thread by submit() or by printing the Future.
    print(my_future)
    # result() re-raises the stored exception in the calling thread, so it can
    # be handled with an ordinary try/except.
    try:
        print(my_future.result())
    except ValueError as exc:
        print(f"my_function raised {exc!r}")


## Executor execution methods

```python
my_executor = ...

my_future = my_executor.submit( my_function, arg, arg, arg, ...)

my_generator = my_executor.map( my_function, iterable_args )
```


In [None]:
import random
import time

def my_function(n):
    """Sleep for a random time of up to ``n`` seconds, then describe ``n``.

    Raises:
        ValueError: if ``n`` is 10 or more (raised after the sleep).
    """
    pause = random.random() * n
    time.sleep(pause)
    if n >= 10:
        raise ValueError()
    message = f"This string has {n} in it"
    return message

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as my_executor:
    my_data = [ 1, 5, 10 ]
    # map() schedules my_function once for each item of my_data
    result = my_executor.map(my_function, my_data)
    
    # NOTE: result isn't a future!  It's a generator
    # Nothing iterates it in this cell, so no return values (or exceptions)
    # are retrieved here; only the generator object itself is printed.
    print(result)
  

    

In [None]:
# The with statement will ensure the executor is cleaned up when we are done
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as my_executor:
    my_data = [ 1, 5, 10 ]
    results = my_executor.map(my_function, my_data)
    
    # Iterating yields each call's return value in the order of my_data.
    # Per the Executor.map docs, an exception raised by a call is re-raised
    # when its value is retrieved from the iterator.
    # NOTE(review): if my_function is still the version that raises for
    # n >= 10, retrieving the last item (10) raises ValueError here -- confirm
    # that this is the intended demonstration.
    for x in results:
        print(x)

# A useful pattern: map()

1.  Use the built-in map() function
2.  Switch from map() to executor.map()


In [None]:
# Sample input strings, from http://rasmusrasmussen.com/rtweets/
some_tweets = [
    'My big mug is a loaded gun, and I want to wake up. I care about ingenious alcohol, bro. #midnightrant #randomtweet',
    'My cat is getting old, and I want to get a yo-yo. I care about hot drinks, apparently. #fishbite #randomtweet',
    'My groove is over the top, and I want to level up. Historically stoic robots, until tomorrow. #someeats #randomtweet'
]

def remove_vowels(s):
    """Return ``s`` with every lowercase vowel (a, e, i, o, u) dropped.

    Uppercase vowels are left in place.
    """
    kept = []
    for ch in s:
        if ch in ('a', 'e', 'i', 'o', 'u'):
            continue
        kept.append(ch)
    return ''.join(kept)


# Step 1: the built-in map() applies remove_vowels to each tweet in turn
for result in map(remove_vowels,some_tweets):
    print(result)

In [None]:
# Step 2: executor.map() takes the same (function, iterable) arguments as the
# built-in map(), but the calls are run by a pool of two threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as my_executor:
    for result in my_executor.map(remove_vowels,some_tweets):
        print(result)

## Lower-level, older API: multiprocessing

### Pools of processes

In [None]:
%%time
import random
import time
from multiprocessing import Pool

def do_something(n, delay=2):
    """Block for ``delay`` seconds, then return ``n + 1``.

    Args:
        n: value to increment.
        delay: seconds to sleep before returning. Defaults to 2, the pause
            this function previously had hard-coded.

    Returns:
        ``n + 1``.
    """
    time.sleep(delay)
    return n + 1
    
    
# A pool of two workers, used as a context manager.
pool = Pool(2)
with pool:
    # apply() blocks until the call has finished and returns its result, so
    # the two calls below run one after the other.
    result = pool.apply(do_something, (1,))
    print(result)
    result = pool.apply(do_something, (2,))
    print(result)

In [None]:
%%time
# Pool is not imported in this cell; it is whatever an earlier cell bound it to.
pool = Pool(2)
with pool:
    # result1 is a multiprocessing.pool.AsyncResult object
    # apply_async() returns it immediately instead of waiting for the call
    result1 = pool.apply_async(do_something, (1,))
    # ready() reports whether the call has completed yet
    print(result1.ready())
    
    result2 = pool.apply_async(do_something, (2,))
    print(result2.ready())
    
    # get() waits for the result, giving up after `timeout` seconds
    print(result1.get(timeout=10))
    print(result2.get(timeout=10))

### Pools of threads (same API, more or less)

In [None]:
%%time
from multiprocessing.pool import ThreadPool as Pool

def do_something(n, delay=2):
    """Block for ``delay`` seconds, then return ``n + 1``.

    Args:
        n: value to increment.
        delay: seconds to sleep before returning. Defaults to 2, the pause
            this function previously had hard-coded.

    Returns:
        ``n + 1``.
    """
    time.sleep(delay)
    return n + 1
    
    
# In this cell Pool is multiprocessing.pool.ThreadPool (see the import above),
# so this creates a pool of two threads.
pool = Pool(2)
with pool:
    # apply() blocks until the call has finished and returns its result, so
    # the two calls below run one after the other.
    result = pool.apply(do_something, (1,))
    print(result)
    result = pool.apply(do_something, (2,))
    print(result)

# Your turn

Use a thread pool to fetch the contents of an array of websites.  Use any technique you'd like from what you have learned so far

In [None]:
import urllib

def read_a_website(url_string):
    """Fetch ``url_string`` and return the raw response body.

    Use this function to get your website contents.

    Args:
        url_string: URL to open.

    Returns:
        Whatever ``response.read()`` returns for the opened URL (bytes).
    """
    # ``import urllib`` alone does not load the ``urllib.request`` submodule,
    # so import it explicitly instead of relying on some other module having
    # imported it already.
    import urllib.request

    with urllib.request.urlopen(url_string) as response:
        return response.read()
    
def print_website_html(html):
    """Print a preview of ``html``: its first 100 items."""
    preview = html[:100]
    print(preview)
    
# URLs to download in this exercise. Every entry ends with a comma so that a
# newly added line cannot be silently joined to the string before it.
websites_to_fetch = [
    'https://google.com',
    'https://proofpoint.com',
    # add some more
]

# Fetch the websites using a thread executor, then a process executor
# print the html from each website

    

In [None]:
import urllib

def read_a_website(url_string):
    """Fetch ``url_string`` and return the raw response body.

    Use this function to get your website contents.

    Args:
        url_string: URL to open.

    Returns:
        Whatever ``response.read()`` returns for the opened URL (bytes).
    """
    # ``import urllib`` alone does not load the ``urllib.request`` submodule,
    # so import it explicitly instead of relying on some other module having
    # imported it already.
    import urllib.request

    with urllib.request.urlopen(url_string) as response:
        return response.read()
    
def print_website_html(html):
    """Print a preview of ``html``: its first 100 items."""
    preview = html[:100]
    print(preview)
    
# URLs to download in this exercise. Every entry ends with a comma so that a
# newly added line cannot be silently joined to the string before it.
websites_to_fetch = [
    'https://google.com',
    'https://proofpoint.com',
    # add some more
]

# Fetch the websites using a thread pool
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as my_executor:
    # map() schedules read_a_website once per URL on the pool's two threads
    results = my_executor.map(read_a_website,websites_to_fetch)
    # Results come back in the order of websites_to_fetch; per the
    # Executor.map docs, an exception raised by a call is re-raised when its
    # value is retrieved from the iterator.
    for html in results:
        print_website_html(html)
    