## THREADING

In [2]:
import threading
import time

In [3]:
def do_something():
    """Announce a one-second pause, block for it, then announce completion."""
    print('Sleeping 1 second...')
    time.sleep(1)
    print('Done sleeping...')


# Baseline: time a single synchronous call.
start = time.perf_counter()
do_something()
elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Done sleeping...
Finished in 1.0 second(s)


In [4]:
# Two synchronous calls, one after the other.
start = time.perf_counter()
for _ in range(2):
    do_something()
elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Finished in 2.0 second(s)


In [6]:
start = time.perf_counter()

# One Thread object per call; nothing runs until start() is invoked.
t1, t2 = (threading.Thread(target=do_something) for _ in range(2))

for worker in (t1, t2):
    worker.start()

# join() blocks until that thread is done, so the timer below is read
# only after both threads have finished.
for worker in (t1, t2):
    worker.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Sleeping 1 second...
Done sleeping...
Done sleeping...
Finished in 1.0 second(s)


In [7]:
start = time.perf_counter()

threads = []
for _ in range(10):
    worker = threading.Thread(target=do_something)
    threads.append(worker)
    # Start right away, but do NOT join inside this loop: joining here would
    # pause the loop until this thread finished before creating the next one.
    worker.start()

# Every thread has been started; now wait for each of them in turn.
for worker in threads:
    worker.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...Sleeping 1 second...

Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Finished in 1.02 second(s)


In [8]:
def do_something(seconds):
    """Print a start message, sleep for `seconds`, then print a completion message."""
    start_message = f'Sleeping {seconds} second(s)...'
    print(start_message)
    time.sleep(seconds)
    print('Done sleeping...')
    
start = time.perf_counter()

threads = []
for _ in range(10):
    # Positional arguments for the target function are supplied through `args`.
    worker = threading.Thread(target=do_something, args=(1.5,))
    threads.append(worker)
    # Joining is deferred to the loop below; joining here would pause this loop.
    worker.start()

for worker in threads:
    worker.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Finished in 1.52 second(s)


The above is the older (manual) way to do python threads.

Python 3.2 added `concurrent.futures.ThreadPoolExecutor`, a higher-level way to run work on a pool of threads. Let's try using that:

In [9]:
import concurrent.futures

def do_something(seconds):
    """Sleep for `seconds` and return (rather than print) the completion message."""
    print(f'Sleeping {seconds} second(s)...')
    time.sleep(seconds)
    # Returned to the caller so it can be read back from a future.
    message = 'Done sleeping...'
    return message
    
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    # submit() schedules the call and returns a Future object.
    future = executor.submit(do_something, 1)
    # result() waits for the call to finish and gives back its return value.
    outcome = future.result()
    print(outcome)

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second(s)...
Done sleeping...
Finished in 1.0 second(s)


In [10]:
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Both calls are submitted before either result is requested.
    f1, f2 = (executor.submit(do_something, 1) for _ in range(2))
    for future in (f1, f2):
        print(future.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second(s)...
Sleeping 1 second(s)...
Done sleeping...
Done sleeping...
Finished in 1.0 second(s)


In [11]:
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    results = []
    for _ in range(10):
        results.append(executor.submit(do_something, 1))
    # as_completed() hands back each future once it has finished.
    for done in concurrent.futures.as_completed(results):
        print(done.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...Sleeping 1 second(s)...

Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Done sleeping...
Finished in 1.01 second(s)


In [14]:
def do_something(seconds):
    """Sleep for `seconds`, then return a completion message that includes `seconds`."""
    print(f'Sleeping {seconds} second(s)...')
    time.sleep(seconds)
    # The duration is embedded so each result can be told apart when printed.
    message = f'Done sleeping...{seconds}'
    return message
    
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    secs = [5, 4, 3, 2, 1]
    results = []
    for sec in secs:
        results.append(executor.submit(do_something, sec))
    # Futures come back as they complete, not in the order they were submitted.
    for done in concurrent.futures.as_completed(results):
        print(done.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 5 second(s)...
Sleeping 4 second(s)...
Sleeping 3 second(s)...Sleeping 2 second(s)...

Sleeping 1 second(s)...
Done sleeping...1
Done sleeping...2
Done sleeping...3
Done sleeping...4
Done sleeping...5
Finished in 5.0 second(s)


In [15]:
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    secs = [5, 4, 3, 2, 1]

    # map() yields return values in the order of `secs` (the order in which
    # the calls were handed to the pool), not in the order they finish.
    for result in executor.map(do_something, secs):
        print(result)

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 5 second(s)...
Sleeping 4 second(s)...
Sleeping 3 second(s)...
Sleeping 2 second(s)...
Sleeping 1 second(s)...
Done sleeping...5
Done sleeping...4
Done sleeping...3
Done sleeping...2
Done sleeping...1
Finished in 5.01 second(s)


NOTE: If our function raises an exception, the exception does not surface at the point where the thread runs. Instead, it is re-raised when that call's value is retrieved from the results iterator. If you need to handle exceptions, handle them where you iterate over the results.

Also, this form does not require an explicit `join`: leaving the `with` block waits for all submitted work to finish.

## Real World Example - Downloading Images

In [44]:
import requests

# Now try downloading images with threading

# Each download URL is the image host followed by one photo id.
_UNSPLASH_HOST = 'https://images.unsplash.com'
_PHOTO_IDS = (
    'photo-1516117172878-fd2c41f4a759',
    'photo-1532009324734-20a7a5813719',
    'photo-1524429656589-6633a470097c',
    'photo-1530224264768-7ff8c1789d79',
    'photo-1564135624576-c5c88640f235',
    'photo-1541698444083-023c97d3f4b6',
    'photo-1522364723953-452d3431c267',
    'photo-1513938709626-033611b8cc03',
    'photo-1507143550189-fed454f93097',
    'photo-1493976040374-85c8e12f0c0e',
    'photo-1504198453319-5ce911bafcde',
    'photo-1530122037265-a5f1f91d3b99',
    'photo-1516972810927-80185027ca84',
    'photo-1550439062-609e1531270e',
    'photo-1549692520-acc6669e2f0c',
)
img_urls = [f'{_UNSPLASH_HOST}/{photo_id}' for photo_id in _PHOTO_IDS]

t1 = time.perf_counter()

# Synchronous downloads: each request must finish before the next one starts.
# NOTE: open() does not create the 'images' directory; it must already exist.
for img_url in img_urls:
    # A timeout keeps one stalled connection from hanging the whole loop.
    response = requests.get(img_url, timeout=30)
    # Fail loudly on a 4xx/5xx response instead of saving an error page as a .jpg.
    response.raise_for_status()
    img_bytes = response.content
    # The fourth '/'-separated piece of the URL is the photo id.
    img_name = img_url.split('/')[3]
    img_name = f'images/{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(img_bytes)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')

t2 = time.perf_counter()

print(f'Finished in {t2-t1} seconds')

photo-1516117172878-fd2c41f4a759.jpg was downloaded...
photo-1532009324734-20a7a5813719.jpg was downloaded...
photo-1524429656589-6633a470097c.jpg was downloaded...
photo-1530224264768-7ff8c1789d79.jpg was downloaded...
photo-1564135624576-c5c88640f235.jpg was downloaded...
photo-1541698444083-023c97d3f4b6.jpg was downloaded...
photo-1522364723953-452d3431c267.jpg was downloaded...
photo-1513938709626-033611b8cc03.jpg was downloaded...
photo-1507143550189-fed454f93097.jpg was downloaded...
photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
photo-1504198453319-5ce911bafcde.jpg was downloaded...
photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
photo-1516972810927-80185027ca84.jpg was downloaded...
photo-1550439062-609e1531270e.jpg was downloaded...
photo-1549692520-acc6669e2f0c.jpg was downloaded...
Finished in 15.792922200002067 seconds


In [47]:
def download_image(img_url):
    """Download one image and save it as images/<photo id>.jpg.

    Args:
        img_url: URL whose fourth '/'-separated piece is used as the file name.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within the timeout.
        OSError: if the file cannot be written (e.g. 'images' does not exist).
    """
    # A timeout keeps one stalled connection from tying up a worker thread forever.
    response = requests.get(img_url, timeout=30)
    # Do not save an error page under a .jpg name.
    response.raise_for_status()
    img_name = img_url.split('/')[3]
    img_name = f'images/{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(response.content)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')


t1 = time.perf_counter()

# Concurrent downloads on a thread pool.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() re-raises a worker's exception only when its result is retrieved,
    # so consume the iterator; otherwise failed downloads pass silently.
    list(executor.map(download_image, img_urls))

t2 = time.perf_counter()

print(f'Finished in {t2-t1} seconds')

images/photo-1516117172878-fd2c41f4a759.jpg was downloaded...
images/photo-1530224264768-7ff8c1789d79.jpg was downloaded...
images/photo-1507143550189-fed454f93097.jpg was downloaded...
images/photo-1524429656589-6633a470097c.jpg was downloaded...
images/photo-1549692520-acc6669e2f0c.jpg was downloaded...
images/photo-1522364723953-452d3431c267.jpg was downloaded...
images/photo-1504198453319-5ce911bafcde.jpg was downloaded...
images/photo-1541698444083-023c97d3f4b6.jpg was downloaded...
images/photo-1532009324734-20a7a5813719.jpg was downloaded...
images/photo-1516972810927-80185027ca84.jpg was downloaded...
images/photo-1564135624576-c5c88640f235.jpg was downloaded...
images/photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
images/photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
images/photo-1513938709626-033611b8cc03.jpg was downloaded...
images/photo-1550439062-609e1531270e.jpg was downloaded...
Finished in 5.453322400004254 seconds


NOTE: Threading works well for this example of downloading images because it is "IO-bound". If your code is "CPU-bound", then threading could actually make performance worse because there is some overhead to using threads.

## MULTIPROCESSING

In [21]:
def do_something():
    """Print a start message, sleep for one second, then print a completion message."""
    print('Sleeping 1 second...')
    time.sleep(1)
    print('Done sleeping...')


# Baseline for the multiprocessing section: two synchronous calls.
start = time.perf_counter()
for _ in range(2):
    do_something()
elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Finished in 2.0 second(s)


In [22]:
import multiprocessing

start = time.perf_counter()

# One Process object per call to do_something; nothing runs until start().
p1 = multiprocessing.Process(target=do_something)
p2 = multiprocessing.Process(target=do_something)

p1.start()
p2.start()

# No join() is called here, so the timer is read and the "Finished" line is
# printed without waiting for p1/p2 to complete (see the output below).
finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Sleeping 1 second...
Finished in 1.23 second(s)
Sleeping 1 second...
Done sleeping...
Done sleeping...


In [24]:
start = time.perf_counter()

p1, p2 = (multiprocessing.Process(target=do_something) for _ in range(2))

for proc in (p1, p2):
    proc.start()

# Wait for both processes before reading the timer.
for proc in (p1, p2):
    proc.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Sleeping 1 second...
Done sleeping...
Done sleeping...
Finished in 2.37 second(s)


In [25]:
start = time.perf_counter()

processes = []
for _ in range(10):
    proc = multiprocessing.Process(target=do_something)
    processes.append(proc)
    proc.start()

# All ten have been started; now wait for each one.
for proc in processes:
    proc.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second...
Sleeping 1 second...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Sleeping 1 second...
Done sleeping...
Done sleeping...
Done sleeping...
Finished in 7.76 second(s)


In [27]:
def do_something(seconds):
    """Print a start message, sleep for `seconds`, then print a completion message."""
    print(f'Sleeping {seconds} second(s)...')
    time.sleep(seconds)
    print('Done sleeping...')


start = time.perf_counter()

processes = []
for _ in range(10):
    # NOTE: unlike with threads, arguments passed to a process must be
    # serializable with pickle.
    proc = multiprocessing.Process(target=do_something, args=(1.5,))
    processes.append(proc)
    proc.start()

for proc in processes:
    proc.join()

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Done sleeping...
Done sleeping...
Sleeping 1.5 second(s)...
Done sleeping...
Done sleeping...
Sleeping 1.5 second(s)...
Sleeping 1.5 second(s)...
Done sleeping...
Sleeping 1.5 second(s)...
Done sleeping...
Sleeping 1.5 second(s)...
Done sleeping...
Sleeping 1.5 second(s)...
Done sleeping...
Done sleeping...
Done sleeping...
Finished in 9.32 second(s)


Python 3.2 added `concurrent.futures.ProcessPoolExecutor`. The following code uses this "newer" method of multiprocessing.

In [30]:
import concurrent.futures

def do_something(seconds):
    """Sleep for `seconds`, then return a completion message that includes `seconds`."""
    print(f'Sleeping {seconds} second(s)...')
    time.sleep(seconds)
    message = f'Done sleeping...{seconds}'
    return message


start = time.perf_counter()

# A context manager is the recommended way to use the process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # submit() returns a Future; result() waits for the call's return value.
    future = executor.submit(do_something, 1)
    print(future.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')


Sleeping 1 second(s)...
Done sleeping...1
Finished in 9.82 second(s)


In [31]:
start = time.perf_counter()

# A context manager is the recommended way to use the process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # Both calls are submitted before either result is requested.
    f1, f2 = (executor.submit(do_something, 1) for _ in range(2))
    for future in (f1, f2):
        print(future.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second(s)...
Sleeping 1 second(s)...
Done sleeping...1
Done sleeping...1
Finished in 6.25 second(s)


In [33]:
start = time.perf_counter()

# A context manager is the recommended way to use the process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = []
    for _ in range(10):
        results.append(executor.submit(do_something, 1))
    # Print each return value as soon as its future has finished.
    for done in concurrent.futures.as_completed(results):
        print(done.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Sleeping 1 second(s)...
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Done sleeping...1
Finished in 7.23 second(s)


In [34]:
start = time.perf_counter()

# A context manager is the recommended way to use the process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
    secs = [5, 4, 3, 2, 1]
    results = []
    for sec in secs:
        results.append(executor.submit(do_something, sec))
    # Futures come back as they complete, not in the order they were submitted.
    for done in concurrent.futures.as_completed(results):
        print(done.result())

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 4 second(s)...
Sleeping 5 second(s)...
Sleeping 3 second(s)...
Sleeping 2 second(s)...
Sleeping 1 second(s)...
Done sleeping...1
Done sleeping...2
Done sleeping...3
Done sleeping...4
Done sleeping...5
Finished in 10.29 second(s)


In [35]:
start = time.perf_counter()

# A context manager is the recommended way to use the process pool.
with concurrent.futures.ProcessPoolExecutor() as executor:
    secs = [5, 4, 3, 2, 1]

    # map() yields return values in the order of `secs`, not completion order.
    for result in executor.map(do_something, secs):
        print(result)

elapsed = time.perf_counter() - start

print(f'Finished in {round(elapsed, 2)} second(s)')

Sleeping 4 second(s)...
Sleeping 5 second(s)...
Sleeping 3 second(s)...
Sleeping 1 second(s)...
Sleeping 2 second(s)...
Done sleeping...5
Done sleeping...4
Done sleeping...3
Done sleeping...2
Done sleeping...1
Finished in 10.31 second(s)


## Real World Example - Image Processing

In [50]:
from PIL import Image, ImageFilter

# File names of the images to process: one '<photo id>.jpg' per entry.
_PHOTO_STEMS = (
    'photo-1516117172878-fd2c41f4a759',
    'photo-1532009324734-20a7a5813719',
    'photo-1524429656589-6633a470097c',
    'photo-1530224264768-7ff8c1789d79',
    'photo-1564135624576-c5c88640f235',
    'photo-1541698444083-023c97d3f4b6',
    'photo-1522364723953-452d3431c267',
    'photo-1513938709626-033611b8cc03',
    'photo-1507143550189-fed454f93097',
    'photo-1493976040374-85c8e12f0c0e',
    'photo-1504198453319-5ce911bafcde',
    'photo-1530122037265-a5f1f91d3b99',
    'photo-1516972810927-80185027ca84',
    'photo-1550439062-609e1531270e',
    'photo-1549692520-acc6669e2f0c',
)
img_names = [f'{stem}.jpg' for stem in _PHOTO_STEMS]

t1 = time.perf_counter()

size = (1200, 1200)  # size passed to thumbnail() for every image

# Process the images one at a time in this process.
for img_name in img_names:
    source_path = f'images/{img_name}'
    target_path = f'processed/{img_name}'
    blurred = Image.open(source_path).filter(ImageFilter.GaussianBlur(15))
    blurred.thumbnail(size)
    blurred.save(target_path)
    print(f'{img_name} was processed...')

t2 = time.perf_counter()
print(f'Finished in {t2-t1} seconds')

photo-1516117172878-fd2c41f4a759.jpg was processed...
photo-1532009324734-20a7a5813719.jpg was processed...
photo-1524429656589-6633a470097c.jpg was processed...
photo-1530224264768-7ff8c1789d79.jpg was processed...
photo-1564135624576-c5c88640f235.jpg was processed...
photo-1541698444083-023c97d3f4b6.jpg was processed...
photo-1522364723953-452d3431c267.jpg was processed...
photo-1513938709626-033611b8cc03.jpg was processed...
photo-1507143550189-fed454f93097.jpg was processed...
photo-1493976040374-85c8e12f0c0e.jpg was processed...
photo-1504198453319-5ce911bafcde.jpg was processed...
photo-1530122037265-a5f1f91d3b99.jpg was processed...
photo-1516972810927-80185027ca84.jpg was processed...
photo-1550439062-609e1531270e.jpg was processed...
photo-1549692520-acc6669e2f0c.jpg was processed...
Finished in 17.133864499999618 seconds


In [52]:
size = (1200, 1200)  # size passed to thumbnail() for every image


def process_image(img_name):
    """Blur images/<img_name>, thumbnail it to `size`, and save it under processed/.

    Args:
        img_name: file name (not a path) of an image inside the 'images' directory.

    Reads the module-level `size`. Any error from opening or saving the
    image propagates to whoever retrieves this call's result.
    """
    # The context manager closes the source file once the blurred copy exists.
    with Image.open(f'images/{img_name}') as source:
        img = source.filter(ImageFilter.GaussianBlur(15))
    img.thumbnail(size)
    img.save(f'processed/{img_name}')
    print(f'{img_name} was processed...')


t1 = time.perf_counter()

with concurrent.futures.ProcessPoolExecutor() as executor:
    # map() re-raises a worker's exception only when its result is retrieved,
    # so consume the iterator; otherwise a failed image passes silently.
    list(executor.map(process_image, img_names))

t2 = time.perf_counter()
print(f'Finished in {t2-t1} seconds')

photo-1516117172878-fd2c41f4a759.jpg was processed...
photo-1507143550189-fed454f93097.jpg was processed...
photo-1524429656589-6633a470097c.jpg was processed...
photo-1522364723953-452d3431c267.jpg was processed...
photo-1530224264768-7ff8c1789d79.jpg was processed...
photo-1532009324734-20a7a5813719.jpg was processed...
photo-1564135624576-c5c88640f235.jpg was processed...
photo-1513938709626-033611b8cc03.jpg was processed...
photo-1541698444083-023c97d3f4b6.jpg was processed...
photo-1516972810927-80185027ca84.jpg was processed...
photo-1530122037265-a5f1f91d3b99.jpg was processed...
photo-1550439062-609e1531270e.jpg was processed...
photo-1549692520-acc6669e2f0c.jpg was processed...
photo-1504198453319-5ce911bafcde.jpg was processed...
photo-1493976040374-85c8e12f0c0e.jpg was processed...
Finished in 17.204452799996943 seconds


Summary: Use **threads** for IO-bound tasks, and use **processes** for CPU-bound tasks.

## SUBPROCESSES