In [1]:
# look at the threading notebook before coming here; it has more explanations
# we use multiprocessing when we want to significantly speed up the program
# this speed-up comes from running tasks in parallel

In [2]:
# simple script
import time

start = time.perf_counter()

def do_something(sleep_time):
    """Announce the pause, block for `sleep_time` seconds, then announce completion.

    Args:
        sleep_time: number of seconds handed to time.sleep().

    Returns:
        None; the function only prints and sleeps.
    """
    print(f'Sleeping for {sleep_time!s}seconds...')
    time.sleep(sleep_time)
    print(f'Done sleeping for {sleep_time!s}seconds')
    
# Two back-to-back calls: the second one cannot begin until the first returns.
for _ in range(2):
    do_something(1)

# Report the wall-clock time elapsed since `start` was recorded.
stop = time.perf_counter()
print(f'Finished in {round(stop - start, 2)!s} seconds')

Sleeping for 1seconds...
Done sleeping for 1seconds
Sleeping for 1seconds...
Done sleeping for 1seconds
Finished in 2.0 seconds


In [3]:
# Flow - function-call -> wait till execution completes -> function-call -> wait till execution completes -> Done
# The flow above is synchronous
# Multiprocessing works for both I/O-bound and CPU-bound tasks
# It lets us run things in parallel
# Flow -   function-call -> wait till execution completion
#          function-call -> wait till execution completion
#                                                         |
#                                                         v
#                                                        Done

In [4]:
import multiprocessing

In [5]:
start = time.perf_counter()

def do_something(sleep_time):
    """Print a start message, sleep for `sleep_time` seconds, print a done message.

    Used as the `target` of the Process objects created below.
    """
    print('Sleeping for {!s}seconds...'.format(sleep_time))
    time.sleep(sleep_time)
    print('Done sleeping for {!s}seconds'.format(sleep_time))

# Build the Process objects; nothing executes yet.
# `args` carries the positional arguments for the target as a sequence
# (a one-element tuple here).
process1 = multiprocessing.Process(target=do_something, args=(1,))
process2 = multiprocessing.Process(target=do_something, args=(1,))

# start() launches each process and returns immediately (it does not wait
# for the target to finish), so execution continues straight to the lines below.
for proc in (process1, process2):
    proc.start()

# Nothing here waits for the processes, so this measures only the launch time.
stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

Sleeping for 1seconds...Finished in 0.01 seconds

Sleeping for 1seconds...
Done sleeping for 1secondsDone sleeping for 1seconds



In [6]:
# what if we want to wait until all the processes have finished before moving on to the next lines? - we use join
start = time.perf_counter()

def do_something(sleep_time):
    """Sleep for `sleep_time` seconds, printing a message before and after."""
    label = str(sleep_time)
    print(''.join(('Sleeping for ', label, 'seconds...')))
    time.sleep(sleep_time)
    print(''.join(('Done sleeping for ', label, 'seconds')))

# Build the two Process objects (not running yet).
process1 = multiprocessing.Process(target=do_something, args=(1,))
process2 = multiprocessing.Process(target=do_something, args=(1,))

# Launch both; start() returns right away, so both run at the same time.
for proc in (process1, process2):
    proc.start()

# join() blocks until the given process has finished, so the timing below
# is only taken once both processes are done.
for proc in (process1, process2):
    proc.join()

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

Sleeping for 1seconds...Sleeping for 1seconds...

Done sleeping for 1secondsDone sleeping for 1seconds

Finished in 1.03 seconds


In [7]:
# run many processes in parallel
start = time.perf_counter()

def do_something(sleep_time):
    """Block for `sleep_time` seconds, logging the start and the end to stdout."""
    secs = str(sleep_time)
    print(f'Sleeping for {secs}seconds...')
    time.sleep(sleep_time)
    print(f'Done sleeping for {secs}seconds')
    
# Ten Process objects, each set up to run do_something(1).
processes = [
    multiprocessing.Process(target=do_something, args=(1,))
    for _ in range(10)
]

# Launch all of them first ...
for process in processes:
    process.start()

# ... and only then wait for each one, so they all sleep at the same time.
for process in processes:
    process.join()

stop = time.perf_counter()
print(f'Finished in {round(stop - start, 2)!s} seconds')

Sleeping for 1seconds...
Sleeping for 1seconds...
Sleeping for 1seconds...Sleeping for 1seconds...
Sleeping for 1seconds...Sleeping for 1seconds...Sleeping for 1seconds...
Sleeping for 1seconds...



Sleeping for 1seconds...Sleeping for 1seconds...

Done sleeping for 1seconds
Done sleeping for 1seconds
Done sleeping for 1seconds
Done sleeping for 1secondsDone sleeping for 1secondsDone sleeping for 1seconds
Done sleeping for 1seconds
Done sleeping for 1seconds


Done sleeping for 1secondsDone sleeping for 1seconds

Finished in 1.12 seconds


In [8]:
# A newer and more efficient way of using multiple processes
import concurrent.futures

In [9]:
# simple single process script
start = time.perf_counter()

def do_something(sleep_time):
    """Sleep for `sleep_time` seconds and report back with a fixed message.

    Returns:
        The string 'done sleeping...'.
    """
    print(f'sleeping for {sleep_time!s} second...')
    time.sleep(sleep_time)
    return 'done sleeping...'
    
# Using the executor as a context manager shuts the pool down when the block exits.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # submit(fn, *args) schedules a single call of fn(*args) and returns a Future.
    # The Future wraps the pending call: it can be queried for its state
    # (running / done) and for the function's return value.
    p1 = executor.submit(do_something, 1)
    # result() blocks until the call has completed and then hands back its return value.
    outcome = p1.result()
    print(outcome)

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
done sleeping...
Finished in 1.05 seconds


In [15]:
# run the script 10 times - part 1
start = time.perf_counter()

def do_something(sleep_time):
    """Print a notice, sleep for `sleep_time` seconds, return 'done sleeping...'."""
    notice = ' '.join(['sleeping for', str(sleep_time), 'second...'])
    print(notice)
    time.sleep(sleep_time)
    return 'done sleeping...'

with concurrent.futures.ProcessPoolExecutor() as executor:
    # Schedule ten one-second tasks and keep their Futures.
    futures = [executor.submit(do_something, 1) for _ in range(10)]
    # as_completed() yields each Future as soon as it finishes.
    for p in concurrent.futures.as_completed(futures):
        print(p.result())

stop = time.perf_counter()
print(f'Finished in {round(stop - start, 2)!s} seconds')
# This took longer than the multiprocessing.Process version above, presumably
# because the pool caps the number of worker processes (by default based on the
# machine's CPU count), so the ten tasks ran in batches rather than all at once.

sleeping forsleeping forsleeping for sleeping for1    1second...1 1 
second... 
second...
second...
sleeping for sleeping for1sleeping for  1 1second...  
second...second...sleeping for
 
1 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
sleeping for 1 second...
sleeping for 1 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
Finished in 3.16 seconds


In [19]:
# run the script 10 times - part 2

import random
start = time.perf_counter()

def do_something(sleep_time):
    """Sleep for `sleep_time` seconds and return a message naming that duration.

    Returns:
        'done sleeping <sleep_time>seconds' (no space before 'seconds').
    """
    print(f'sleeping for {sleep_time!s}second...')
    time.sleep(sleep_time)
    return f'done sleeping {sleep_time!s}seconds'

with concurrent.futures.ProcessPoolExecutor() as executor:
    # The asynchronous work is confined to this `with` block: leaving the block
    # waits for every submitted task, so the code after it only runs once all
    # tasks are done.
    futures = [
        executor.submit(do_something, random.randint(1, 5))
        for _ in range(5)
    ]
    # Results are printed in completion order, not submission order.
    for p in concurrent.futures.as_completed(futures):
        print(p.result())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1second...sleeping for 3second...sleeping for 4second...
sleeping for 5second...


sleeping for 4second...
done sleeping 1seconds
done sleeping 3seconds
done sleeping 4seconds
done sleeping 5seconds
done sleeping 4seconds
Finished in 5.05 seconds


In [1]:
# Mini project

In [2]:
# data stuff
import time
import concurrent.futures
from PIL import Image, ImageFilter

# File names of the images to process. The cells below open each file by this
# name and save the result under 'processed/' using the same name.
img_names = [
    'photo-1516117172878-fd2c41f4a759.jpg',
    'photo-1532009324734-20a7a5813719.jpg',
    'photo-1524429656589-6633a470097c.jpg',
    'photo-1530224264768-7ff8c1789d79.jpg',
    'photo-1564135624576-c5c88640f235.jpg',
    'photo-1541698444083-023c97d3f4b6.jpg',
    'photo-1522364723953-452d3431c267.jpg',
    'photo-1513938709626-033611b8cc03.jpg',
    'photo-1507143550189-fed454f93097.jpg',
    'photo-1493976040374-85c8e12f0c0e.jpg',
    'photo-1504198453319-5ce911bafcde.jpg',
    'photo-1530122037265-a5f1f91d3b99.jpg',
    'photo-1516972810927-80185027ca84.jpg',
    'photo-1550439062-609e1531270e.jpg',
    'photo-1549692520-acc6669e2f0c.jpg'
]

In [4]:
# usual synchronous coding
start = time.perf_counter()

size = (1200, 1200)

# Process the images one after another: blur, shrink, save.
for img_name in img_names:
    img = Image.open(img_name)

    # Five Gaussian blur passes applied in sequence, in this order of radii.
    for radius in (15, 10, 5, 3, 3):
        img = img.filter(ImageFilter.GaussianBlur(radius))

    # thumbnail() is called for its effect on `img`; the result is then saved
    # under 'processed/' with the original file name.
    img.thumbnail(size)
    img.save(f'processed/{img_name}')
    print(f'{img_name} was processed...')

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

photo-1516117172878-fd2c41f4a759.jpg was processed...
photo-1532009324734-20a7a5813719.jpg was processed...
photo-1524429656589-6633a470097c.jpg was processed...
photo-1530224264768-7ff8c1789d79.jpg was processed...
photo-1564135624576-c5c88640f235.jpg was processed...
photo-1541698444083-023c97d3f4b6.jpg was processed...
photo-1522364723953-452d3431c267.jpg was processed...
photo-1513938709626-033611b8cc03.jpg was processed...
photo-1507143550189-fed454f93097.jpg was processed...
photo-1493976040374-85c8e12f0c0e.jpg was processed...
photo-1504198453319-5ce911bafcde.jpg was processed...
photo-1530122037265-a5f1f91d3b99.jpg was processed...
photo-1516972810927-80185027ca84.jpg was processed...
photo-1550439062-609e1531270e.jpg was processed...
photo-1549692520-acc6669e2f0c.jpg was processed...
Finished in 82.92 seconds


In [5]:
# usual multiprocessing
start = time.perf_counter()

size = (1200, 1200)

def process_image(img_name):
    """Blur one image, shrink it, and save it under 'processed/'.

    Args:
        img_name: file name of the image to open; the output is written to
            'processed/<img_name>'.

    Reads the module-level `size` as the argument to thumbnail().
    Returns None; prints a confirmation line once the file has been saved.
    """
    img = Image.open(img_name)

    # Five successive Gaussian blur passes, in this order of radii.
    for radius in (15, 10, 5, 3, 3):
        img = img.filter(ImageFilter.GaussianBlur(radius))

    img.thumbnail(size)
    img.save(f'processed/{img_name}')
    print(f'{img_name} was processed...')
    
with concurrent.futures.ProcessPoolExecutor() as executor:
    # Keep every Future. An exception raised inside a worker is stored on its
    # Future and only surfaces when result() is called; submitting without
    # checking would let a failed image (e.g. a missing input file or output
    # directory) go completely unnoticed.
    futures = [executor.submit(process_image, img_name) for img_name in img_names]
    for future in concurrent.futures.as_completed(futures):
        # process_image returns None, so the value is ignored; the call is made
        # purely so that a worker's exception is re-raised here.
        future.result()

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

photo-1516117172878-fd2c41f4a759.jpg was processed...
photo-1524429656589-6633a470097c.jpg was processed...
photo-1530224264768-7ff8c1789d79.jpg was processed...
photo-1532009324734-20a7a5813719.jpg was processed...
photo-1564135624576-c5c88640f235.jpg was processed...
photo-1507143550189-fed454f93097.jpg was processed...
photo-1522364723953-452d3431c267.jpg was processed...
photo-1541698444083-023c97d3f4b6.jpg was processed...
photo-1513938709626-033611b8cc03.jpg was processed...
photo-1516972810927-80185027ca84.jpg was processed...
photo-1530122037265-a5f1f91d3b99.jpg was processed...
photo-1504198453319-5ce911bafcde.jpg was processed...
photo-1493976040374-85c8e12f0c0e.jpg was processed...
photo-1550439062-609e1531270e.jpg was processed...
photo-1549692520-acc6669e2f0c.jpg was processed...
Finished in 33.81 seconds


In [6]:
# using threading just to experiment
start = time.perf_counter()

size = (1200, 1200)

def process_image(img_name):
    """Open `img_name`, blur it five times, shrink it, and write it to 'processed/'.

    The module-level `size` is passed to thumbnail(). Nothing is returned;
    a confirmation line is printed after the save.
    """
    # Build the blur filters up front, in the order they are applied.
    blur_passes = [ImageFilter.GaussianBlur(radius) for radius in (15, 10, 5, 3, 3)]

    img = Image.open(img_name)
    for blur in blur_passes:
        img = img.filter(blur)

    img.thumbnail(size)
    img.save(f'processed/{img_name}')
    print(f'{img_name} was processed...')
    
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Keep every Future. An exception raised inside a task is stored on its
    # Future and only surfaces when result() is called; without this check a
    # failed image would be skipped silently.
    futures = [executor.submit(process_image, img_name) for img_name in img_names]
    for future in concurrent.futures.as_completed(futures):
        # process_image returns None; result() is called only to re-raise errors.
        future.result()

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')
# The recorded timing is slightly shorter than the multiprocessing run.
# NOTE(review): presumably Pillow's C-level filter code releases the GIL, which
# would let the threads run in parallel without the cost of starting processes
# and passing data between them - confirm against the Pillow documentation.

photo-1516117172878-fd2c41f4a759.jpg was processed...
photo-1507143550189-fed454f93097.jpg was processed...
photo-1516972810927-80185027ca84.jpg was processed...
photo-1522364723953-452d3431c267.jpg was processed...
photo-1524429656589-6633a470097c.jpg was processed...
photo-1530224264768-7ff8c1789d79.jpg was processed...
photo-1530122037265-a5f1f91d3b99.jpg was processed...
photo-1532009324734-20a7a5813719.jpg was processed...
photo-1564135624576-c5c88640f235.jpg was processed...
photo-1513938709626-033611b8cc03.jpg was processed...
photo-1549692520-acc6669e2f0c.jpg was processed...
photo-1541698444083-023c97d3f4b6.jpg was processed...
photo-1550439062-609e1531270e.jpg was processed...
photo-1504198453319-5ce911bafcde.jpg was processed...
photo-1493976040374-85c8e12f0c0e.jpg was processed...
Finished in 32.17 seconds
