In [1]:
# Why do we use threading?
# We use it whenever we need our program to run significantly faster.
# The speed-up comes from running tasks concurrently.

In [2]:
# Baseline: run the same blocking task twice, one call after the other.
import time


def do_something():
    """Print a start message, block for one second, then print a done message."""
    print('sleeping for 1 second...')
    time.sleep(1)
    print('done sleeping...')


start = time.perf_counter()

# Plain sequential calls: the second one cannot begin until the first returns.
for _ in range(2):
    do_something()

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
done sleeping...
sleeping for 1 second...
done sleeping...
Finished in 2.0 seconds


In [4]:
# program flow - function-call -> wait to complete execution -> function-call -> wait to complete execution -> Done

# running everything in order like this is called running synchronously
# when running synchronously, the CPU sits mostly idle while we wait around for sleep to complete
# a task is either CPU bound or I/O bound; waiting on sleep like this behaves like an I/O bound task
# CPU bound tasks - crunching a lot of numbers
# I/O bound task - just wait for input and output operations to be completed not really using CPU that much
# I/O tasks - reading or writing file system, network operations, downloading stuff online
# we will experience the benefits of threading when our program is mostly I/O bound
# we will not experience any benefit when we use threading with CPU bound tasks (in CPython the global interpreter lock lets only one thread execute Python bytecode at a time); it may even increase the runtime due to thread creation overhead

# what happens when we use threading
# function call -> wait to complete execution
#                call -> wait to complete execution -> done

In [3]:
import threading

In [8]:
start = time.perf_counter()


def do_something():
    """Print a start message, block for one second, then print a done message."""
    print('sleeping for 1 second...')
    time.sleep(1)
    print('done sleeping...')


# Build both thread objects up front; nothing runs until start() is called.
t1, t2 = (threading.Thread(target=do_something) for _ in range(2))

# Launch them back to back so the two sleeps overlap.
for worker in (t1, t2):
    worker.start()

# Deliberately no join(): the main thread reaches the timing code
# without waiting for the workers to finish.
stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 second...Finished in 0.05 seconds

done sleeping...
done sleeping...


In [9]:
# as you can see threads started and the program doesn't wait for them to complete - it moves on to printing time, later when functions are done it outputs one by one
# this is a kind of asynchronous programming

In [10]:
# How do we measure the finish time only after the threads have completed?
# Call join() on every thread before stopping the clock.
start = time.perf_counter()


def do_something():
    """Print a start message, block for one second, then print a done message."""
    print('sleeping for 1 second...')
    time.sleep(1)
    print('done sleeping...')


# Thread objects are created first; they do not run yet.
t1 = threading.Thread(target=do_something)
t2 = threading.Thread(target=do_something)
workers = (t1, t2)

# Kick both off so they sleep concurrently.
for worker in workers:
    worker.start()

# Block the main thread until each worker has finished.
for worker in workers:
    worker.join()

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 second...
done sleeping...
done sleeping...
Finished in 1.01 seconds


In [11]:
# the above output shows that both threads started at about the same time and finished after roughly 1 second
# likewise, if we run 10 threads they will all complete in about 1 second in total

In [13]:
start = time.perf_counter()


def do_something():
    """Print a start message, block for one second, then print a done message."""
    print('sleeping for 1 second...')
    time.sleep(1)
    print('done sleeping...')


# Start every thread first and join them afterwards.
# join() must NOT go inside this loop: it would wait for the current thread
# to finish before the next iteration could even create the following one,
# which would run the threads one at a time.
threads = []
for _ in range(10):
    worker = threading.Thread(target=do_something)
    worker.start()
    threads.append(worker)

# All threads are running by now; wait for each of them in turn.
for worker in threads:
    worker.join()

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...sleeping for 1 second...

sleeping for 1 second...
sleeping for 1 second...sleeping for 1 second...
sleeping for 1 second...

sleeping for 1 second...
sleeping for 1 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...done sleeping...
done sleeping...
done sleeping...
done sleeping...done sleeping...done sleeping...



Finished in 1.01 seconds


In [16]:
# Passing arguments to the function a thread executes.
start = time.perf_counter()


def do_something(sleep_time):
    """Print a start message, block for ``sleep_time`` seconds, then print a done message."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    print('done sleeping...')


threads = []
for _ in range(10):
    # Positional arguments for the target are supplied through the `args` keyword.
    worker = threading.Thread(target=do_something, args=(1.5,))
    worker.start()
    threads.append(worker)

# Wait for every worker before reading the clock.
for worker in threads:
    worker.join()

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1.5 second...
sleeping for 1.5 second...
sleeping for 1.5 second...
sleeping for sleeping for 1.5 second...
1.5 sleeping for 1.5 second...
second...
sleeping for 1.5 second...
sleeping for 1.5 second...
sleeping for 1.5 second...
sleeping for 1.5 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
Finished in 1.53 seconds


In [33]:
# Capturing return values from the function run by each thread.
import queue

start = time.perf_counter()

# Queue that the worker threads put their return values on.
output = queue.Queue()


def do_something(sleep_time):
    """Sleep for ``sleep_time`` seconds, printing before and after, and return a fixed string."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    print('done sleeping...')
    return 'value is returned'


def _store_result(q, arg1):
    """Call do_something(arg1) and put whatever it returns on queue ``q``."""
    q.put(do_something(arg1))


threads = []
for _ in range(10):
    worker = threading.Thread(target=_store_result, args=(output, 1))
    worker.start()
    threads.append(worker)

for worker in threads:
    worker.join()

# Every producer has been joined, so draining until empty sees all results.
while not output.empty():
    print(output.get())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping forsleeping for 1 second...
 1 second...
sleeping for 1 second...
done sleeping...
done sleeping...done sleeping...
done sleeping...
done sleeping...
done sleeping...

done sleeping...done sleeping...done sleeping...


done sleeping...
value is returned
value is returned
value is returned
value is returned
value is returned
value is returned
value is returned
value is returned
value is returned
value is returned
Finished in 1.02 seconds


In [4]:
# A newer and more efficient way of using threads
import concurrent.futures

In [31]:
# Thread pool basics: schedule the function once.
start = time.perf_counter()


def do_something(sleep_time):
    """Print a start message, sleep for ``sleep_time`` seconds and return a completion string."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping...'


# The executor is used as a context manager.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # submit(fn, *args) schedules a single call and hands back a Future object.
    # The Future wraps the scheduled call: it lets us check in on it afterwards
    # (is it running? is it done?) and fetch its return value.
    f1 = executor.submit(do_something, 1.5)
    # result() blocks until the call has finished, then gives its return value.
    outcome = f1.result()
    print(outcome)

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1.5 second...
done sleeping...
Finished in 1.5 seconds


In [32]:
# Thread pool basics: schedule the function twice.
start = time.perf_counter()


def do_something(sleep_time):
    """Print a start message, sleep for ``sleep_time`` seconds and return a completion string."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping...'


# The executor is used as a context manager.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # submit() is asynchronous: both calls are scheduled before
    # either result is requested, so the two sleeps overlap.
    f1 = executor.submit(do_something, 1.5)
    f2 = executor.submit(do_something, 1.5)
    for scheduled in (f1, f2):
        print(scheduled.result())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1.5 second...
sleeping for 1.5 second...
done sleeping...
done sleeping...
Finished in 1.51 seconds


In [46]:
# Many tasks in a pool - part 1: identical sleep times.
start = time.perf_counter()


def do_something(sleep_time):
    """Print a start message, sleep for ``sleep_time`` seconds and return a completion string."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping...'


# The executor is used as a context manager.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # submit() is asynchronous, so all ten calls are scheduled straight away.
    futures = [executor.submit(do_something, 1) for _ in range(10)]
    # Results are read in submission order.
    for scheduled in futures:
        print(scheduled.result())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping forsleeping for 1 second...
 1 second...
sleeping forsleeping for 1 second...
 sleeping for 1 second...
1 second...
sleeping for 1 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
Finished in 1.02 seconds


In [50]:
# multiple threading - part 2
# `random` is imported here because this cell is the first one that uses it;
# without the import the cell fails with NameError on a fresh kernel.
import random

start = time.perf_counter()


def do_something(sleep_time):
    """Sleep for ``sleep_time`` seconds and report how long was slept.

    Prints a start message before sleeping and returns a string that
    embeds ``sleep_time``.
    """
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping '+str(sleep_time)+'seconds'


# we will use context manager to be safe
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for _ in range(10):
        # each task gets its own random sleep time between 1 and 10 (inclusive)
        f = executor.submit(do_something, random.randint(1, 10))
        futures.append(f)
    # Results are read in submission order: result() blocks until that
    # particular future has finished, even if later ones finished earlier.
    for f in futures:
        print(f.result())

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

sleeping for 2 second...
sleeping for 5 second...
sleeping for sleeping for10 4 second...
sleeping for  second...1
 second...
sleeping for sleeping for 9 second...
8sleeping for 10 second...
 second...
sleeping for 8 second...
sleeping for 8 second...
done sleeping 2seconds
done sleeping 5seconds
done sleeping 10seconds
done sleeping 4seconds
done sleeping 1seconds
done sleeping 8seconds
done sleeping 9seconds
done sleeping 10seconds
done sleeping 8seconds
done sleeping 8seconds
Finished in 10.03 seconds


In [48]:
# Another way to collect results - part 1.
# as_completed() hands back futures as they finish, so the return value of
# whichever task completes first is available first.
start = time.perf_counter()


def do_something(sleep_time):
    """Print a start message, sleep for ``sleep_time`` seconds and return a completion string."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping...'


# The executor is used as a context manager.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(do_something, 1) for _ in range(10)]
    for finished in concurrent.futures.as_completed(futures):
        print(finished.result())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 1 second...
sleeping for 1 sleeping forsecond...
 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping for 1 second...
sleeping forsleeping for 1 second...
 1 second...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
done sleeping...
Finished in 1.01 seconds


In [51]:
# Another way to collect results - part 2: random sleep times.
import random

start = time.perf_counter()


def do_something(sleep_time):
    """Sleep for ``sleep_time`` seconds and return a string that embeds ``sleep_time``."""
    print('sleeping for', sleep_time, 'second...')
    time.sleep(sleep_time)
    return 'done sleeping '+str(sleep_time)+'seconds'


# The executor is used as a context manager.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Each task is given a random sleep time between 1 and 10 (inclusive).
    futures = [
        executor.submit(do_something, random.randint(1, 10))
        for _ in range(10)
    ]
    # Whichever element of the futures list finishes first has its
    # return value printed first.
    for finished in concurrent.futures.as_completed(futures):
        print(finished.result())

stop = time.perf_counter()
elapsed = round(stop - start, 2)
print('Finished in', elapsed, 'seconds')

sleeping for 3 second...
sleeping for 2 second...
sleeping for sleeping for 9 second...
2 second...
sleeping for 8 second...
sleeping for 7 sleeping for 8 second...
sleeping for 8 second...
second...sleeping for 5 second...

sleeping for 6 second...
done sleeping 2seconds
done sleeping 2seconds
done sleeping 3seconds
done sleeping 5seconds
done sleeping 6seconds
done sleeping 7seconds
done sleeping 8seconds
done sleeping 8seconds
done sleeping 8seconds
done sleeping 9seconds
Finished in 9.03 seconds


In [1]:
# Mini Project - Download high resolution images faster

In [2]:
# Required data stuff
import requests
import time
import concurrent.futures

# All images live under the same host; only the photo id differs.
_UNSPLASH_BASE_URL = 'https://images.unsplash.com/'

_PHOTO_IDS = (
    'photo-1516117172878-fd2c41f4a759',
    'photo-1532009324734-20a7a5813719',
    'photo-1524429656589-6633a470097c',
    'photo-1530224264768-7ff8c1789d79',
    'photo-1564135624576-c5c88640f235',
    'photo-1541698444083-023c97d3f4b6',
    'photo-1522364723953-452d3431c267',
    'photo-1513938709626-033611b8cc03',
    'photo-1507143550189-fed454f93097',
    'photo-1493976040374-85c8e12f0c0e',
    'photo-1504198453319-5ce911bafcde',
    'photo-1530122037265-a5f1f91d3b99',
    'photo-1516972810927-80185027ca84',
    'photo-1550439062-609e1531270e',
    'photo-1549692520-acc6669e2f0c',
)

# Full download URLs, in the same order as the ids above.
img_urls = [_UNSPLASH_BASE_URL + photo_id for photo_id in _PHOTO_IDS]

In [3]:
# normal usual method: download the images one after another (no concurrency)

start = time.perf_counter()

for img_url in img_urls:
    # The timeout applies to connecting and to each wait for data, not to the
    # whole download, so a stalled server cannot hang the cell forever.
    response = requests.get(img_url, timeout=30)
    # Fail loudly on a 4xx/5xx answer instead of saving an error page as .jpg.
    response.raise_for_status()
    img_bytes = response.content
    # The 4th '/'-separated piece of the URL is the photo id, used as the file name.
    img_name = img_url.split('/')[3]
    img_name = f'{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(img_bytes)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

photo-1516117172878-fd2c41f4a759.jpg was downloaded...
photo-1532009324734-20a7a5813719.jpg was downloaded...
photo-1524429656589-6633a470097c.jpg was downloaded...
photo-1530224264768-7ff8c1789d79.jpg was downloaded...
photo-1564135624576-c5c88640f235.jpg was downloaded...
photo-1541698444083-023c97d3f4b6.jpg was downloaded...
photo-1522364723953-452d3431c267.jpg was downloaded...
photo-1513938709626-033611b8cc03.jpg was downloaded...
photo-1507143550189-fed454f93097.jpg was downloaded...
photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
photo-1504198453319-5ce911bafcde.jpg was downloaded...
photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
photo-1516972810927-80185027ca84.jpg was downloaded...
photo-1550439062-609e1531270e.jpg was downloaded...
photo-1549692520-acc6669e2f0c.jpg was downloaded...
Finished in 33.83 seconds


In [3]:
# Using threading

start = time.perf_counter()


def download_image(img_url, timeout=30):
    """Download one image and save it in the current directory as ``<photo id>.jpg``.

    Args:
        img_url: URL of the image; its 4th '/'-separated piece is used as the file name.
        timeout: seconds passed to ``requests.get`` as its timeout, so a stalled
            connection fails instead of blocking the worker thread forever.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(img_url, timeout=timeout)
    # Do not save an error page under a .jpg name.
    response.raise_for_status()
    img_name = img_url.split('/')[3]
    img_name = f'{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(response.content)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')


with concurrent.futures.ThreadPoolExecutor() as executor:
    download_futures = [executor.submit(download_image, img_url) for img_url in img_urls]
    # An exception raised inside a worker is stored on its future and would be
    # silently lost if the future were discarded; result() re-raises it here.
    for finished in concurrent.futures.as_completed(download_futures):
        finished.result()

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

photo-1564135624576-c5c88640f235.jpg was downloaded...
photo-1516117172878-fd2c41f4a759.jpg was downloaded...
photo-1507143550189-fed454f93097.jpg was downloaded...
photo-1549692520-acc6669e2f0c.jpg was downloaded...
photo-1516972810927-80185027ca84.jpg was downloaded...
photo-1504198453319-5ce911bafcde.jpg was downloaded...
photo-1530224264768-7ff8c1789d79.jpg was downloaded...
photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
photo-1550439062-609e1531270e.jpg was downloaded...
photo-1522364723953-452d3431c267.jpg was downloaded...
photo-1524429656589-6633a470097c.jpg was downloaded...
photo-1513938709626-033611b8cc03.jpg was downloaded...
photo-1532009324734-20a7a5813719.jpg was downloaded...
photo-1541698444083-023c97d3f4b6.jpg was downloaded...
photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
Finished in 278.91 seconds


In [4]:
# Using threading - similar to above code with fewer lines of code

start = time.perf_counter()


def download_image(img_url, timeout=30):
    """Download one image and save it in the current directory as ``<photo id>.jpg``.

    Args:
        img_url: URL of the image; its 4th '/'-separated piece is used as the file name.
        timeout: seconds passed to ``requests.get`` as its timeout, so a stalled
            connection fails instead of blocking the worker thread forever.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(img_url, timeout=timeout)
    # Do not save an error page under a .jpg name.
    response.raise_for_status()
    img_name = img_url.split('/')[3]
    img_name = f'{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(response.content)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')


with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() schedules one call per URL and returns a lazy iterator of results.
    # A worker's exception is only re-raised when its result is retrieved, so
    # the iterator is consumed here to surface failures instead of hiding them.
    for _ in executor.map(download_image, img_urls):
        pass

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

photo-1516117172878-fd2c41f4a759.jpg was downloaded...
photo-1549692520-acc6669e2f0c.jpg was downloaded...
photo-1507143550189-fed454f93097.jpg was downloaded...
photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
photo-1564135624576-c5c88640f235.jpg was downloaded...
photo-1550439062-609e1531270e.jpg was downloaded...
photo-1504198453319-5ce911bafcde.jpg was downloaded...
photo-1532009324734-20a7a5813719.jpg was downloaded...
photo-1522364723953-452d3431c267.jpg was downloaded...
photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
photo-1530224264768-7ff8c1789d79.jpg was downloaded...
photo-1524429656589-6633a470097c.jpg was downloaded...
photo-1513938709626-033611b8cc03.jpg was downloaded...
photo-1516972810927-80185027ca84.jpg was downloaded...
photo-1541698444083-023c97d3f4b6.jpg was downloaded...
Finished in 26.47 seconds


In [8]:
# using threading library - but not so good method, use above methods
import threading

start = time.perf_counter()


def download_image(img_url, timeout=30):
    """Download one image and save it in the current directory as ``<photo id>.jpg``.

    Args:
        img_url: URL of the image; its 4th '/'-separated piece is used as the file name.
        timeout: seconds passed to ``requests.get`` as its timeout; without it a
            stalled connection would keep the thread alive and make join() block forever.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(img_url, timeout=timeout)
    # Do not save an error page under a .jpg name.
    response.raise_for_status()
    img_name = img_url.split('/')[3]
    img_name = f'{img_name}.jpg'
    with open(img_name, 'wb') as img_file:
        img_file.write(response.content)
    # Report only after the file has been written and closed.
    print(f'{img_name} was downloaded...')


# Start one thread per URL, then wait for all of them.
threads = []
for img_url in img_urls:
    t = threading.Thread(target=download_image, args=(img_url,))
    t.start()
    threads.append(t)
# Iterate over the threads themselves rather than over indices of img_urls.
for t in threads:
    t.join()

stop = time.perf_counter()
print('Finished in', round(stop-start, 2), 'seconds')

photo-1516117172878-fd2c41f4a759.jpg was downloaded...
photo-1530122037265-a5f1f91d3b99.jpg was downloaded...
photo-1564135624576-c5c88640f235.jpg was downloaded...
photo-1507143550189-fed454f93097.jpg was downloaded...
photo-1549692520-acc6669e2f0c.jpg was downloaded...
photo-1516972810927-80185027ca84.jpg was downloaded...
photo-1504198453319-5ce911bafcde.jpg was downloaded...
photo-1550439062-609e1531270e.jpg was downloaded...
photo-1513938709626-033611b8cc03.jpg was downloaded...
photo-1530224264768-7ff8c1789d79.jpg was downloaded...
photo-1524429656589-6633a470097c.jpg was downloaded...
photo-1522364723953-452d3431c267.jpg was downloaded...
photo-1532009324734-20a7a5813719.jpg was downloaded...
photo-1493976040374-85c8e12f0c0e.jpg was downloaded...
photo-1541698444083-023c97d3f4b6.jpg was downloaded...
Finished in 24.59 seconds
