In [1]:
#Tests of multi-threading and multi-processing performance in Python 
import random
from threading import Thread
from multiprocessing import Process
import time
from urllib.request import urlopen

In [2]:
# First, let's test a CPU-bound task (with no I/O)
def countdown(n):
    """Spin through ``n`` decrements as a pure CPU workload with no I/O.

    Args:
        n: Starting counter value; nothing happens when it is zero or negative.

    Returns:
        None. The function exists only to burn CPU time for the benchmarks.
    """
    remaining = n
    while remaining > 0:
        remaining = remaining - 1

In [3]:
# Baseline: run the whole countdown on the main thread.
num = 1000000000  # total loop iterations; the thread/process cells each run two halves of it

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
countdown(num)
end = time.perf_counter()

single_thread_result = end - start
print('Single thread test')
print('Time taken in seconds: ', single_thread_result)

Single thread test
Time taken in seconds:  52.53856897354126


In [4]:
# Same total amount of work as the single-thread run, split evenly over two threads.
thread_1 = Thread(target=countdown, args=(num//2,))
thread_2 = Thread(target=countdown, args=(num//2,))

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
thread_1.start()
thread_2.start()
# Wait for both workers so the interval covers the complete workload.
thread_1.join()
thread_2.join()
end = time.perf_counter()

multi_thread_result = end - start
print("Multi-threading test")
print('Time taken in seconds: ', multi_thread_result)

Multi-threading test
Time taken in seconds:  54.16907000541687


In [5]:
def performance(single, multi):
    """Print which of two timings was the faster one.

    Args:
        single: Elapsed time of the single-threaded run.
        multi: Elapsed time of the multi-threaded run.

    A tie is reported in favour of the single-threaded run.
    """
    verdict = (
        "Single threaded process is more efficient than multi-threaded"
        if single <= multi
        else "Multi-threaded process is more efficient than single"
    )
    print(verdict)

In [6]:
# Compare the CPU-bound timings recorded by the single-thread and two-thread runs.
performance(single_thread_result, multi_thread_result)

Single threaded process is more efficient than multi-threaded


In [7]:
'''Clearly, the GIL makes multi-threading very inefficient for CPU-bound tasks with no I/O. This is because the threads essentially run on a single core, but with even more overhead due to the lock. ''' 

'Clearly, the GIL makes multi-threading very inefficient for CPU-bound tasks with no I/O. This is because the threads essentially run on a single core, but with even more overhead due to the lock. '

In [11]:
# Now let's test an I/O-bound task

# Sites fetched by the I/O benchmarks; the concurrent runs split this list
# into its first three and last three entries.
urls = [
    'http://www.youtube.com',
    'http://www.facebook.com',
    'http://www.baidu.com',
    'http://www.yahoo.com',
    'http://www.amazon.com',
    'http://www.wikipedia.org',
]


def open_url_single_thread(url):
    """Open every URL in ``url`` one after another and report the elapsed time.

    Args:
        url: Iterable of URL strings. Despite the singular name, this is the
            whole collection to fetch.

    Returns:
        A message of the form
        ``"This single-threaded process took: <seconds> seconds"``.

    Any exception raised by ``urlopen`` (for example on an unreachable host)
    propagates to the caller.
    """
    # perf_counter() is a monotonic, high-resolution clock, which makes it a
    # better fit for measuring an interval than the wall-clock time.time().
    start = time.perf_counter()
    for address in url:
        # The context manager closes each response deterministically instead
        # of leaving the connection open until garbage collection.
        with urlopen(address):
            pass
    stop = time.perf_counter()
    return "This single-threaded process took: " + str(stop - start) + " seconds"

# Fetch every URL sequentially; the returned timing message is the cell's output.
open_url_single_thread(urls) 

'This single-threaded process took: 1.4330229759216309 seconds'

In [13]:
def open_url_multi_thread(url):
    """Open every URL in ``url`` in order; intended as a worker target.

    Args:
        url: Iterable of URL strings. Despite the singular name, this is the
            slice of the URL list assigned to this worker.

    Returns:
        None. Any exception raised by ``urlopen`` propagates to the caller.
    """
    for address in url:
        # The context manager closes each response deterministically instead
        # of leaving the connection open until garbage collection.
        with urlopen(address):
            pass
        
# Each thread fetches one half of the URL list.
thread_1 = Thread(target=open_url_multi_thread, args=[urls[:3]])
thread_2 = Thread(target=open_url_multi_thread, args=[urls[3:]])

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
thread_1.start()
thread_2.start()
# Wait for both workers so the interval covers every request.
thread_1.join()
thread_2.join()
end = time.perf_counter()

multi_thread_result = end - start
print("This multi-threaded process took: " +  str(multi_thread_result) + " seconds")

This multi-threaded process took: 0.7608098983764648 seconds


In [16]:
'''Interesting: for tasks that involve I/O, multi-threading in Python is actually quite efficient. This is because the GIL is released whenever a thread is waiting on I/O, so concurrency is possible (in contrast to CPU-bound tasks, which never release it unless forced).'''

'Interesting: for tasks that involve I/O, multi-threading in Python is actually quite efficient. This is because the GIL is released whenever a thread is waiting on I/O, so concurrency is possible (in contrast to CPU-bound tasks, which never release it unless forced).'

In [17]:
#let's repeat the tests, but for processes instead of threads 

# First, the CPU-bound task (with no I/O)
# Same total amount of work as the single-thread run, split evenly over two processes.
process_1 = Process(target=countdown, args=(num//2,))
process_2 = Process(target=countdown, args=(num//2,))

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
process_1.start()
process_2.start()
# Wait for both workers so the interval covers the complete workload.
process_1.join()
process_2.join()
end = time.perf_counter()

multi_process_result = end - start
print("Multi-processing test")
print('Time taken in seconds: ', multi_process_result)

Multi-processing test
Time taken in seconds:  28.029763221740723


In [18]:
'''Wow, look at that improvement. Clearly, for CPU intensive tasks, multi-processing is the way to go'''

'Wow, look at that improvement. Clearly, for CPU intensive tasks, multi-processing is the way to go'

In [20]:
#Now, the I/O task 

# The worker is the open_url_multi_thread helper already defined for the
# threaded I/O test, so it is not defined a second time here. Each process
# fetches one half of the URL list.
process_1 = Process(target=open_url_multi_thread, args=[urls[:3]])
process_2 = Process(target=open_url_multi_thread, args=[urls[3:]])

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
process_1.start()
process_2.start()
# Wait for both workers so the interval covers every request.
process_1.join()
process_2.join()
end = time.perf_counter()

# Stored under its own name so the threaded timing in multi_thread_result is
# not overwritten by a process-based measurement.
multi_process_io_result = end - start
print("This multi-process run took: " + str(multi_process_io_result) + " seconds")

This multi-threaded process took: 0.8122298717498779 seconds


In [21]:
'''Not bad, but for I/O tasks multi-threading still seems superior'''

'Not bad, but for I/O tasks multi-threading still seems superior'