# Performance of Multithreading and Multiprocessing in Python


In [None]:
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import numpy as np
import time
import matplotlib.pyplot as plt
import random
import string
%matplotlib inline

In [None]:
# Chart titles shared by every multi-worker run in the notebook.
MULTIPROCESSING_TITLE = "Multiprocessing"
MULTITHREADING_TITLE = "Multithreading"

def visualize_runtimes(results, title):
    """Plot one horizontal bar per task and return the overall elapsed time.

    Args:
        results: Sequence of ``(start, stop)`` pairs in seconds, one per task.
        title: Title placed above the chart.

    Returns:
        Seconds between the earliest start and the latest stop, or ``0.0``
        when ``results`` is empty.
    """
    # reshape keeps an empty result list unpackable into two empty columns.
    timings = np.asarray(results, dtype=float).reshape(-1, 2)
    start, stop = timings.T
    has_tasks = len(start) > 0
    if has_tasks:
        # Each bar begins at the task's start time and spans its duration.
        plt.barh(range(len(start)), stop - start, left=start)
    plt.grid(axis='x')
    plt.ylabel("Tasks")
    plt.xlabel("Seconds")
    plt.title(title)
    if not has_tasks:
        return 0.0
    # Tasks may run concurrently, so the last submitted task is not
    # necessarily the last to finish; use the extremes, not the end positions.
    return stop.max() - start.min()

In [None]:
def multithreading(func, args, workers):
    """Run ``func(item, begin_time)`` for every item of ``args`` on threads.

    Args:
        func: Callable taking one item of ``args`` and the shared reference
            timestamp.
        args: Iterable of per-task arguments; it does not need to support
            ``len()``.
        workers: Maximum number of worker threads.

    Returns:
        List of the values returned by ``func``, in the order of ``args``.
    """
    from itertools import repeat

    # Taken once so every task measures against the same reference point.
    begin_time = time.time()
    with ThreadPoolExecutor(max_workers=workers) as executor:
        # map() pairs the iterables like zip(), so the endless repeat() is
        # consumed only as far as ``args`` reaches.
        return list(executor.map(func, args, repeat(begin_time)))
        
def multiprocessing(func, args, workers):
    """Run ``func(item, begin_time)`` for every item of ``args`` on processes.

    Args:
        func: Callable taking one item of ``args`` and the shared reference
            timestamp. It is sent to worker processes, so it must be
            picklable.
        args: Iterable of per-task arguments; it does not need to support
            ``len()``.
        workers: Maximum number of worker processes.

    Returns:
        List of the values returned by ``func``, in the order of ``args``.
    """
    from itertools import repeat

    # Taken once so every task measures against the same reference point.
    begin_time = time.time()
    with ProcessPoolExecutor(max_workers=workers) as executor:
        # map() pairs the iterables like zip(), so the endless repeat() is
        # consumed only as far as ``args`` reaches.
        return list(executor.map(func, args, repeat(begin_time)))

## URL access

In [None]:
from urllib.request import urlopen

def download(url, base):
    """Open ``url`` and report when the request started and finished.

    Failures are printed instead of raised, so one failing request does not
    abort the remaining tasks of a run.

    Args:
        url: Address passed to ``urlopen``.
        base: Reference timestamp subtracted from both measurements.

    Returns:
        ``(start, stop)`` in seconds relative to ``base``.
    """
    start = time.time() - base
    try:
        # Close the response right away so its connection is not left open
        # until garbage collection.
        # NOTE(review): the response body is never read, so this times
        # opening the URL rather than a full transfer - confirm intended.
        with urlopen(url):
            pass
    except Exception as e:
        print('ERROR: %s' % e)
    stop = time.time() - base
    return start, stop

In [None]:
# Every task requests the same resource, so all tasks do comparable work.
URL = 'http://137.204.57.73/50megs-random-file'
N = 16
urls = [URL] * N

#### Sequential

In [None]:
# Sequential baseline: open the URLs one after another (5 repeats of 1 loop).
# The (start, stop) pairs are discarded here, so the base value 1 is arbitrary.
%timeit -n 1 -r 5 [download(url, 1) for url in urls]

#### Multithreading

In [None]:
# One worker thread: the threaded baseline for the URL tasks.
visualize_runtimes(multithreading(download, urls, 1), "Single Thread")

In [None]:
# Same URL tasks on two worker threads.
visualize_runtimes(multithreading(download, urls, 2),MULTITHREADING_TITLE)

In [None]:
# Same URL tasks on four worker threads.
visualize_runtimes(multithreading(download, urls, 4),MULTITHREADING_TITLE)

#### Multiprocessing

In [None]:
# The process-pool runs call download from the out_api module instead of the
# function defined in this notebook.
# NOTE(review): presumably an equivalent copy kept in an importable module so
# worker processes can load it - verify against out_api.
import out_api
# One worker process: the process-pool baseline for the URL tasks.
visualize_runtimes(multiprocessing(out_api.download, urls, 1), "Single Process")

In [None]:
# Same URL tasks on two worker processes.
visualize_runtimes(multiprocessing(out_api.download, urls, 2), MULTIPROCESSING_TITLE)

In [None]:
# Same URL tasks on four worker processes.
visualize_runtimes(multiprocessing(out_api.download, urls, 4), MULTIPROCESSING_TITLE)

## IO Heavy

In [None]:
def io_heavy(text, base, path='output.txt'):
    """Write ``text`` to a file and report when the write started and ended.

    Args:
        text: String written to the file, encoded as UTF-8.
        base: Reference timestamp subtracted from both measurements.
        path: Destination file; an existing file is truncated. Defaults to
            ``'output.txt'`` in the current working directory.

    Returns:
        ``(start, stop)`` in seconds relative to ``base``.
    """
    start = time.time() - base
    # The with-block closes the file even when the write fails, and the close
    # still completes before ``stop`` is taken.
    with open(path, 'wt', encoding='utf-8') as f:
        f.write(text)
    stop = time.time() - base
    return start, stop

In [None]:
# Number of write tasks. N is rebound here; the URL section assigns the same
# value and the CPU section later assigns 10**7, so run the cells in order.
N=16
# 5 * 10**7 random lowercase ASCII letters, built once and reused by every
# task (about 50 MB once written as UTF-8).
TEXT = ''.join(random.choice(string.ascii_lowercase) for i in range(10**7*5))

#### Sequential

In [None]:
# Sequential baseline: write TEXT N times in a row (5 repeats of 1 loop).
# The (start, stop) pairs are discarded here, so the base value 1 is arbitrary.
%timeit -n 1 -r 5 [io_heavy(TEXT,1) for i in range(N)]

#### Multithreading

In [None]:
# One worker thread: the threaded baseline for the file-write tasks.
visualize_runtimes(multithreading(io_heavy, [TEXT for i in range(N)], 1),"Single Thread")

In [None]:
# Same write tasks on two worker threads.
visualize_runtimes(multithreading(io_heavy, [TEXT for i in range(N)], 2),MULTITHREADING_TITLE)

In [None]:
# Same write tasks on four worker threads.
visualize_runtimes(multithreading(io_heavy, [TEXT for i in range(N)], 4),MULTITHREADING_TITLE)

#### Multiprocessing

In [None]:
# The process-pool runs call io_heavy from the out_io module instead of the
# function defined in this notebook.
# NOTE(review): presumably an equivalent copy kept in an importable module so
# worker processes can load it - verify against out_io.
import out_io
# One worker process: the process-pool baseline for the file-write tasks.
visualize_runtimes(multiprocessing(out_io.io_heavy, [TEXT for i in range(N)], 1),"Single Process")

In [None]:
# Same write tasks on two worker processes.
visualize_runtimes(multiprocessing(out_io.io_heavy, [TEXT for i in range(N)], 2),MULTIPROCESSING_TITLE)

In [None]:
# Same write tasks on four worker processes.
visualize_runtimes(multiprocessing(out_io.io_heavy, [TEXT for i in range(N)], 4),MULTIPROCESSING_TITLE)

## CPU Intensive

In [None]:
def cpu_heavy(n, base):
    """Keep the CPU busy with a pure-Python summing loop and time it.

    Args:
        n: Number of loop iterations; the integers ``0 .. n-1`` are summed.
        base: Reference timestamp subtracted from both measurements.

    Returns:
        ``(start, stop)`` in seconds relative to ``base``.
    """
    started = time.time() - base
    # The total is never used; the loop exists only to consume CPU time.
    total = 0
    for value in range(n):
        total += value
    return started, time.time() - base

In [None]:
# ITERS tasks, each running a loop of N iterations.
ITERS = 16
N = 10 ** 7

#### Sequential

In [None]:
# Sequential baseline: run the CPU task ITERS times in a row (5 repeats of
# 1 loop). The (start, stop) pairs are discarded; only %timeit's figure is used.
%timeit -n 1 -r 5  [cpu_heavy(N, time.time()) for i in range(ITERS)]

#### Multithreading

In [None]:
# One worker thread: the threaded baseline for the CPU tasks.
visualize_runtimes(multithreading(cpu_heavy, [N for i in range(ITERS)], 1),"Single Thread")

In [None]:
# Same CPU tasks on two worker threads.
visualize_runtimes(multithreading(cpu_heavy, [N for i in range(ITERS)], 2),MULTITHREADING_TITLE)

In [None]:
# Same CPU tasks on four worker threads.
visualize_runtimes(multithreading(cpu_heavy, [N for i in range(ITERS)], 4),MULTITHREADING_TITLE)

#### Multiprocessing

In [None]:
# The process-pool runs call cpu_heavy from the out_compute module instead of
# the function defined in this notebook.
# NOTE(review): presumably an equivalent copy kept in an importable module so
# worker processes can load it - verify against out_compute.
import out_compute
# One worker process: the process-pool baseline for the CPU tasks.
visualize_runtimes(multiprocessing(out_compute.cpu_heavy, [N for i in range(ITERS)], 1),"Single Process")

In [None]:
# Same CPU tasks on two worker processes.
visualize_runtimes(multiprocessing(out_compute.cpu_heavy, [N for i in range(ITERS)], 2),MULTIPROCESSING_TITLE)

In [None]:
# Same CPU tasks on four worker processes.
visualize_runtimes(multiprocessing(out_compute.cpu_heavy, [N for i in range(ITERS)], 4),MULTIPROCESSING_TITLE)