# Multiprocessing using Pools 
A simple framework for assessing the impact of multiprocessing on runtime on a multi-core machine. 

In [1]:
import time
import math
import multiprocessing
from multiprocessing import Pool
import matplotlib.pyplot as plt

# A function for timing a job that uses a pool of processes.
def pool_process(f, data, pool_size):
    """Time how long a pool of worker processes takes to map ``f`` over ``data``.

    The measured interval covers creating the pool, running the map and
    shutting the pool down, so it includes process start-up overhead.

    Args:
        f: Function that takes a single argument. It is sent to the worker
            processes, so it must be picklable.
        data: Iterable of arguments on which ``f`` will be mapped.
        pool_size: Number of processes in the pool.

    Returns:
        Elapsed wall-clock time in seconds, as a float.

    Raises:
        Any exception raised by ``f`` in a worker is re-raised by ``pool.map``
        after the pool has been shut down.
    """
    # perf_counter is monotonic, unlike time.time(), so the measured interval
    # cannot be distorted by system clock adjustments.
    start = time.perf_counter()
    pool = Pool(processes=pool_size)
    try:
        pool.map(f, data)  # results are discarded; only the elapsed time matters
        pool.close()       # no more tasks will be submitted
    except BaseException:
        # Stop the workers right away so a failing task does not leak processes.
        pool.terminate()
        raise
    finally:
        pool.join()        # wait for the worker processes to exit
    return time.perf_counter() - start
 

## Load Target function separately 
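The target function needs to be loaded in from a separate python file: the worker processes must be able to import it by name, and a function defined only inside the notebook cannot be found by workers on platforms that start them as fresh interpreters (e.g. Windows and macOS).
So save this function declaration in a file miscFunc.py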

    import math

    def my_func(x):
        """Return the square root of ``x``."""
        return math.sqrt(x)
    

In [2]:
import miscFunc as miscFunc # if miscFunc.py is changed the kernel needs to be restarted. 

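This verbose version shows which process in the pool is running each task.   
Put this definition in miscFunc.py as well, and add `import multiprocessing` at the top of that file, because the function calls `multiprocessing.current_process()`.  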

    def my_func_verbose(x):
        """Return the square root of ``x`` and print which process computed it."""
        root = math.sqrt(x)
        worker = multiprocessing.current_process()
        print("Task", worker, x, root)
        return root

In [3]:
# Inputs for the timing demo. NOTE: reassigned to range(10) in the next code cell before it is used.
dataRange = range(20)

Use the pool_process function to apply my_func to the data in dataRange.  
This task is so light it requires very little processing time, so the measured time is almost entirely the overhead of starting and shutting down the pool. 

In [4]:
# Time a one-process pool mapping my_func over the integers 0..9.
# The cell's displayed value is the elapsed time in seconds returned by pool_process.
dataRange = range(10)
pool_process(miscFunc.my_func, dataRange, 1)

2.56972074508667

## A naive function for checking primes 

In [None]:
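# Presumably a reference copy of check_prime; the function actually called below is miscFunc.check_prime.
# NOTE(review): the `num > 0` guard lets 1 through; range(2, 1) is empty, so the
# for/else branch reports 1 as prime. The guard should presumably be `num > 1`.
# def check_prime(num):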
#     t1 = time.time()
#     res = False
#     if num > 0:
#         # check for factors
#         for i in range(2,num):
#             if (num % i) == 0:
#                 print(num,"is not a prime number")
#                 print(i,"times",num//i,"is",num)
#                 print("Time:", int(time.time()-t1))
#                 break
#         else:
#             print(num,"is a prime number")
#             print("Time:", time.time()-t1) 
#             res = True
#             # if input number is less than
#             # or equal to 1, it is not prime
#     print("Task", multiprocessing.current_process(), num, res)
#     return res


In [5]:
# 15488801 is prime (see the output), so this is the slow case for check_prime.
miscFunc.check_prime(15488801)

15488801 is a prime number
Time: 3.603390693664551
Task <_MainProcess name='MainProcess' parent=None started> 15488801 True


True

In [6]:
# 15488803 is divisible by 11 (see the output), so check_prime returns almost immediately.
miscFunc.check_prime(15488803)

15488803 is not a prime number
11 times 1408073 is 15488803
Time: 0
Task <_MainProcess name='MainProcess' parent=None started> 15488803 False


False

In [7]:
# Small sample inputs for the verbose demo in the next cell.
check_work = [45,13,4]

In [8]:
# Map the verbose task over check_work with a one-process pool; the displayed value is the elapsed time in seconds.
pool_process(miscFunc.my_func_verbose, check_work, 1)

2.448552131652832

# Multiprocessing comparison for prime number checking function

## Set up input array

In [9]:
# Benchmark check_prime on growing inputs with pools of 1 to 4 processes.
# Each *_core_runtime list receives one elapsed time (seconds) per input size.
one_core_runtime = []
two_core_runtime = []
three_core_runtime = []
four_core_runtime = []

# Single source of truth for the input sizes, so that length_of_arrays below
# always lines up with the runtime lists.
array_sizes = range(1, 100)

# Pool size -> list that collects its timings; replaces four copy-pasted appends.
runtimes_by_pool_size = {
    1: one_core_runtime,
    2: two_core_runtime,
    3: three_core_runtime,
    4: four_core_runtime,
}

for i in array_sizes:
    print(i)
    # i consecutive integers starting at 10,000,000
    number_array = list(range(10000000, 10000000 + i))
    for pool_size, runtimes in runtimes_by_pool_size.items():
        runtimes.append(pool_process(miscFunc.check_prime, number_array, pool_size))

# Input sizes matching the runtime lists entry for entry.
length_of_arrays = list(array_sizes)

# with open("primes1.txt") as f:
#     lines = f.readlines()
#     for line in lines[50000:50010]:
#         if line == "\n":
#             continue
#         line = line.replace("  ", ",").replace(" \n", "")[1:]
#         line = line.split(",")
#         for number in line:
#             test_prime_array.append(int(number))

# f.close()

1
2
3
4


KeyboardInterrupt: 

In [10]:
# Baseline run: time check_prime with a single worker process on inputs of
# 10, 20, ..., 100 consecutive integers starting at 10,000,000.
one_core_runtime = []

for i in range(10, 110, 10):
    print(i)
    number_array = list(range(10000000, 10000000 + i))
    one_core_runtime.append(
        pool_process(miscFunc.check_prime, number_array, 1)
    )

10
20
30
40
50
60
70
80
90
100


In [11]:
# Parallel run: time check_prime with a pool of four worker processes on inputs
# of 10, 20, ..., 100 consecutive integers starting at 10,000,000.
four_core_runtime = []

for i in range(10, 110, 10):
    print(i)
    number_array = list(range(10000000, 10000000 + i))
    four_core_runtime.append(
        pool_process(miscFunc.check_prime, number_array, 4)
    )

10
20
30
40
50
60
70
80
90
100


In [12]:
# Elapsed times in seconds for the one-process pool, one entry per input size (10, 20, ..., 100).
print(one_core_runtime)

[2.38470196723938, 3.7512669563293457, 3.7831101417541504, 3.8577563762664795, 4.039170265197754, 3.927125930786133, 4.798143148422241, 6.798824310302734, 6.757781267166138, 6.918298721313477]


In [13]:
# Elapsed times in seconds for the four-process pool, one entry per input size (10, 20, ..., 100).
print(four_core_runtime)

[3.01619553565979, 4.494662284851074, 4.1751158237457275, 4.2712225914001465, 4.512353897094727, 4.8540472984313965, 4.543865919113159, 5.977117538452148, 5.930950403213501, 5.533954858779907]


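Observations: When the input array size reaches 20, the runtime jumps noticeably (in the run shown above, from about 2.4 s to about 3.8 s with one process, and from about 3.0 s to about 4.5 s with four). For smaller array sizes (up to 60 here), a pool of 4 processes tends to underperform relative to a pool of 1, because the extra start-up cost outweighs the little work there is to share. However, as the input size gets larger (from 70 upward here), the pool of 4 starts to outperform the pool of 1.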