In [13]:
#First let's create a test script for proof of concept. 
#This is a simple data structure transformation script taken from
#Youtuber 'Real Python' at https://www.youtube.com/watch?v=aysceqdGFw8

#Let's make the proper imports.
import collections
import os
import time
from pprint import pprint

# Serial Processing

The following code builds a tuple of `Scientist` records, each holding a scientist's name and some data about them. The tuple is then passed to `map`, which applies a simple function to each element of the input tuple in turn. Each result is collected into a second tuple, and only then is the next entry processed.

This runs in one process on one thread, so the computer must wait for the first entry to finish processing before the second one can begin. 

With the addition of a time.sleep() requirement, this can take quite some time!

In [14]:
# Lightweight immutable record describing a single scientist.
Scientist = collections.namedtuple('Scientist', 'name field born PhD')

# Sample data set; each row is given in field order (name, field, born, PhD).
scientists = tuple(
    Scientist._make(row)
    for row in (
        ('Sean Lewis', 'Astrophysics', 1994, False),
        ('Weixiang Yu', 'Astronomy', 1992, False),
        ('Jaqueline Moreno', 'Astronomy', 1991, True),
        ('Stephen Sclafani', 'Particle', 1986, False),
        ('Eli Worth', 'Condensed Matter', 1993, False),
        ('David Lioi', 'Biophysics', 1990, True),
    )
)

# Show the input records before any processing happens.
pprint(scientists)

def transform(x, current_year=2020, delay=1):
    """Convert one scientist record into a ``{'name', 'age'}`` dict.

    A message tagged with the ID of the executing process is printed before
    and after the work, so serial and parallel runs can be told apart in the
    cell output.

    Args:
        x: Record exposing ``name`` and ``born`` attributes.
        current_year: Reference year for the age calculation. Defaults to
            2020, the value that was previously hard-coded.
        delay: Seconds to sleep in order to simulate an expensive task.
            Defaults to 1, the value that was previously hard-coded.

    Returns:
        dict: ``{'name': x.name, 'age': current_year - x.born}``.
    """
    pid = os.getpid()
    print('\nProcess {0} working record {1}'.format(pid, x.name))

    # Stand-in for real work; makes the serial/parallel difference visible.
    time.sleep(delay)
    record = {'name': x.name, 'age': current_year - x.born}

    print('\nProcess {0} done processing record {1}'.format(pid, x.name))
    return record

# Time a plain single-process map: records are handled strictly one at a time.
start = time.time()
result = tuple(map(transform, scientists))
end = time.time()

elapsed = end - start

# Report the transformed records and how long the whole pass took.
print('\n')
pprint(result)
print('\n Time to complete:  {0:.3f}'.format(elapsed))

(Scientist(name='Sean Lewis', field='Astrophysics', born=1994, PhD=False),
 Scientist(name='Weixiang Yu', field='Astronomy', born=1992, PhD=False),
 Scientist(name='Jaqueline Moreno', field='Astronomy', born=1991, PhD=True),
 Scientist(name='Stephen Sclafani', field='Particle', born=1986, PhD=False),
 Scientist(name='Eli Worth', field='Condensed Matter', born=1993, PhD=False),
 Scientist(name='David Lioi', field='Biophysics', born=1990, PhD=True))

Process 45536 working record Sean Lewis

Process 45536 done processing record Sean Lewis

Process 45536 working record Weixiang Yu

Process 45536 done processing record Weixiang Yu

Process 45536 working record Jaqueline Moreno

Process 45536 done processing record Jaqueline Moreno

Process 45536 working record Stephen Sclafani

Process 45536 done processing record Stephen Sclafani

Process 45536 working record Eli Worth

Process 45536 done processing record Eli Worth

Process 45536 working record David Lioi

Process 45536 done processing re

Great! We can see the input, the ID of the process (from `os.getpid()`) that handles each task, the output, and the total time to complete the cell. As the output shows, the same process handles every task, and we can watch each task crawl its way through one after another. How can we utilize the full capacity of the quad-core processor in the MacBook Pro?

# Parallel Processing

In [15]:
import multiprocessing

pprint(scientists)

start = time.time()
print('\n')

# Pool() with no arguments starts one worker process per available CPU.
# Using it as a context manager guarantees the workers are shut down once the
# map has finished, instead of lingering after the cell completes.
with multiprocessing.Pool() as pool:
    # map() blocks until every record has been transformed and returns the
    # results as a list, in the same order as the input.
    result = pool.map(transform, scientists)
    # Stop the clock before pool teardown so only startup + work is measured.
    end = time.time()

print('\n')
pprint(result)
print('\n Time to complete:  {0:.3f}'.format(end-start))

(Scientist(name='Sean Lewis', field='Astrophysics', born=1994, PhD=False),
 Scientist(name='Weixiang Yu', field='Astronomy', born=1992, PhD=False),
 Scientist(name='Jaqueline Moreno', field='Astronomy', born=1991, PhD=True),
 Scientist(name='Stephen Sclafani', field='Particle', born=1986, PhD=False),
 Scientist(name='Eli Worth', field='Condensed Matter', born=1993, PhD=False),
 Scientist(name='David Lioi', field='Biophysics', born=1990, PhD=True))



Process 45866 working record Sean Lewis
Process 45869 working record Stephen Sclafani
Process 45868 working record Jaqueline Moreno
Process 45867 working record Weixiang Yu
Process 45871 working record David Lioi
Process 45870 working record Eli Worth






Process 45867 done processing record Weixiang Yu
Process 45868 done processing record Jaqueline Moreno
Process 45869 done processing record Stephen Sclafani
Process 45871 done processing record David Lioi
Process 45866 done processing record Sean Lewis
Process 45870 done processing reco

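As you can see, 6 different worker processes (note the distinct process IDs) are active in this execution. Each one takes on a single task — applying the transform function to one data table entry — and they all execute simultaneously. The result is a run that completes ~6x faster than before! (Because each task spends its time sleeping rather than computing, the speedup here can exceed the number of physical cores.)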