<h2>Section 1: Immutable Data Structures<h2>

In [None]:
scientists1 = [
{'name': 'Ada Lovelace', 'field' : 'math', 'born': 1815, 'nobel': False},
{'name': 'Emmy Noether', 'field' : 'math', 'born': 1882, 'nobel': False},
{'name': 'Marie Curie', 'field' : 'physics', 'born': 1867, 'nobel': True},
{'name': 'Tu Youyou', 'field' : 'chemistry', 'born': 1930, 'nobel': True},
{'name': 'Ada Yonath', 'field' : 'chemistry', 'born': 1939, 'nobel': True},
{'name': 'Vera Rubin', 'field' : 'astronomy', 'born': 1928, 'nobel': False},
{'name': 'Sally Ride', 'field' : 'physics', 'born': 1951, 'nobel': False},
]

In [24]:
scientists1

[{'name': 'Ed Lovelace', 'field': 'math', 'born': 1815, 'nobel': False},
 {'name': 'Emmy Noether', 'field': 'math', 'born': 1882, 'nobel': False},
 {'name': 'Marie Curie', 'field': 'physics', 'born': 1867, 'nobel': True},
 {'name': 'Tu Youyou', 'field': 'chemistry', 'born': 1930, 'nobel': True},
 {'name': 'Ada Yonath', 'field': 'chemistry', 'born': 1939, 'nobel': True},
 {'name': 'Vera Rubin', 'field': 'astronomy', 'born': 1928, 'nobel': False},
 {'name': 'Sally Ride', 'field': 'physics', 'born': 1951, 'nobel': False}]

In [20]:
scientists1[0]['name'] = 'Ed Lovelace' # Mutable

In [1]:
# creates class of type Scientist with the given attributes
import collections
Scientist = collections.namedtuple('Scientist', [
    'name',
    'field',
    'born',
    'nobel'
])

In [48]:
# Example - create ada object of type Scientist
ada = Scientist (name='Ada Lovelace', field='math', born=1815, nobel=False)
#Printing its attribute which are not mutable
ada.name

'Ada Lovelace'

In [60]:
scientists_list = [
    Scientist (name='Ada Lovelace', field='math', born=1815, nobel=False),
    Scientist (name='Emmy Noether', field='math', born=1882, nobel=False),
    Scientist (name='Marie Curie', field='physics', born=1867, nobel=True)
]

In [61]:
scientists_list[0]['name'] = "Doggy" # immutable

TypeError: 'Scientist' object does not support item assignment

In [62]:
del scientists_list[0] # can still delete the element in the list of scientists.
scientists_list

[Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True)]

In [2]:
# create a tuple of Scientists to make it completley immutable
scientists_tuple = (
    Scientist (name='Ada Lovelace', field='math', born=1815, nobel=False),
    Scientist (name='Emmy Noether', field='math', born=1882, nobel=False),
    Scientist (name='Marie Curie', field='physics', born=1867, nobel=True),
    Scientist (name='Sally Ride', field='physics', born=1899, nobel=True)
)

In [89]:
del scientists_tuple[0] # Immutable 
scientists_tuple[0]['name'] = "Doggy" # immutable

TypeError: 'tuple' object doesn't support item deletion

<h2>Section 2: The filter() Function<h2>

In [66]:
fs = filter( lambda x: x.nobel is True, scientists_tuple ) # returns an iterable object
fs # prints the iterable object

<filter at 0x7f2456a0aa30>

In [68]:
next(fs) # iterate through the next element in the object.

Scientist(name='Marie Curie', field='physics', born=1867, nobel=True)

In [77]:
tuple(filter( lambda x: x.nobel is True, scientists_tuple )) 
# convert filters iterable to a tuple and return it

(Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),)

In [78]:
tuple(filter( lambda x: True, scientists_tuple )) 
# Filter and return all elements 

(Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True))

In [83]:
tuple(filter( lambda x: x.field == 'physics' and x.nobel , scientists_tuple )) 

(Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),)

In [84]:
# do it another way1 - more clutter not much reusable
for x in scientists_tuple:
    if x.nobel is True:
        print(x)

Scientist(name='Marie Curie', field='physics', born=1867, nobel=True)


In [85]:
# do it another way2 -  much cleaner
def nobel_filter(x):
    return x.nobel is True

tuple(filter( nobel_filter , scientists_tuple )) 

(Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),)

In [86]:
# List comprehension , more pythonic way . but creates a list intermediate object and 
# discards it after converting it into a tuple.
tuple([x for x in scientists_tuple if x.nobel is True])

(Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),)

In [88]:
#generator expression - does not create an intermediate list object
tuple(x for x in scientists_tuple if x.nobel is True)

(Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),)

<h2>Section 3: The map() Function<h2>

In [94]:
scientists_tuple

(Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True))

In [98]:
names_and_ages = tuple (map(
    lambda x: {'name': x.name, 'age': 2020 - x.born }, scientists_tuple
))

In [99]:
names_and_ages

({'name': 'Ada Lovelace', 'age': 205},
 {'name': 'Emmy Noether', 'age': 138},
 {'name': 'Marie Curie', 'age': 153})

In [100]:
# using lisst comprehension way
[{'name': x.name, 'age': 2020 - x.born } for x in scientists_tuple]

[{'name': 'Ada Lovelace', 'age': 205},
 {'name': 'Emmy Noether', 'age': 138},
 {'name': 'Marie Curie', 'age': 153}]

In [109]:
# using generator expression
generator_object = ({'name': x.name.upper(), 'age': 2020 - x.born } for x in scientists_tuple)

In [110]:
tuple(generator_object)

({'name': 'ADA LOVELACE', 'age': 205},
 {'name': 'EMMY NOETHER', 'age': 138},
 {'name': 'MARIE CURIE', 'age': 153})

<h2>Section 4: The reduce() Function<h2>

In [112]:
from functools import reduce

In [113]:
scientists_tuple

(Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True))

In [116]:
total_age = reduce(
    lambda totalage , element: totalage + element['age'], 
    names_and_ages, 
    0
)

In [117]:
total_age

496

In [118]:
# another way to doo reduce using for loops
sum(x['age'] for x in names_and_ages)

496

In [134]:
# using reducer functions
def reducer(acc, item):
    acc[item.field].append(item.name)
    return acc

In [135]:
scientists_by_field1 = reduce(
    reducer,
    scientists_tuple,
    {'math': [], 'physics' : [], 'chemistry': [], 'astronomy': []}
)

In [136]:
scientists_by_field1

{'math': ['Ada Lovelace', 'Emmy Noether'],
 'physics': ['Marie Curie', 'Sally Ride'],
 'chemistry': [],
 'astronomy': []}

In [137]:
import collections

In [138]:
scientists_by_field2 = reduce(
    reducer,
    scientists_tuple,
    collections.defaultdict(list) # automatically creates the key every time we try to access it . use this instead of manually defining accumulator every time.
)

In [139]:
scientists_by_field2

defaultdict(list,
            {'math': ['Ada Lovelace', 'Emmy Noether'],
             'physics': ['Marie Curie', 'Sally Ride']})

In [160]:
import itertools ##########Review Again #################

In [157]:
itertools.groupby(scientists_tuple, lambda x: x.field) # returns an inerable oobject

<itertools.groupby at 0x7f245671c8b0>

In [161]:
scientists_by_field3 = {
    item[0] : list(item[1])
    for item in itertools.groupby(scientists_tuple, lambda x: x.field)
}

In [162]:
scientists_by_field3

{'math': [Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
  Scientist(name='Emmy Noether', field='math', born=1882, nobel=False)],
 'physics': [Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
  Scientist(name='Sally Ride', field='physics', born=1899, nobel=True)]}

<h2>Parallel Processing With multiprocessing: Overview<h2>

In [4]:
scientists_tuple

(Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientist(name='Sally Ride', field='physics', born=1899, nobel=True))

In [4]:
import time
import multiprocessing
from pprint import pprint
import os

def transform(x):
    print(f'Process {os.getpid()} started processing the record {x.name}\n')
    time.sleep(2)
    result = {'name': x.name, 'age': 2020 - x.born}
    print(f'Process {os.getpid()} completed Processing record {x.name}\n')
    return result

In [5]:
# Sequential processsing code
start = time.time()
result = tuple(map( transform, scientists_tuple )) 
end = time.time()


print(f'Time to complete: {end-start:.2f}')
pprint(result)

Process 4028 started processing the record Ada Lovelace

Process 4028 completed Processing record Ada Lovelace

Process 4028 started processing the record Emmy Noether

Process 4028 completed Processing record Emmy Noether

Process 4028 started processing the record Marie Curie

Process 4028 completed Processing record Marie Curie

Process 4028 started processing the record Sally Ride

Process 4028 completed Processing record Sally Ride

Time to complete: 8.01
({'age': 205, 'name': 'Ada Lovelace'},
 {'age': 138, 'name': 'Emmy Noether'},
 {'age': 153, 'name': 'Marie Curie'},
 {'age': 121, 'name': 'Sally Ride'})


In [7]:
#Multiprocessing code
start = time.time()

pool = multiprocessing.Pool()
pool.map(transform, scientists_tuple)

end = time.time()


print(f'Time to complete: {end-start:.2f}')
pprint(result)

Process 3826 started processing the record Ada Lovelace
Process 3827 started processing the record Emmy Noether
Process 3829 started processing the record Sally Ride
Process 3828 started processing the record Marie Curie




Process 3827 completed Processing record Emmy Noether
Process 3826 completed Processing record Ada Lovelace
Process 3829 completed Processing record Sally Ride
Process 3828 completed Processing record Marie Curie




Time to complete: 2.08
({'age': 205, 'name': 'Ada Lovelace'},
 {'age': 138, 'name': 'Emmy Noether'},
 {'age': 153, 'name': 'Marie Curie'},
 {'age': 121, 'name': 'Sally Ride'})


In [7]:
# Multiprocessing code with specifying the number ofo processes even on less number of cores . 
# example we can run 4 proocesses in a dual core machine . 

start = time.time()

pool = multiprocessing.Pool(processes = 4, maxtasksperchild=1)
pool.map(transform, scientists_tuple)

end = time.time()


print(f'Time to complete: {end-start:.2f}')
pprint(result)

Process 4078 started processing the record Emmy Noether
Process 4079 started processing the record Marie Curie
Process 4077 started processing the record Ada Lovelace
Process 4080 started processing the record Sally Ride




Process 4079 completed Processing record Marie Curie

Process 4078 completed Processing record Emmy Noether
Process 4080 completed Processing record Sally Ride
Process 4077 completed Processing record Ada Lovelace


Time to complete: 2.03
({'age': 205, 'name': 'Ada Lovelace'},
 {'age': 138, 'name': 'Emmy Noether'},
 {'age': 153, 'name': 'Marie Curie'},
 {'age': 121, 'name': 'Sally Ride'})



<h2>Section 6: Parallel Processing With concurrent.futures <h2>

In [13]:
import concurrent.futures

In [17]:
# only in Python 3 for concurrent asynchronous prooocessing , 

start = time.time()

with concurrent.futures.ProcessPoolExecutor() as executor:
    result = executor.map(transform, scientists_tuple)

end = time.time()

print(f'Time to complete: {end-start:.2f}')
pprint(result) # this returns an iterator by default

Process 4158 started processing the record Ada Lovelace
Process 4163 started processing the record Emmy Noether


Process 4170 started processing the record Marie Curie

Process 4175 started processing the record Sally Ride

Process 4158 completed Processing record Ada Lovelace

Process 4163 completed Processing record Emmy Noether

Process 4170 completed Processing record Marie Curie

Process 4175 completed Processing record Sally Ride

Time to complete: 2.03
<generator object _chain_from_iterable_of_lists at 0x7f0ca16d8120>


In [18]:
# only in Python 3 for concurrent asynchronous processing 
# using thread poool instead fo process pool
# Here multiple threads run in parallel for the same process


start = time.time()

with concurrent.futures.ThreadPoolExecutor() as executor: 
    result = executor.map(transform, scientists_tuple)

end = time.time()

print(f'Time to complete: {end-start:.2f}')

Process 4028 started processing the record Ada Lovelace

Process 4028 started processing the record Emmy Noether

Process 4028 started processing the record Marie Curie

Process 4028 started processing the record Sally Ride

Process 4028 completed Processing record Emmy Noether

Process 4028 completed Processing record Ada Lovelace

Process 4028 completed Processing record Sally Ride

Process 4028 completed Processing record Marie Curie

Time to complete: 2.01


# Python works bassed on Globalinterpretorloock.
# every proocess has its own interpretor and no 2 threads within a process can execute python code at same time . 
# use process based parallelism as every process gets its own intrepretor , so they can run in parallel. this is where concurrent.futures moodules is handy
# processpool executor is just a wrapper on the multiprocessing pool.
# use concurrent.futures to wasily switchout strategies instead of using the multiprocesssing moodule

#

Because of the GIL, no two threads can execute Python code at the same time. So even if you have 
multiple threads running in your Python program, only one of them can execute at a time. The best 
way to get around this is to use process-based parallel programing, or process-based parallelism.



https://realpython.com/lessons/when-use-concurrent-futures-or-multiprocessing/

