
## MapReduce 

A list of Python's built-in functions is given [here](https://docs.python.org/3/library/functions.html).

Three functions
1. map()
2. filter()
3. reduce() (in Python 3 this is not a built-in; import it from `functools`)


## Multiprocessing
1. multiprocessing.cpu_count
2. multiprocessing.Pool
3. map

In [None]:
# Sample data used throughout: the integers 1 through 19
num = list(range(1, 20))
print(num)

In [None]:
filter?

The function `filter(function, iterable)` takes two parameters: a function and an iterable. The function is called on each element of the iterable, and only the elements for which it returns a true value are kept.

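In the first example, we use `None` as the first parameter. In this case, `filter` uses the identity function as the test, so every element that is false (for example `0`, `None`, or an empty string) is removed. Since all of our numbers are non-zero, the result contains every element of the iterable.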

In [None]:
# Passing None as the predicate makes filter() keep only the truthy elements;
# every number in 1..19 is truthy, so nothing is dropped here.
num = list(range(1, 20))
filtered = [*filter(None, num)]
print(filtered)

In [None]:
def even(item):
    """Return True when item is divisible by 2, otherwise False."""
    return item % 2 == 0


# filter() keeps the elements for which even() returns True
num = list(range(1, 20))
filtered = list(filter(even, num))
print(filtered)

In [None]:
def odd(item):
    """Return True when item is not divisible by 2, otherwise False."""
    return item % 2 != 0


# filter() keeps the elements for which odd() returns True
num = list(range(1, 20))
filtered = list(filter(odd, num))
print(filtered)

$f(x) = x ^ 2$

$g([a, b, \ldots]) = [f(a), f(b), \ldots]$

$g([a, b, \ldots]) = [a^2, b^2, \ldots]$

In [None]:
map?

What if we want to apply some calculations on every element of the iterable?

Python provides another builtin function called `map(function, iterable, ...)`.

In [None]:
def square(item):
    """Return item multiplied by itself."""
    return item * item


num = list(range(1, 20))
# map() calls square() on every element of num, in order
squared = list(map(square, num))
# Show the mapped values computed in this cell
print(squared)

In [None]:
def product(item1, item2):
    """Return the product of the two arguments."""
    return item1 * item2


num1 = list(range(1, 20))
print(num1)
num2 = list(range(10, 20))
print(num2)
# With several iterables, map() passes one element from each to product()
# and stops at the shortest one, so only the first 10 pairs are multiplied.
product_value = list(map(product, num1, num2))
# Show the pairwise products computed in this cell
print(product_value)

$f(x) = x ^ 2$

$g([a, b, \ldots]) = [f(a), f(b), \ldots]$

$g([a, b, \ldots]) = [a^2, b^2, \ldots]$

$h(g([a, b, \ldots])) = a^2 + b^2 + \ldots$

In [None]:
from functools import reduce

reduce?

In [None]:
from functools import reduce
import random

def sum_num(item1, item2):
    """Return the sum of the two arguments; the binary function given to reduce()."""
    return item1 + item2


num = list(range(1, 20))
print(num)

# reduce() folds the list from left to right: ((1 + 2) + 3) + ...
sum_value = reduce(sum_num, num)
# Print the computed total, not the sum_num function object
print(sum_value)

# Same reduction over floats: one uniform sample from [0, i] for each i in 1..19
num = [random.uniform(0, i) for i in range(1, 20)]
sum_value = reduce(sum_num, num)
print(sum_value)


In [None]:
from functools import reduce
import random

def sum_num(item1, item2):
    """Add the two arguments; serves as the reducer below."""
    return item1 + item2


# Draw one uniform sample from [0, upper] for each upper bound 1..19,
# then fold the samples into a single total.
num = [random.uniform(0, upper) for upper in range(1, 20)]
sum_value = reduce(sum_num, num)
print(sum_value)

In [None]:
from functools import reduce

def product(item1, item2):
    """Return the product of the two arguments; the binary function given to reduce()."""
    return item1 * item2


num = list(range(1, 20))
print(num)
# Store the result under its own name so the sum_num() reducer defined in
# other cells is not overwritten by an integer.
product_value = reduce(product, num)
print(product_value)

In [None]:
def even(item):
    """Named predicate: True for even numbers, False otherwise."""
    return item % 2 == 0


num = list(range(1, 20))
# The same even-number test, written inline as an anonymous function
filtered = list(filter(lambda x: not x % 2, num))
print(filtered)

In [None]:
def even(item):
    """Named predicate: True for even numbers, False otherwise."""
    return item % 2 == 0


num = list(range(1, 20))
# Inline anonymous predicate that selects the odd numbers instead
filtered = list(filter(lambda x: x % 2 == 1, num))
print(filtered)

In [None]:
num = list(range(1, 20))
# Square every element with an anonymous function (x ** 2, not x * 2)
squared = list(map(lambda x: x ** 2, num))
print(squared)

In [None]:
num1 = list(range(1, 20))
print(num1)
num2 = list(range(10, 20))
print(num2)
# Multiply the two lists pairwise; map() stops at the shorter list
product = list(map(lambda left, right: left * right, num1, num2))
print(product)

In [None]:
from functools import reduce
import random

num = list(range(1, 20))
print(num)

# Fold the list into its total using an anonymous two-argument adder
sum_value = reduce(lambda total, item: total + item, num)
print(sum_value)


In [None]:
from functools import reduce
import random

num = list(range(1, 20))
print(num)

# Left-to-right accumulation of the elements with a lambda as the reducer
sum_value = reduce(lambda acc, value: acc + value, num)
print(sum_value)


In [None]:
from functools import reduce
import random

num = list(range(1, 20))
print(num)

# Multiply all elements together with a lambda as the reducer
product_value = reduce(lambda acc, value: acc * value, num)
print(product_value)

In [None]:
import multiprocessing as mp
mp.cpu_count?

In [None]:
import multiprocessing as mp
# Show the value returned by multiprocessing.cpu_count() on this machine
print(mp.cpu_count())

In [None]:
import multiprocessing as mp
mp.Pool?

In [None]:
from functools import reduce
import multiprocessing as mp

cpu_count = mp.cpu_count()


def squared(x):
    """Return x multiplied by itself; this is the function run by the pool workers."""
    return x * x


# Guard the pool creation: with the "spawn" or "forkserver" start methods the
# worker processes import this module, and must not create pools themselves.
if __name__ == "__main__":
    num = list(range(1, 20))
    with mp.Pool(processes=cpu_count) as pool:
        # pool.map() splits num across the workers and returns results in input order
        list_squared = pool.map(squared, num)
    # The pool is no longer needed once the mapped results are collected
    print(list_squared)
    product_value = reduce(lambda x, y: x * y, list_squared)
    print(product_value)

In [None]:
import requests

def download_page(item):
    """Download one Wikidata entity and save it as ``<item>.json``.

    Args:
        item: Entity identifier appended to the Special:EntityData URL,
            e.g. "Q1".

    Returns:
        The HTTP status code of the response. The file is written only
        when the status code is 200.
    """
    import json  # local import: the cell only imports requests at the top

    r = requests.get('https://www.wikidata.org/wiki/Special:EntityData/' + item + '.json')
    if r.status_code == 200:
        # json.dump writes real JSON; str(r.json()) would write the Python
        # repr of the dict (single quotes, True/None), which is not JSON.
        # The with-block closes the file, so no explicit close() is needed.
        with open(item + ".json", "w") as w:
            json.dump(r.json(), w)
    return r.status_code


pages = ["Q1", "Q2", "Q3", "Q4", "Q5", "Q6"]
# Guard the pool creation so worker processes that import this module
# (spawn/forkserver start methods) do not start downloads of their own.
if __name__ == "__main__":
    with mp.Pool(processes=cpu_count) as pool:
        # One download per page, spread over the worker processes
        status = pool.map(download_page, pages)
    print(status)

In [None]:
import os

def analyse_file(filename):
    """Count the comma-separated tokens of a file that mention wikipedia.org.

    Args:
        filename: Path of the text file to read.

    Returns:
        The number of tokens, obtained by splitting the file content on
        ",", that contain the substring "wikipedia.org".
    """
    with open(filename, "r") as handle:
        data = handle.read()
    tokens = data.split(",")
    urls = list(filter(lambda token: "wikipedia.org" in token, tokens))
    return len(urls)


files = os.listdir(".")
# endswith() selects real .json files; a substring test would also match
# names such as "data.json.bak".
json_files = list(filter(lambda f: f.endswith(".json"), files))

# Guard the pool creation so worker processes that import this module
# (spawn/forkserver start methods) do not start pools of their own.
if __name__ == "__main__":
    with mp.Pool(processes=cpu_count) as pool:
        counts = pool.map(analyse_file, json_files)
    print(counts)
    # The initial value 0 keeps reduce() from raising when no JSON file exists
    total_count = reduce(lambda x, y: x + y, counts, 0)
    print(total_count)