# Map, Reduce & Filter

In [1]:
# Libraries.
import pandas as pd

#### Creating a dataframe

In [2]:
# Last names of the students, stored as a pandas Series (one entry per student).
last_names = pd.Series([
    'Abreu', 'Chang', 'De Sanctis', 'Höhne', 'Munne', 'Pardo', 'Foradada',
    'Basbug', 'Guerguy', 'Akesson', 'Petit', 'Taskin', 'Ferri', 'Rocher',
])

In [3]:
# First names of the students, stored as a pandas Series (one entry per student).
names = pd.Series([
    'Alberto', 'Chao-Ting', 'Emanuele', 'Fabia', 'Ingrid', 'Javier', 'Josep',
    'Kerim', 'Laurent', 'Lucas', 'Miquel', 'Onur', 'Pau', 'Salva',
])

In [4]:
# Points of the students, stored as a pandas Series of integers (one entry per student).
points = pd.Series([
    10, 31, 54, 72, 84, 22, 44,
    76, 48, 87, 25, 66, 39, 51,
])

In [5]:
# Assemble the three Series into a dataframe, one column per Series, and display it.
students = pd.DataFrame({
    'names': names,
    'last_names': last_names,
    'points': points,
})
students

Unnamed: 0,names,last_names,points
0,Alberto,Abreu,10
1,Chao-Ting,Chang,31
2,Emanuele,De Sanctis,54
3,Fabia,Höhne,72
4,Ingrid,Munne,84
5,Javier,Pardo,22
6,Josep,Foradada,44
7,Kerim,Basbug,76
8,Laurent,Guerguy,48
9,Lucas,Akesson,87


## Map
Applies a function to every item of an iterable, such as a list.

In [6]:
# A small list of integers, used as input in the map and filter examples below.
lst = [1, 2, 3, 4]

In [7]:
def half(x):
    """Return ``x`` divided by 2."""
    return x / 2

In [8]:
def new_calculator(function, iterable):
    """Apply ``function`` to every item of ``iterable`` and return the results as a list.

    The intermediate map object is printed before it is converted, to show that
    ``map`` on its own does not produce a list.
    """
    mapped = map(function, iterable)
    # Printing a map object shows the object itself, not the transformed values.
    print(mapped)
    # Converting the map object with list() yields the transformed elements.
    return list(mapped)

In [9]:
# Calling 'new_calculator' prints the map object (first output line) and returns a list
# with the transformed elements (shown as the result of the cell).
new_calculator(half, lst)

<map object at 0x11e8b9b38>


[0.5, 1.0, 1.5, 2.0]

In [10]:
# We don't need the new_calculator function: we can call map directly on variable lst.
# The result is a map object, not a list of the transformed values.
map(half, lst)

<map at 0x11e8d8208>

In [11]:
# We use function 'list' to consume the map object and collect its results into a list.
list(map(half, lst))

[0.5, 1.0, 1.5, 2.0]

In [12]:
# Instead of defining function 'half', we can pass an equivalent anonymous (lambda) function to map.
list(map(lambda x: x/2, lst))

[0.5, 1.0, 1.5, 2.0]

## Filter
Keeps only the elements of an iterable for which a function returns True. Like `map`, it returns a lazy object that can be converted into a list.

In [13]:
def odds(x):
    """Return True if ``x`` is odd (not divisible by 2), False otherwise."""
    remainder = x % 2
    return remainder != 0

In [14]:
# `filter` does not return a list: like `map`, it returns its own kind of object (a filter object).
filter(odds, lst)

<filter at 0x11e89aac8>

In [15]:
# If we transform the filter object into a list, we get the elements of lst for which the odds function returns True.
list(filter(odds, lst))

[1, 3]

In [16]:
# Same as the cell above, but now we wrap the call to function odds in a lambda function.
# This is not necessary: the extra lambda only forwards its argument to odds, so we can pass odds directly.
list(filter(lambda x: odds(x), lst))

[1, 3]

In [17]:
# What is the difference between map and filter?
print("Map:", list(map(odds, lst)))
print("Filter:", list(filter(odds, lst)))

# Map returns the output of the function for each element of the list.
# Filter returns only the elements of the list for which the output of the function is True.

Map: [True, False, True, False]
Filter: [1, 3]


In [18]:
# We can also pass a lambda function to filter, instead of defining function odds.
list(filter(lambda x: x % 2 != 0, lst))

[1, 3]

## Apply ~ Map
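`DataFrame.apply` applies a function along an axis of a dataframe (by default, to each column). Called on a single column (a Series), as in the cells below, `apply` runs the function on each value, much like `map`.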

In [19]:
# Apply function half to column points of the students dataframe: on a single column (a Series),
# apply calls half on each value. We are not storing the results, just showing them.
students['points'].apply(half)

0      5.0
1     15.5
2     27.0
3     36.0
4     42.0
5     11.0
6     22.0
7     38.0
8     24.0
9     43.5
10    12.5
11    33.0
12    19.5
13    25.5
Name: points, dtype: float64

In [20]:
# There are two ways to store the results.
# Method 1: Overwrite the original column.
# The new values are computed from the original `points` Series (the Series the 'points'
# column was built from), so re-running this cell always gives the same result instead of
# halving the already-halved column a second time.
students['points'] = points.apply(half)
students

Unnamed: 0,names,last_names,points
0,Alberto,Abreu,5.0
1,Chao-Ting,Chang,15.5
2,Emanuele,De Sanctis,27.0
3,Fabia,Höhne,36.0
4,Ingrid,Munne,42.0
5,Javier,Pardo,11.0
6,Josep,Foradada,22.0
7,Kerim,Basbug,38.0
8,Laurent,Guerguy,24.0
9,Lucas,Akesson,43.5


In [21]:
# Method 2: Create a new column and store the results in it.
# The dataframe is first rebuilt from the original Series, so that column 'points'
# holds the original values when the new column is computed.
students = pd.DataFrame({
    'names': names,
    'last_names': last_names,
    'points': points,
})
students['half-points'] = students['points'].apply(half)

In [22]:
# Display the dataframe, which now includes the 'half-points' column.
students

Unnamed: 0,names,last_names,points,half-points
0,Alberto,Abreu,10,5.0
1,Chao-Ting,Chang,31,15.5
2,Emanuele,De Sanctis,54,27.0
3,Fabia,Höhne,72,36.0
4,Ingrid,Munne,84,42.0
5,Javier,Pardo,22,11.0
6,Josep,Foradada,44,22.0
7,Kerim,Basbug,76,38.0
8,Laurent,Guerguy,48,24.0
9,Lucas,Akesson,87,43.5


In [23]:
# What if we want to remove one point from each student? We can pass a lambda function to apply too.
# The result is stored in a new 'points-minus-one' column, so column 'points' is left unchanged.
students['points-minus-one'] = students['points'].apply(lambda x: x - 1)
students

Unnamed: 0,names,last_names,points,half-points,points-minus-one
0,Alberto,Abreu,10,5.0,9
1,Chao-Ting,Chang,31,15.5,30
2,Emanuele,De Sanctis,54,27.0,53
3,Fabia,Höhne,72,36.0,71
4,Ingrid,Munne,84,42.0,83
5,Javier,Pardo,22,11.0,21
6,Josep,Foradada,44,22.0,43
7,Kerim,Basbug,76,38.0,75
8,Laurent,Guerguy,48,24.0,47
9,Lucas,Akesson,87,43.5,86


In [24]:
# What if we want to remove one point only from the students who have more than 40 points?
# The conditional expression in the lambda leaves scores of 40 or less unchanged.
# Note: this assignment overwrites the 'points-minus-one' column created in the previous cell.
students['points-minus-one'] = students['points'].apply(lambda x: x - 1 if x > 40 else x)
students

Unnamed: 0,names,last_names,points,half-points,points-minus-one
0,Alberto,Abreu,10,5.0,10
1,Chao-Ting,Chang,31,15.5,31
2,Emanuele,De Sanctis,54,27.0,53
3,Fabia,Höhne,72,36.0,71
4,Ingrid,Munne,84,42.0,83
5,Javier,Pardo,22,11.0,22
6,Josep,Foradada,44,22.0,43
7,Kerim,Basbug,76,38.0,75
8,Laurent,Guerguy,48,24.0,47
9,Lucas,Akesson,87,43.5,86
