In [6]:
import re
# ^^^ pyforest auto-imports - don't write above this line
import tqdm
import numpy as np
import pandas as pd


# Functional Paradigm Intro

What other paradigms are we used to?

> <b> Procedural Programming </b>
- Instructions are procedures.
- Side effects are its core.

> <b> Object-Oriented Programming </b>
- Instructions are grouped as part of a state of an object.

> <b> Functional Programming </b>
- No state exists. Just a series of functions being evaluated.
- No side effects.
- The solution obtained is entirely based on the input. Like in math where <code>f(x) = y</code>
- This idea leads to the fact that you can also <b>pass functions as arguments</b>. And this helps a lot.


In [45]:
def add_one(x):
    """Return ``x`` incremented by one."""
    incremented = x + 1
    return incremented


In [49]:
x = 2

In [50]:
f = add_one

In [51]:
f(10)

11

In [54]:
def add_two(x):
    """Return the result of adding two to ``x``."""
    bumped = x + 2
    return bumped

In [52]:
def add_any(f, x):
    """Call ``f`` with ``x`` and hand back whatever ``f`` returns.

    Shows that a function can be received as an ordinary argument.
    """
    outcome = f(x)
    return outcome

In [55]:
add_any(add_two, 3)

5

In [56]:
example_list = [1, 3, 6, 8, 9, 10, 33]

In [28]:
# Imperative version: build the list of halves one item at a time.
results = []

for item in example_list:
    # append mutates `results` in place on every iteration
    results.append(item/2)
results

[0.5, 1.5, 3.0, 4.0, 4.5, 5.0, 16.5]

In [29]:
[item/2 for item in example_list]

[0.5, 1.5, 3.0, 4.0, 4.5, 5.0, 16.5]

# Mapping

In [57]:
# Simple list 

example_list = [10, 12, 34, 23, 2, 6, 7]

In [58]:
def half(x):
    """Return ``x`` divided by two (true division, so ints yield floats)."""
    divided = x / 2
    return divided

In [63]:
for i in map(half, example_list):
    print(i)

5.0
6.0
17.0
11.5
1.0
3.0
3.5


In [100]:
map(half, example_list)

<map at 0x1188f4160>

In [61]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [59]:
[half(10), half(12), half(34), half(23), half(2), half(6), half(7)]

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [65]:
x = map(half, example_list)

In [68]:
list(x)

[]

In [69]:
x = map(half, example_list)

In [81]:
next(x)

StopIteration: 

In [103]:
list(zip([1,3,5], ['Andre', 'Matheus', 'Lucas']))

[(1, 'Andre'), (3, 'Matheus'), (5, 'Lucas')]

In [104]:
dict(zip([1,3,5], ['Andre', 'Matheus', 'Lucas']))

{1: 'Andre', 3: 'Matheus', 5: 'Lucas'}

In [82]:
x = list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [99]:
x

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [43]:
map(half, example_list)

<map at 0x1163e6be0>

In [44]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [105]:
half(example_list)

TypeError: unsupported operand type(s) for /: 'list' and 'int'

# Lazy evaluation

Functional programming allows the idea of not calculating the whole function at once. 

These methods return only a lazy `python object` (an iterator). Nothing has been calculated yet: the values are computed only when you actually ask for the results.

In [175]:
map(half, example_list)

<map at 0x109065fa0>

In [176]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [177]:
for item in map(half, example_list):
    print(item)
    

5.0
6.0
17.0
11.5
1.0
3.0
3.5


In [178]:
set(map(half, example_list))

{1.0, 3.0, 3.5, 5.0, 6.0, 11.5, 17.0}

In [107]:
half

<function __main__.half(x)>

In [112]:
## Sneak peek:

list(map( lambda x : x/2, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

# Filter

`filter` helps you remove elements from a list (or any iterable, i.e. anything you can loop over) by passing a function that returns `True` or `False`. `filter` also returns a lazy `python object`; when you ask it for the results, it drops every item for which your function returned `False`.

In [113]:
def check_if_even(x):
    """Tell whether ``x`` is even.

    Returns True when ``x`` leaves no remainder on division by 2,
    otherwise False.
    """
    remainder = x % 2
    return remainder == 0

In [114]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [115]:
filter(check_if_even, example_list)

<filter at 0x118783c40>

In [116]:
list(filter(check_if_even, example_list))

[10, 12, 34, 2, 6]

In [137]:
~(15 % 2 == 0)

-1

In [146]:
list(filter(lambda x : x % 2 == 0, example_list))

TypeError: unsupported operand type(s) for %: 'list' and 'int'

In [142]:
f = lambda x : x % 2 == 0

In [145]:
f(10)

True

In [58]:
[item for item in example_list if item % 2 == 0]

[10, 12, 34, 2, 6]

In [59]:
list(filter(check_if_even, example_list))

[10, 12, 34, 2, 6]

In [60]:
list(filter(None, example_list))

[10, 12, 34, 23, 2, 6, 7]

In [14]:
example_list

[10, 12, 34, 23, 2, 6, 7]

# Reduce

Reduce brings the idea of an `accumulator`. Imagine you have a function that performs a `sum` for each pair of arguments. `reduce` (from the library `functools`) will consider the first argument of your function an `accumulator` and will run through your iterator recursively applying your function for pairs of items.

For example, for the list [1,4,6,8]

If you perform the following function:
```python
def sum_two_elements(a,b):
    return a+b
```

as 
```python
reduce( sum_two_elements, [1,4,6,8] )
```

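The steps it will perform are shown below. Since no initial value is given, the first item of the list becomes the starting accumulator (passing one, as in `reduce(sum_two_elements, [1,4,6,8], 0)`, would add a first step with `a = 0` and `b = 1`):
```python
a = 1 # accumulator (the first item of the list)
b = 4 # value
a + b = 5 # so the accumulator receives this cumulative sum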
...
a = 5 # accumulator
b = 6 # value 
a + b = 11
...
a = 11 # accumulator
b = 8 # value
a + b = 19

return 19
```

In [147]:
from functools import reduce

In [150]:
def sum_two_elements(a,b):
    """Print both operands, then return ``a + b``.

    The print makes every step visible when this is used with ``reduce``.
    """
    trace = f'a = {a}, b={b}'
    print(trace)
    total = a + b
    return total

In [151]:
reduce( sum_two_elements, [1,4,6,8])

a = 1, b=4
a = 5, b=6
a = 11, b=8


19

In [153]:
reduce( sum_two_elements, ['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

a = Andre , b=Ribeiro 
a = Andre Ribeiro , b=de 
a = Andre Ribeiro de , b=Barros 
a = Andre Ribeiro de Barros , b=Aguiar


'Andre Ribeiro de Barros Aguiar'

In [154]:
''.join(['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

'Andre Ribeiro de Barros Aguiar'

In [162]:
def my_sum(acc, value):
    """Reducer that adds ``value`` only while the accumulator is even.

    Prints the pair it receives on every call. When ``acc`` is odd it is
    returned unchanged and ``value`` is discarded.
    """
    print(acc, value)
    if acc % 2 != 0:
        # odd accumulator: stop accumulating
        return acc
    return acc + value

In [163]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [164]:
# keep summing until the running total becomes odd; from then on it stays unchanged
reduce(my_sum, example_list)

10 12
22 34
56 23
79 2
79 6
79 7


79

In [187]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [188]:
example_list2 = ['a','b', 'c', 'd']

In [189]:
def my_sum(a,b):
    """Combine two operands with ``+`` and return the result.

    Note: this rebinds the name ``my_sum`` that an earlier cell of this
    notebook defined with different behaviour.
    """
    combined = a + b
    return combined

In [190]:
sum(example_list2)

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [191]:
reduce(my_sum, example_list2)

'abcd'

# Maps on Pandas

> <code> dataframe.apply() </code>

In [165]:
n = 100

In [166]:
df = pd.DataFrame(np.random.random(n), columns=['col'])

In [167]:
df

Unnamed: 0,col
0,0.129018
1,0.330580
2,0.036198
3,0.916074
4,0.566087
...,...
95,0.105486
96,0.587648
97,0.919041
98,0.530193


In [172]:
def greater_than_half(x):
    """Return True when ``x`` is strictly greater than 0.5, else False.

    Works on a single value. The comparison result is converted with
    ``bool()``, so passing a whole pandas Series raises ValueError
    (ambiguous truth value) instead of comparing element-wise.
    """
    return bool(x > 0.5)

In [173]:
greater_than_half(df['col'])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [175]:
x = df['col']

In [None]:
x.map()

In [176]:
df['col'].map(greater_than_half)

0     False
1     False
2     False
3      True
4      True
      ...  
95    False
96     True
97     True
98     True
99     True
Name: col, Length: 100, dtype: bool

In [177]:
df['col'].apply(greater_than_half)

0     False
1     False
2     False
3      True
4      True
      ...  
95    False
96     True
97     True
98     True
99     True
Name: col, Length: 100, dtype: bool

In [178]:
df['col'].apply(lambda x : True if x > 0.5 else False)

0     False
1     False
2     False
3      True
4      True
      ...  
95    False
96     True
97     True
98     True
99     True
Name: col, Length: 100, dtype: bool

In [180]:
df = pd.DataFrame({'col': ['andre aguiar', 'pablo vitar', 'jair piscerni', 'luis gustavo']})

In [189]:
df['col'].apply(lambda x : x.replace(' ', '_') if x.startswith('a') else x)

0     andre_aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [190]:
def sera_function(nome):
    '''
    This function was created by Sera.
    It replaces spaces with underscores when the name starts with 'a';
    any other name is returned unchanged.
    '''
    return nome.replace(' ', '_') if nome.startswith('a') else nome

In [192]:
df['col'].apply(sera_function)

0     andre_aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [203]:
df = pd.DataFrame(np.random.random((10,3)), columns=['col1','col2','col3'])

In [204]:
def minha_condicao(x):
    """Label a value: 'ooooi' when it is below 0.5, 'tchaaau' otherwise."""
    return 'ooooi' if x < 0.5 else 'tchaaau'

In [206]:
df

Unnamed: 0,col1,col2,col3
0,0.218681,0.825647,0.531074
1,0.887633,0.993101,0.588641
2,0.332187,0.071347,0.36991
3,0.0174,0.206369,0.0331
4,0.904627,0.732092,0.916879
5,0.968538,0.748211,0.177135
6,0.330772,0.762093,0.172212
7,0.111786,0.374974,0.196144
8,0.962569,0.880252,0.991519
9,0.822105,0.011586,0.394429


In [207]:
df[['col1','col2']].applymap(minha_condicao)

Unnamed: 0,col1,col2
0,ooooi,tchaaau
1,tchaaau,tchaaau
2,ooooi,ooooi
3,ooooi,ooooi
4,tchaaau,tchaaau
5,tchaaau,tchaaau
6,ooooi,tchaaau
7,ooooi,ooooi
8,tchaaau,tchaaau
9,tchaaau,ooooi


> The most used, though, is the `apply` method. 

In [209]:

df['col_based'] = df['col1'].apply(minha_condicao)

In [210]:
df

Unnamed: 0,col1,col2,col3,col_based
0,0.218681,0.825647,0.531074,ooooi
1,0.887633,0.993101,0.588641,tchaaau
2,0.332187,0.071347,0.36991,ooooi
3,0.0174,0.206369,0.0331,ooooi
4,0.904627,0.732092,0.916879,tchaaau
5,0.968538,0.748211,0.177135,tchaaau
6,0.330772,0.762093,0.172212,ooooi
7,0.111786,0.374974,0.196144,ooooi
8,0.962569,0.880252,0.991519,tchaaau
9,0.822105,0.011586,0.394429,tchaaau


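> `applymap` applies a function to every single element of your whole dataframe (in pandas 2.1+ it is deprecated in favor of the equivalent `DataFrame.map`)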

In [212]:
# df.applymap(minha_condicao)


In [215]:
names = ['Andre_Prof', 'Rai_TA', 'Matheus_TA','Lucas_TA','ChegouTarde_Aluna','Victor_Aluno','Eveline_Aluna']

In [216]:
np.random.choice(names)

'ChegouTarde_Aluna'

In [217]:
example_df = pd.DataFrame([np.random.choice(names) for i in range(10)], columns=['names'])

In [218]:
example_df

Unnamed: 0,names
0,Matheus_TA
1,Eveline_Aluna
2,Andre_Prof
3,Eveline_Aluna
4,Victor_Aluno
5,Rai_TA
6,Victor_Aluno
7,Matheus_TA
8,Lucas_TA
9,Victor_Aluno


In [224]:
'Matheus_TA'.replace('_',' ').split()[-1]

'TA'

# Apply functions with arguments.

In [220]:
def my_replace(x, index):
    """Pick one piece of an underscore-separated string.

    Underscores in ``x`` are turned into spaces, the result is split on
    whitespace, and the piece at position ``index`` is returned.
    """
    spaced = x.replace('_',' ')
    pieces = spaced.split()
    return pieces[index]

In [226]:
my_replace('Matheus_TA', 1)

'TA'

In [232]:
# Extra keyword arguments given to Series.apply are forwarded to the function,
# so every value below is processed as my_replace(value, index=...).
example_df['profissao'] = example_df['names'].apply(my_replace, index=1)
example_df['nome'] = example_df['names'].apply(my_replace, index=0)

In [235]:
# example_df['nome'] = example_df['names'].apply(my_replace, index=0, outro_argumento=3 )

In [233]:
example_df

Unnamed: 0,names,profissao,nome
0,Matheus_TA,TA,Matheus
1,Eveline_Aluna,Aluna,Eveline
2,Andre_Prof,Prof,Andre
3,Eveline_Aluna,Aluna,Eveline
4,Victor_Aluno,Aluno,Victor
5,Rai_TA,TA,Rai
6,Victor_Aluno,Aluno,Victor
7,Matheus_TA,TA,Matheus
8,Lucas_TA,TA,Lucas
9,Victor_Aluno,Aluno,Victor


# Apply in axis = 1

Whenever you call `apply` on a pandas dataframe with `axis=1`, your function is called once per row and receives that whole row, so it can read several columns at once.

In [39]:
# Start from an empty frame and fill it column by column.
df = pd.DataFrame()
# Both columns come from example_df['names'], split by my_replace
df['type'] = example_df['names'].apply(my_replace, index=1)
df['name'] = example_df['names'].apply(my_replace, index=0)
# One random integer from 0 to 9 per row (the upper bound 10 is exclusive)
df['score'] = np.random.randint(0,10, size=df.shape[0])

In [238]:
example_df['score'] = np.random.randint(0,10, size=df.shape[0])

In [239]:
example_df

Unnamed: 0,names,profissao,nome,score
0,Matheus_TA,TA,Matheus,7
1,Eveline_Aluna,Aluna,Eveline,6
2,Andre_Prof,Prof,Andre,3
3,Eveline_Aluna,Aluna,Eveline,4
4,Victor_Aluno,Aluno,Victor,1
5,Rai_TA,TA,Rai,7
6,Victor_Aluno,Aluno,Victor,2
7,Matheus_TA,TA,Matheus,2
8,Lucas_TA,TA,Lucas,6
9,Victor_Aluno,Aluno,Victor,3


In [243]:
len(re.findall('[Aa]lun[oa]', 'Aluno'))

<IPython.core.display.Javascript object>

1

In [249]:
def passed(row):
    """Decide whether one row counts as approved.

    Meant to be used as ``DataFrame.apply(passed, axis=1)``; ``row`` only
    has to support ``row['profissao']`` and ``row['score']``.

    Rules:
    - a 'TA' passes with a score of at least 6;
    - a role matching the pattern '[Aa]lun[oa]' passes with a score of
      at least 5;
    - every other role passes, and its score is never read.

    Returns a plain ``bool``.
    """
    # `re` comes from the import cell at the top of the notebook; no
    # function-local import is needed.
    if row['profissao'] == 'TA':
        return bool(row['score'] >= 6)
    # re.search is enough here: only the existence of a match matters.
    if re.search('[Aa]lun[oa]', row['profissao']):
        return bool(row['score'] >= 5)
    return True

In [250]:
example_df

Unnamed: 0,names,profissao,nome,score
0,Matheus_TA,TA,Matheus,7
1,Eveline_Aluna,Aluna,Eveline,6
2,Andre_Prof,Prof,Andre,3
3,Eveline_Aluna,Aluna,Eveline,4
4,Victor_Aluno,Aluno,Victor,1
5,Rai_TA,TA,Rai,7
6,Victor_Aluno,Aluno,Victor,2
7,Matheus_TA,TA,Matheus,2
8,Lucas_TA,TA,Lucas,6
9,Victor_Aluno,Aluno,Victor,3


In [251]:
example_df.apply(passed, axis=1)

0     True
1     True
2     True
3    False
4    False
5     True
6    False
7    False
8     True
9    False
dtype: bool

In [252]:
example_df['passed'] = example_df.apply(passed, axis=1)

In [253]:
example_df

Unnamed: 0,names,profissao,nome,score,passed
0,Matheus_TA,TA,Matheus,7,True
1,Eveline_Aluna,Aluna,Eveline,6,True
2,Andre_Prof,Prof,Andre,3,True
3,Eveline_Aluna,Aluna,Eveline,4,False
4,Victor_Aluno,Aluno,Victor,1,False
5,Rai_TA,TA,Rai,7,True
6,Victor_Aluno,Aluno,Victor,2,False
7,Matheus_TA,TA,Matheus,2,False
8,Lucas_TA,TA,Lucas,6,True
9,Victor_Aluno,Aluno,Victor,3,False
