In [None]:
import tqdm
import numpy as np
import pandas as pd


# Functional Paradigm Intro

What other paradigms have we experienced?

> <b> Procedural Programming </b>
- Instructions are procedures.
- Side effects are its core.

> <b> Object-Oriented Programming </b>
- Instructions are grouped as part of a state of an object.

> <b> Functional Programming </b>
- No state exists. Just a series of functions being evaluated.
- No side effects.
- The solution obtained is entirely based on the input. Like in math where <code>f(x) = y</code>
- This idea leads to the fact that you can also <b>pass functions as arguments</b>. And this helps a lot.


In [1]:
def add_one(x):
    """Return ``x`` increased by one."""
    incremented = x + 1
    return incremented

In [2]:
x = 2

In [3]:
# functions can be thought of as variables as well (!)
# add_one is just a name

f = add_one

In [4]:
# now f receives add_one 

f(10)

11

In [5]:
def add_two(x):
    """Return ``x`` increased by two."""
    incremented = x + 2
    return incremented

In [8]:
# so, if a function can be thought of as a variable,
# can it be passed as an argument like any other variable? YES!

def add_any(f, x):
    """Call ``f`` with ``x`` as its only argument and return what it returns.

    ``f`` is received like any other value and simply invoked here.
    """
    outcome = f(x)
    return outcome

In [9]:
add_any(add_one, 3)

4

In [10]:
add_any(add_two, 3)

5

# Function definition

```python
def function_name(arg1):
    something = arg1 + 10
    return something
```

# Mapping concept

In [11]:
# Simple list 
example_list = [10, 12, 34, 23, 2, 6, 7]

In [12]:
# define a function that performs any operation: 

def half(x):
    """Return ``x`` divided by two using true division."""
    halved = x / 2
    return halved

## How to apply that function to all elements of this list?

In [13]:
# you can't simply:

half(example_list)

TypeError: unsupported operand type(s) for /: 'list' and 'int'

In [14]:
# using a for loop: build the result one element at a time
new_list = []

for item in example_list:
    # apply half() to the current element and keep the result
    new_list.append(half(item))
    
# last expression of the cell, so the notebook displays the list
new_list 

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [15]:
# using list comprehensions
[half(item) for item in example_list]

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [16]:
# using mapping:

map(half, example_list)

# what it does when you map a function onto a list is the below: 

# [half(10), half(12), half(34), half(23), half(2), half(6), half(7)]

<map at 0x1a949e418b0>

`map` is called `lazy`. When you run `map(function, my_list)`, it doesn't execute anything yet; it just stores what it needs to perform. Only when you ask for the values (for example with `list(...)` or by looping over it) does it compute and hand out the results.

In [17]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

# Lazy evaluation

Functional programming allows the idea of not calculating the whole function at once. 

These functions return only a `python object`. Nothing has been calculated yet at that point. As soon as you ask for the results, they are calculated.

In [18]:
map(half, example_list)

<map at 0x1a949e41820>

In [19]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [20]:
for item in map(half, example_list):
    print(item)

5.0
6.0
17.0
11.5
1.0
3.0
3.5


In [21]:
set(map(half, example_list))

{1.0, 3.0, 3.5, 5.0, 6.0, 11.5, 17.0}

# Filter

`filter` helps remove elements from a list (or any iterable, i.e. anything you can loop over) by passing a function that returns `True` or `False`. `filter` also returns a lazy `python object`; when you ask it for the results, it keeps only the items for which your function returned `True` and drops every item for which it returned `False`.

In [22]:
def check_if_even(x):
    """Tell whether ``x`` is even.

    Returns the result of comparing ``x % 2`` with zero: True for even
    values, False otherwise.
    """
    remainder = x % 2
    return remainder == 0

In [23]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [24]:
filter(check_if_even, example_list)

<filter at 0x1a949e41d60>

In [25]:
list(filter(check_if_even, example_list))

[10, 12, 34, 2, 6]

In [None]:
[item for item in example_list if item % 2 == 0]

In [None]:
list(filter(check_if_even, example_list))

# Reduce

Reduce brings the idea of an `accumulator`. Imagine you have a function that performs a `sum` of a pair of arguments. `reduce` (from the library `functools`) treats the first argument of your function as an `accumulator`: it runs through your iterable, repeatedly applying your function to the accumulator and the next item, and stores each result back in the accumulator.

For example, for the list [1,4,6,8]

If you perform the following function:
```python
def sum_two_elements(a,b):
    return a+b
```

as 
```python
reduce( sum_two_elements, [1,4,6,8] )
```

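The steps it will perform are (no initial value is given, so the first element of the list seeds the accumulator):
```python
a = 1 # accumulator (first element of the list)
b = 4 # value
a + b = 5 # so the accumulator receives this cumulative sum
...
a = 5 # accumulator
b = 6 # value 
a + b = 11
...
a = 11 # accumulator
b = 8 # value
a + b = 19

return 19
```

To start the accumulator from `0` instead, pass it as a third argument: `reduce(sum_two_elements, [1,4,6,8], 0)`.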

In [26]:
from functools import reduce

In [27]:
def sum_two_elements(a, b):
    """Print both operands, then return ``a + b``.

    The print makes each step visible when the function is driven by
    ``reduce``: ``a`` is the running accumulator and ``b`` the next item.
    """
    print(f'a = {a}, b={b}')
    total = a + b
    return total

In [28]:
reduce( sum_two_elements, [1,4,6,8])

a = 1, b=4
a = 5, b=6
a = 11, b=8


19

In [29]:
reduce( sum_two_elements, ['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

a = Andre , b=Ribeiro 
a = Andre Ribeiro , b=de 
a = Andre Ribeiro de , b=Barros 
a = Andre Ribeiro de Barros , b=Aguiar


'Andre Ribeiro de Barros Aguiar'

In [30]:
''.join(['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

'Andre Ribeiro de Barros Aguiar'

In [31]:
def my_sum(acc, value):
    """Add ``value`` to ``acc`` only while ``acc`` is even.

    Prints the pair it received, then returns ``acc + value`` when ``acc``
    is even; once ``acc`` is odd it is returned unchanged, so under
    ``reduce`` the running total stops growing from that point on.
    """
    print(acc, value)
    accumulator_is_even = acc % 2 == 0
    return acc + value if accumulator_is_even else acc

In [32]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [33]:
# keep summing until the running sum becomes odd
reduce(my_sum, example_list)

10 12
22 34
56 23
79 2
79 6
79 7


79

In [34]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [35]:
example_list2 = ['a','b', 'c', 'd']

In [36]:
def my_sum(a, b):
    """Return ``a + b`` for any two operands that support ``+``.

    Note: this rebinds the name ``my_sum``, replacing the earlier version
    that printed its arguments and stopped adding once the total was odd.
    """
    combined = a + b
    return combined

In [37]:
sum(example_list2)

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [38]:
reduce(my_sum, example_list2)

'abcd'

# Mapping on Pandas

> <code> df['col_name'].apply() </code>

In [44]:
import pandas as pd
import numpy as np

In [45]:
n = 100

In [46]:
df = pd.DataFrame(np.random.random(n), columns=['number'])

In [47]:
df

Unnamed: 0,number
0,0.715044
1,0.559693
2,0.722279
3,0.671250
4,0.982239
...,...
95,0.051356
96,0.595162
97,0.281860
98,0.462667


In [48]:
def greater_than_half(x):
    """Return True when ``x`` is strictly greater than 0.5, otherwise False.

    The comparison result is coerced to a single truth value, so ``x`` has
    to be one value at a time; passing a whole pandas Series raises
    ValueError (ambiguous truth value), which is why the function is used
    through ``.apply``.
    """
    return bool(x > 0.5)

In [49]:
greater_than_half(df['number'])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [51]:
df['number'].apply(greater_than_half)

0      True
1      True
2      True
3      True
4      True
      ...  
95    False
96     True
97    False
98    False
99    False
Name: number, Length: 100, dtype: bool

> Pandas Series have both `map` and `apply`. The most used, though, is the `apply` method. 

In [53]:
df['is_greater_than_half'] = df['number'].apply(greater_than_half)

In [54]:
df

Unnamed: 0,number,is_greater_than_half
0,0.715044,True
1,0.559693,True
2,0.722279,True
3,0.671250,True
4,0.982239,True
...,...,...
95,0.051356,False
96,0.595162,True
97,0.281860,False
98,0.462667,False


---

In [55]:
import re

In [56]:
names = ['andre', 'Andre', 'André','ANDRE','ANDRÉ', 'Joao','Carlos', 'Maria', 'Jose']
df = pd.DataFrame(np.random.choice(names, n), columns=['names'])
df

Unnamed: 0,names
0,Maria
1,Carlos
2,andre
3,Carlos
4,Andre
...,...
95,ANDRE
96,ANDRE
97,andre
98,ANDRE


In [57]:
df['names'].value_counts()

André     14
andre     14
Jose      14
ANDRE     12
ANDRÉ     12
Joao      11
Maria      9
Andre      8
Carlos     6
Name: names, dtype: int64

In [None]:
## task: replace all occurrences of my name with Andre

In [62]:
def change_names(name):
    """Normalize every spelling of the name "Andre" inside ``name``.

    Any letter-case variant of "andre" / "andré" is replaced by "Andre".
    Text that does not contain the name is returned unchanged.

    Parameters
    ----------
    name : str
        The text to clean.

    Returns
    -------
    str
        ``name`` with each standalone occurrence replaced by "Andre".
    """
    # (?<![^\W\d_]) and (?![^\W\d_]) mean "no letter right before / after",
    # so the name is only replaced when it stands on its own: "Alexandre"
    # is no longer turned into "AlexAndre". Digits and "_" still count as
    # separators, so "andre_LT" keeps being normalized.
    # "e\u0301" covers the decomposed spelling ("e" followed by a combining
    # acute accent); it is tried first so the accent is consumed instead of
    # being left dangling after the replacement.
    pattern = r'(?<![^\W\d_])andr(?:e\u0301|[eé])(?![^\W\d_])'
    return re.sub(pattern, 'Andre', name, flags=re.IGNORECASE)

In [63]:
df['names'] = df['names'].apply(change_names)
df['names']

0      Maria
1     Carlos
2      Andre
3     Carlos
4      Andre
       ...  
95     Andre
96     Andre
97     Andre
98     Andre
99     Maria
Name: names, Length: 100, dtype: object

In [64]:
df['names'].value_counts()

Andre     60
Jose      14
Joao      11
Maria      9
Carlos     6
Name: names, dtype: int64

# Apply functions with arguments.

In [65]:
def my_replace(x, index):
    """
    Split a ``name_profession`` string and return one of its pieces.

    Underscores are turned into spaces and the text is then split on
    whitespace; ``index`` selects which piece is returned:
    If index = 0, returns the name
    If index = 1, returns the profession
    """
    pieces = x.replace('_', ' ').split()
    return pieces[index]

In [66]:
example_df = pd.DataFrame({'names': ['Andre_LT','Matheus_TA','Joao_Student','Jose_Student']})

In [67]:
my_replace('Matheus_TA', 0)

'Matheus'

In [70]:
example_df['profissao'] = example_df['names'].apply(my_replace, index=1)
example_df['nome'] = example_df['names'].apply(my_replace, index=0)
example_df

Unnamed: 0,names,profissao,nome
0,Andre_LT,LT,Andre
1,Matheus_TA,TA,Matheus
2,Joao_Student,Student,Joao
3,Jose_Student,Student,Jose


# Apply in axis = 1

Whenever you map (apply) a function over a pandas dataframe using `axis=1`, the function is called once per row, so inside it you have access to that row's values by column name.

In [71]:
df = pd.DataFrame()
df['type'] = example_df['names'].apply(my_replace, index=1)
df['name'] = example_df['names'].apply(my_replace, index=0)
df['score'] = [6, 7, 8, 7]

In [72]:
df

Unnamed: 0,type,name,score
0,LT,Andre,6
1,TA,Matheus,7
2,Student,Joao,8
3,Student,Jose,7


In [73]:
def has_passed(row):
    """Return 'pass' or 'fail' for one row, based on its type and score.

    Rows whose ``type`` is 'Student' need a ``score`` strictly above 7;
    every other type needs a ``score`` strictly above 6.
    """
    # Pick the bar for this kind of row first, then compare once.
    minimum_score = 7 if row['type'] == 'Student' else 6
    return 'pass' if row['score'] > minimum_score else 'fail'

In [74]:
df.apply(has_passed, axis=1)

0    fail
1    pass
2    pass
3    fail
dtype: object

In [75]:
df

Unnamed: 0,type,name,score
0,LT,Andre,6
1,TA,Matheus,7
2,Student,Joao,8
3,Student,Jose,7
