In [3]:
import tqdm
import numpy as np
import pandas as pd


# Functional Paradigm Intro

What other paradigms are we used to?

> <b> Procedural Programming </b>
- Instructions are procedures.
- Side effects are its core.

> <b> Object-Oriented Programming </b>
- Instructions are grouped as part of a state of an object.

> <b> Functional Programming </b>
- No state exists. Just a series of functions being evaluated. 
- No side effects.
- The solution obtained is entirely based on the input. Like in math where <code>f(x) = y</code>
- This idea leads to the fact that you can also <b>pass functions as arguments</b>. And this helps a lot.


In [4]:
def add_one(x):
    """Return ``x`` incremented by one."""
    incremented = x + 1
    return incremented

In [5]:
variable = 2

In [6]:
add_one(variable)

3

In [10]:
f = add_one

In [12]:
f(10)

11

In [13]:
# functions can be thought of as variables as well (!)
# add_one is just a name

f = add_one

In [14]:
# now f receives add_one 

f(10)

11

In [15]:
def add_two(x):
    """Return ``x`` incremented by two."""
    incremented = x + 2
    return incremented

In [21]:
# so, if it can be thought of as a variable, 
# can it be passed as an argument like any other variable? YES! 

def add_any(outra_funcao, qualquer_argumento, multiplicador=1):
    """Apply a function to an argument and scale what it returns.

    Parameters
    ----------
    outra_funcao : callable
        One-argument function to call.
    qualquer_argumento : any
        Value handed to ``outra_funcao``.
    multiplicador : any, default 1
        Factor the function's result is multiplied by.

    Returns
    -------
    The value of ``outra_funcao(qualquer_argumento) * multiplicador``.
    """
    result = outra_funcao(qualquer_argumento)
    return result * multiplicador

In [22]:
add_any(add_one, 5)

6

In [18]:
add_any(add_two, 5)

7

# Mapping concept

In [24]:
# Simple list 
example_list = [10, 12, 34, 23, 2, 6, 7]
example_list

[10, 12, 34, 23, 2, 6, 7]

In [25]:
# define a function that performs any operation: 

def half(x):
    """Return ``x`` divided by two (true division, so integers give floats)."""
    halved = x / 2
    return halved

## How to apply that function to all elements of this list?

In [26]:
# you can't simply:
half(example_list)

TypeError: unsupported operand type(s) for /: 'list' and 'int'

In [29]:
# using a for loop
new_list = []

for item in example_list:
    # Compute the halved value once and reuse it for the print and the append.
    halved = half(item)
    print(item, halved)
    new_list.append(halved)

new_list

10 5.0
12 6.0
34 17.0
23 11.5
2 1.0
6 3.0
7 3.5


[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [30]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [31]:
# using list comprehensions
[half(item) for item in example_list]

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [32]:
# using mapping:

map(half, example_list)

# what it does when you map a function onto a list is the below: 

# [half(10), half(12), half(34), half(23), half(2), half(6), half(7)]

<map at 0x2d68f21f0f0>

In [33]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

Be careful: if you store a lazy object (such as the result of `map`) in a variable, consuming it once exhausts it, and consuming it again yields nothing:

In [40]:
# for example:

x = map(half, example_list)

In [41]:
x

<map at 0x2d68f268630>

In [42]:
my_results = list(x)

In [43]:
my_results

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [45]:
list(x)

[]

In [46]:
# if I run it again, it is gone:

list(x)

[]

In [51]:
x = zip([1,2,3], [4,5,6])

In [54]:
# NOTE(review): the empty result below suggests `x` had already been consumed before this
# cell ran; on a freshly created zip object, list(x) returns [(1, 4), (2, 5), (3, 6)].
list(x)

[]

In [55]:
for item in zip([1,2,3], [4,5,6]):
    print(item)

(1, 4)
(2, 5)
(3, 6)


# Lazy evaluation

Functional programming embraces the idea of not computing all the results at once. 

Functions such as `map` and `zip` return only a lazy `python object`. Nothing has been calculated yet; the values are computed as soon as you ask for the results (for example with `list()`, `next()` or a `for` loop).

In [56]:
map(half, example_list)

<map at 0x2d68f20ac18>

In [57]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [58]:
for item in map(half, example_list):
    print(item)

5.0
6.0
17.0
11.5
1.0
3.0
3.5


In [60]:
set(map(half, example_list))

{1.0, 3.0, 3.5, 5.0, 6.0, 11.5, 17.0}

In [74]:
# next
y = map(half, example_list)
y

<map at 0x2d68f20af98>

In [75]:
# y --> half(10), half(12), half(34), half(23), half(2), half(6), half(7)
list(y)

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [76]:
# y --> nothing left: a map object is a lazy iterator that can be consumed only once
list(y)

[]

In [77]:
# next
y = map(half, example_list)
y

<map at 0x2d68f206160>

In [72]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [78]:
# y --> half(10), half(12), half(34), half(23), half(2), half(6), half(7)
next(y)

5.0

In [79]:
# y --> half(12), half(34), half(23), half(2), half(6), half(7)
next(y)

6.0

In [80]:
# y --> half(34), half(23), half(2), half(6), half(7)
next(y)

17.0

In [81]:
# y --> half(23), half(2), half(6), half(7)
list(y)

[11.5, 1.0, 3.0, 3.5]

In [73]:
print(next(y))
print(next(y))
print(next(y))
list(y)

5.0
6.0
17.0


[11.5, 1.0, 3.0, 3.5]

In [91]:
y = zip([1,2,3],[4,5,6])

In [97]:
y = [1,2,3]

In [100]:
list(map(half, example_list))

[5.0, 6.0, 17.0, 11.5, 1.0, 3.0, 3.5]

In [101]:
def divide_by_three(x):
    """Return ``x`` divided by three using true division."""
    third = x / 3
    return third

In [106]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [105]:
list(map(divide_by_three, example_list))

[3.3333333333333335,
 4.0,
 11.333333333333334,
 7.666666666666667,
 0.6666666666666666,
 2.0,
 2.3333333333333335]

# Function definition

```python
def function_name(arg1):
    something = arg1 + 10
    return something
```

# Lambda function concept

https://realpython.com/python-lambda/


> Named after `lambda calculus`.

> Usually refers to `anonymous functions`

## Syntax:

```python
lambda argument : return_statement 

# for example
lambda x : x / 2 
```

In [None]:
# Instead of creating and naming a function that you'll use afterwards, 
# if you just want to create a function for the purpose of using it once
# you can create a lambda function:

# for example, instead of creating the function `half`, we could write:

list(map(lambda x : x / 2, example_list))

In [110]:
list(map(lambda x : x/10, example_list))

[1.0, 1.2, 3.4, 2.3, 0.2, 0.6, 0.7]

In [164]:
list(map(lambda x : x/20, example_list))

[0.5, 0.6, 1.7, 1.15, 0.1, 0.3, 0.35]

# Filter

`filter` helps remove elements of a list (or any iterable, i.e. anything you can loop over) by passing a function that returns `True` or `False`. `filter` also returns a lazy `python object`; when you ask it for the results, it keeps only the items for which your function returned `True` and drops every item for which it returned `False`.

In [165]:
def check_if_even(x):
    """
    Tell whether x is even.

    Returns True when x leaves no remainder on division by 2, otherwise False.
    """
    remainder = x % 2
    return remainder == 0



In [166]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [169]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [168]:
list(map(check_if_even, example_list))

[True, True, True, False, True, True, False]

In [171]:
list(filter(check_if_even, example_list))

[10, 12, 34, 2, 6]

In [173]:
[item for item in example_list if item % 2 == 0]

[10, 12, 34, 2, 6]

In [174]:
list(filter(lambda x : x % 2 == 0, example_list))

[10, 12, 34, 2, 6]

# Reduce

Reduce brings the idea of an `accumulator`. Imagine you have a function that takes two arguments and returns their `sum`. `reduce` (from the library `functools`) treats the first argument of your function as an `accumulator`: it walks through your iterable from left to right, repeatedly applying the function to the accumulated result and the next item, until a single value remains.

For example, for the list [1,4,6,8]

If you perform the following function:
```python
def sum_two_elements(a,b):
    return a+b
```

as 
```python
reduce( sum_two_elements, [1,4,6,8] )
```

The steps it will perform are:
```python

a = 1 # accumulator
b = 4 # value
a + b = 5
...
a = 5 # accumulator
b = 6 # value 
a + b = 11
...
a = 11 # accumulator
b = 8 # value
a + b = 19

return 19
```

In [176]:
from functools import reduce

In [177]:
def sum_two_elements(a, b):
    """Print both operands, then return ``a + b``.

    The print makes every step visible when the function is driven by ``reduce``.
    """
    print(f'a = {a}, b={b}')
    total = a + b
    return total

In [178]:
reduce(sum_two_elements, [1,4,6,8])

a = 1, b=4
a = 5, b=6
a = 11, b=8


19

In [179]:
reduce( sum_two_elements, ['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

a = Andre , b=Ribeiro 
a = Andre Ribeiro , b=de 
a = Andre Ribeiro de , b=Barros 
a = Andre Ribeiro de Barros , b=Aguiar


'Andre Ribeiro de Barros Aguiar'

In [180]:
''.join(['Andre ','Ribeiro ', 'de ', 'Barros ', 'Aguiar'])

'Andre Ribeiro de Barros Aguiar'

In [181]:
def my_sum(acc, value):
    """Add ``value`` to ``acc`` only while ``acc`` is even.

    Prints ``acc`` and ``value`` on every call. An odd accumulator is returned
    unchanged, so a ``reduce`` over this function stops growing at the first
    odd running total.
    """
    print(acc, value)
    # Guard clause: an odd accumulator is passed through untouched.
    if acc % 2 != 0:
        return acc
    return acc + value

In [182]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [183]:
# keep adding until the running sum becomes odd
reduce(my_sum, example_list)

10 12
22 34
56 23
79 2
79 6
79 7


79

In [184]:
example_list

[10, 12, 34, 23, 2, 6, 7]

In [185]:
example_list2 = ['a','b', 'c', 'd']

In [186]:
def my_sum(a, b):
    """Return ``a + b``.

    Note: this definition reuses the name ``my_sum`` and therefore replaces
    the accumulator-style ``my_sum(acc, value)`` defined earlier in the notebook.
    """
    combined = a + b
    return combined

In [190]:
my_sum(example_list2)

TypeError: my_sum() missing 1 required positional argument: 'b'

In [188]:
reduce(my_sum, example_list2)

'abcd'

# Mapping on Pandas

> <code> df.apply() </code>

In [192]:
n = 100

In [193]:
df = pd.DataFrame(np.random.random(n), columns=['col'])

In [206]:
df > 0.5

Unnamed: 0,col
0,False
1,False
2,True
3,False
4,False
...,...
95,False
96,False
97,False
98,True


In [198]:
def greater_than_half(x):
    """Return True when ``x`` is strictly greater than 0.5, otherwise False.

    The comparison result is passed through ``bool`` so that the truth value
    is evaluated here, exactly as an ``if`` statement would evaluate it.
    """
    return bool(x > 0.5)

In [208]:
greater_than_half(0.6)

True

In [209]:
greater_than_half(df['col'])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [210]:
df['col']

0     0.061761
1     0.279708
2     0.918144
3     0.195016
4     0.224713
        ...   
95    0.268974
96    0.104298
97    0.148628
98    0.763148
99    0.219516
Name: col, Length: 100, dtype: float64

In [204]:
x = df['col']

In [None]:
# Series.map needs the function (or mapping) to apply to each element;
# calling it with no argument raises a TypeError.
x.map(greater_than_half)

In [214]:
df['col'].map(greater_than_half)

0     False
1     False
2      True
3     False
4     False
      ...  
95    False
96    False
97    False
98     True
99    False
Name: col, Length: 100, dtype: bool

In [212]:
df['col']

0     0.061761
1     0.279708
2     0.918144
3     0.195016
4     0.224713
        ...   
95    0.268974
96    0.104298
97    0.148628
98    0.763148
99    0.219516
Name: col, Length: 100, dtype: float64

In [213]:
df['col'].apply(greater_than_half)

0     False
1     False
2      True
3     False
4     False
      ...  
95    False
96    False
97    False
98     True
99    False
Name: col, Length: 100, dtype: bool

In [215]:
apply(lambda x : x/2, df['col'])

NameError: name 'apply' is not defined

In [216]:
# using lambda function
df['col'].apply(lambda x : True if x > 0.5 else False)

0     False
1     False
2      True
3     False
4     False
      ...  
95    False
96    False
97    False
98     True
99    False
Name: col, Length: 100, dtype: bool

In [217]:
df = pd.DataFrame({'col': ['andre aguiar', 
                           'pablo vitar', 
                           'jair piscerni', 
                           'luis gustavo']})

In [224]:
def concatenate_strings(a):
    """Return ``a`` with every space replaced by a hyphen.

    Delegates to ``a.replace(' ', '-')``, so the outcome depends on the
    ``replace`` method of whatever object is passed in.
    """
    hyphenated = a.replace(' ', '-')
    return hyphenated

In [226]:
concatenate_strings('Andre Aguiar')

# Andre-Aguiar

'Andre-Aguiar'

In [237]:
df['col'].replace(' ', '-')

0     andre aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [238]:
concatenate_strings(df['col'])

0     andre aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [240]:
df['col'].apply(concatenate_strings)

0     andre-aguiar
1      pablo-vitar
2    jair-piscerni
3     luis-gustavo
Name: col, dtype: object

In [247]:
def convert_to_upper(x):
    """Return the upper-cased version of ``x`` by calling ``x.upper()``."""
    uppercased = x.upper()
    return uppercased

In [248]:
convert_to_upper('andre') # ANDRE

'ANDRE'

In [249]:
convert_to_upper(df['col'])

AttributeError: 'Series' object has no attribute 'upper'

In [250]:
df['col'].apply(convert_to_upper)

0     ANDRE AGUIAR
1      PABLO VITAR
2    JAIR PISCERNI
3     LUIS GUSTAVO
Name: col, dtype: object

In [251]:
df['col']

0     andre aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [254]:
df

Unnamed: 0,col
0,andre aguiar
1,pablo vitar
2,jair piscerni
3,luis gustavo


In [None]:
# Select the column as a Series with .loc; `df[:, ['col']]` is not valid DataFrame
# indexing, and the lambda needs individual strings, which a Series' apply provides.
df.loc[:, 'col'].apply(lambda x : x.upper())

In [252]:
df['col']

0     andre aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [265]:
df['col'].apply(lambda x : x.replace(' ', '_') if x.startswith('a') else x)

0     andre_aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [264]:
df.loc[df['col'] == 'andre aguiar', 'col']

0    andre aguiar
Name: col, dtype: object

In [263]:
df.loc[df['col'] == 'andre aguiar', 'col'].apply(lambda x : x.upper())

0    ANDRE AGUIAR
Name: col, dtype: object

In [266]:
def sera_function(nome):
    '''
    Replace spaces with underscores when the name starts with "a".

    Names that do not start with a lowercase "a" are returned unchanged.
    This function was created by Sera.
    '''
    return nome.replace(' ', '_') if nome.startswith('a') else nome

In [267]:
df['col'].apply(sera_function)

0     andre_aguiar
1      pablo vitar
2    jair piscerni
3     luis gustavo
Name: col, dtype: object

In [281]:
df = pd.DataFrame(np.random.random((10,3)), columns=['col1','col2','col3'])

In [282]:
def minha_condicao(x):
    """Label a number: 'ooooi' when it is below 0.5, otherwise 'tchaaau'."""
    return 'ooooi' if x < 0.5 else 'tchaaau'

In [283]:
df

Unnamed: 0,col1,col2,col3
0,0.684339,0.808875,0.363325
1,0.886741,0.26903,0.656908
2,0.582506,0.902145,0.067888
3,0.05949,0.567554,0.129159
4,0.272518,0.990689,0.600572
5,0.298665,0.781178,0.591185
6,0.322531,0.870201,0.523881
7,0.15519,0.609545,0.819794
8,0.034737,0.328099,0.753273
9,0.043551,0.341029,0.39544


In [284]:
df.applymap(minha_condicao)

Unnamed: 0,col1,col2,col3
0,tchaaau,tchaaau,ooooi
1,tchaaau,ooooi,tchaaau
2,tchaaau,tchaaau,ooooi
3,ooooi,tchaaau,ooooi
4,ooooi,tchaaau,tchaaau
5,ooooi,tchaaau,tchaaau
6,ooooi,tchaaau,tchaaau
7,ooooi,tchaaau,tchaaau
8,ooooi,ooooi,tchaaau
9,ooooi,ooooi,ooooi


In [285]:
df[['col1','col2']].applymap(minha_condicao)

Unnamed: 0,col1,col2
0,tchaaau,tchaaau
1,tchaaau,ooooi
2,tchaaau,tchaaau
3,ooooi,tchaaau
4,ooooi,tchaaau
5,ooooi,tchaaau
6,ooooi,tchaaau
7,ooooi,tchaaau
8,ooooi,ooooi
9,ooooi,ooooi


> Pandas Series have both `map` and `apply`. The most used, though, is the `apply` method. 

In [312]:
# NOTE(review): the output shown for this cell has the 'type'/'name'/'score' columns, i.e. it
# was run on the `df` built in the "Apply in axis = 1" section further down the notebook.
df.applymap(lambda x : x * 2)

Unnamed: 0,type,name,score
0,LTLT,AndreAndre,16
1,TATA,MatheusMatheus,18
2,StudentStudent,MarcusMarcus,18


In [314]:
# NOTE(review): this cell reads the 'type' and 'score' columns, which only exist on the `df`
# built in the "Apply in axis = 1" section further down — that section must be run first.
df.apply(lambda row : row['type'] + str(row['score']), axis=1)

0         LT8
1         TA9
2    Student9
dtype: object

In [287]:
df.apply(lambda row : 'oi' if row['col1'] + row['col2'] > 1.0 else 'tchau', axis=1)

0       oi
1       oi
2       oi
3    tchau
4       oi
5       oi
6       oi
7    tchau
8    tchau
9    tchau
dtype: object

In [None]:
# TODO:

# concatenate 'nome' and 'score'

# Apply functions with arguments.

In [310]:
def my_replace(x, index=0):
    """
    Split a label such as 'Andre_LT' and return one of its parts.

    Underscores are turned into spaces and the text is split on whitespace.
    If index = 0, returns the name
    If index = 1, returns the profession
    """
    parts = x.replace('_', ' ').split()
    return parts[index]

In [296]:
example_df = pd.DataFrame({'names': ['Andre_LT','Matheus_TA','Marcus_Student']})

In [311]:
my_replace('Andre Aguiar')

'Andre'

In [302]:
example_df['names'].apply(my_replace, index=1)

0         LT
1         TA
2    Student
Name: names, dtype: object

In [303]:
example_df['profissao'] = example_df['names'].apply(my_replace, index=1)
example_df['nome'] = example_df['names'].apply(my_replace, index=0)

In [304]:
example_df

Unnamed: 0,names,profissao,nome
0,Andre_LT,LT,Andre
1,Matheus_TA,TA,Matheus
2,Marcus_Student,Student,Marcus


# Apply in axis = 1

Whenever you call `apply` on a pandas DataFrame with `axis=1`, your function receives one row of the DataFrame at a time, so it can access every column of that row.

In [305]:
example_df = pd.DataFrame({'names': ['Andre_LT','Matheus_TA','Marcus_Student']})

In [306]:
df = pd.DataFrame()
df['type'] = example_df['names'].apply(my_replace, index=1)
df['name'] = example_df['names'].apply(my_replace, index=0)
df['score'] = np.random.randint(0,10, size=df.shape[0])

In [309]:
df.apply(lambda row : row['type'] + '_' + row['name'], axis=1)

0          LT_Andre
1        TA_Matheus
2    Student_Marcus
dtype: object