In [1]:
import re
import numpy as np
import pandas as pd

# Function definitions

Broken into tokens:

> <b>def </b>
- Tell python that you're starting to create a function

> <b>function_name</b>
- you give that function a name for a future call

> <b>(</b>
- Open parenthesis to start the list of arguments (or variables) that you'll use in that function. These help you parametrize code.

> <b>[OPTIONAL] arg1, arg2, arg3, ... </b>
- The arguments of that function

> <b>)</b>
- Close parenthesis to say you're done listing arguments

> <b>:</b>
- colon to say you're about to start a `code block`

> <b>code block</b>
- where you effectively do something with (or without) the arguments

> <b>return</b>
- The return statement says you're done with the function. Returning a value from it is optional

> <b>[OPTIONAL] something</b>
- The `something` you're allowed to return.



```python
def function_name(arg1):
    something = arg1 + 10
    return something
```

In [None]:
def function_name(arg1):
    """Return arg1 increased by 10."""
    return arg1 + 10

function_name(30)

In [None]:
function_name(54)

# What is a lambda function?

https://realpython.com/python-lambda/


> Named after `lambda calculus`.

> Usually refers to `anonymous functions`

## Defining lambda functions

In [None]:
def half(x):
    """Return x divided by 2 (true division, so ints give floats)."""
    halved = x / 2
    return halved

In [None]:
lambda x : x/2

In [None]:
lambda arg1 : arg1 + 10

In [None]:
(lambda arg1 : arg1 + 10)(30)

In [None]:
(lambda arg1 : arg1 + 10)(35)

In [None]:
function_name = lambda arg1 : arg1 + 10

In [None]:
function_name(30)

In [4]:
# map
example_list = [1,4,5,8]

In [None]:
def half(x):
    """Return x divided by 2; used as the function handed to map()."""
    halved = x / 2
    return halved

In [None]:
list(map(half, example_list))

In [None]:
list(map(lambda x : x/2, example_list))

## More than 1 argument

In [None]:
def my_sum(a, b):
    """Return the result of a + b."""
    total = a + b
    return total

In [14]:
add_args = lambda a, b : a + b
add_args(10, 20)

30

In [2]:
(lambda a, b : a + b)(10, 30)

40

In [None]:
from functools import reduce

In [None]:
example_list

In [None]:
reduce(lambda acc, value : acc + value, example_list)

## Conditions

In [None]:
def safe_div(num, denom):
    """
    Divide num by denom without raising on a zero denominator.

    Returns num/denom when denom is different from 0; otherwise returns 0.
    """
    return num / denom if denom != 0 else 0

In [None]:
# Plain division by zero raises ZeroDivisionError. The error is caught and shown
# here so this demonstration does not stop a "Restart & Run All" of the notebook.
try:
    9/0
except ZeroDivisionError as error:
    division_error = f"{type(error).__name__}: {error}"

division_error

In [None]:
safe_div(9, 0)

In [27]:
example_tuples=[(10,20),(30,0),(60,30)]

In [28]:
[(a/b if b != 0 else 0) for a,b in example_tuples]

[0.5, 0, 2.0]

In [None]:
(lambda num, denom : num/denom if denom != 0 else 0)(9, 3)

# Applications



## map

In [None]:
example_list = [1,4,6,7,10,31,13]

In [None]:
def half(x):
    """Return x divided by 2; applied to every element via map()."""
    halved = x / 2
    return halved

In [None]:
list(map(half, example_list))

In [None]:
list(map(lambda x : x/2, example_list))

## filter

In [None]:
def check_if_even(x):
    """Return True when x leaves no remainder when divided by 2, else False."""
    remainder = x % 2
    return remainder == 0
    
# def check_if_even(x):
#     return x % 2 == 0

In [None]:
example_list

In [None]:
list(filter(check_if_even, example_list))

In [None]:
list(filter(lambda x : x % 2 == 0, example_list))

## Using lambdas to order stuff


In [10]:
school_dash = ['Philosophy', 'Art-History', 'Computer-Science', 'Calculus']

In [None]:
sorted(school_dash)

In [7]:
def get_last_letter(word):
    """Return the final character of word."""
    last_letter = word[-1]
    return last_letter

In [8]:
get_last_letter('Computer-Science')

'e'

In [11]:
sorted(school_dash, key=get_last_letter)

['Computer-Science', 'Calculus', 'Philosophy', 'Art-History']

In [None]:
sorted(school_dash, key=lambda x : x[-1])

# Ordering a dictionary by its values

In [None]:
my_dict = {'Andre': 80, 
           'Joao' : 90 , 
           'Pedro': 80, 
           'Carla': 70, 
           'Maria': 80, 
           'Aurora' : 80, 
           'Camila': 60}

In [None]:
my_dict

In [None]:
my_dict.items()

In [None]:
sorted(my_dict.items(), key=lambda x : x[1])

# Pandas apply

In [16]:
import pandas as pd

In [17]:
from tqdm.auto import tqdm
tqdm.pandas(desc="Applying transformation")

In [18]:
def create_sample_dataframe(n_rows=1000000, n_cols=1, seed=None):
    """
    Create a pandas dataframe of "messed up" numbers stored as text.

    The frame has n_rows rows and n_cols + 1 columns: `column_0` ...
    `column_{n_cols - 1}` are random floats in [0, 1) and `CPF` is a random
    integer in [1, 999999999). Every value is converted to a string and has
    its dots (.) replaced by commas (,).

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    n_cols : int
        Number of random float columns (the `CPF` column is added on top).
    seed : int, optional
        When given, values are drawn from a dedicated
        np.random.RandomState(seed) so the frame is reproducible. When None
        (default), numpy's global random state is used.

    Returns
    -------
    pd.DataFrame
        Frame whose values are all strings.
    """
    # RandomState exposes the same randint/random_sample API as the np.random module.
    rng = np.random if seed is None else np.random.RandomState(seed)

    cpf = rng.randint(1, 999999999, size=n_rows)
    variables = {f'column_{col_number}': rng.random_sample(n_rows) for col_number in range(n_cols)}
    variables.update({'CPF': cpf})

    # Element-wise conversion done column by column with Series.map;
    # DataFrame.applymap is deprecated since pandas 2.1.
    return pd.DataFrame(variables).apply(
        lambda column: column.map(lambda x: str(x).replace('.', ','))
    )

df = create_sample_dataframe()

In [19]:
df.head()

Unnamed: 0,column_0,CPF
0,5436986317756778,451181210
1,9660024668568563,312778914
2,8408296706174745,308581585
3,9369559486926526,89376925
4,32088037966219196,634993654


In [20]:
# Undo the decimal-comma formatting: swap ',' back to '.' and parse each string as a float.
# progress_apply is the tqdm-instrumented apply registered by tqdm.pandas(...) above.
df.column_0.progress_apply(lambda x : float(x.replace(',','.')))

HBox(children=(FloatProgress(value=0.0, description='Applying transformation', max=1000000.0, style=ProgressSt…




0         0.543699
1         0.966002
2         0.084083
3         0.009370
4         0.320880
            ...   
999995    0.763507
999996    0.433446
999997    0.607042
999998    0.148597
999999    0.136325
Name: column_0, Length: 1000000, dtype: float64

---

In [25]:
df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/62f97ae1f8896b6b8e4bb08dcf65a07e0502aa8a/data/2020/2020-05-05/user_reviews.tsv', sep='\t')
df.head()

Unnamed: 0,grade,user_name,text,date
0,4,mds27272,My gf started playing before me. No option to ...,2020-03-20
1,5,lolo2178,"While the game itself is great, really relaxin...",2020-03-20
2,0,Roachant,My wife and I were looking forward to playing ...,2020-03-20
3,0,Houndf,We need equal values and opportunities for all...,2020-03-20
4,0,ProfessorFox,BEWARE! If you have multiple people in your h...,2020-03-20
...,...,...,...,...
2994,1,TakezoShinmen,1 Island for console limitation.I cannot play ...,2020-05-03
2995,1,Pikey17,"Per giocare con figli o fidanzate, mogli o per...",2020-05-03
2996,0,Lemmeadem,One island per console is a pathetic limitatio...,2020-05-03
2997,2,TandemTester938,Even though it seems like a great game with ma...,2020-05-03


In [None]:
import re

In [22]:
x = 'BEWARE!  If you have multiple people in your h'

In [None]:
# How can I find the warning?

In [None]:
# For each text, collect every run of uppercase ASCII letters that is immediately
# followed by "!" (e.g. 'BEWARE!'); the result is a list of matches per row.
df['text_warning'] = df.text.apply(lambda x : re.findall('[A-Z]+!', x))