In [1]:
import pandas as pd

In [2]:
# read the csv file, use 'short_name' as the index and keep only the columns we need
df = (
    pd.read_csv('players_20.csv')
    .set_index('short_name')
    [['long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club']]
)

In [3]:
# showing dataframe
df.head()

Unnamed: 0_level_0,long_name,age,dob,height_cm,weight_kg,nationality,club
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona
Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus
Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain
J. Oblak,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid
E. Hazard,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid


# 1. Lambda function

In [11]:
# basic function
def sum_values(a, b):
    """Return the result of a + b."""
    return a + b

In [12]:
sum_values(2,3)

5

In [13]:
# lambda function (one-liner) that takes two values and returns a + b
sum_values_lambda = lambda a, b: a + b

# syntax: <name_of_function> = lambda <inputs>: <output>
# where <output> is the expression the function evaluates and returns

In [7]:
# So this lambda function is supposed to get two values and return the sum of them

In [14]:
sum_values_lambda

<function __main__.<lambda>(a, b)>

In [15]:
sum_values_lambda(2,3)

5

In [16]:
# So we created this lambda function which is equivalent to the basic function

A lambda function is useful when we want to create a temporary function, i.e. a function that we are going to use only once
and never again. That is one of the cases in which we choose a lambda function over a regular (`def`) function.


# 2. Apply + Lambda Function

In [17]:
# Using a lambda function to convert the "height_cm" series to meters
df['height_cm'].apply(lambda height_cm: height_cm / 100)

# the lambda argument receives one element of the "height_cm" series at a time

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Name: height_cm, Length: 18278, dtype: float64

In [18]:
# and now we have the height but in meters.

In [19]:
# alternative to achieve the same task without the apply() method:
# dividing the series by 100 divides every element of the series
df['height_cm']/100

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Name: height_cm, Length: 18278, dtype: float64

In [20]:
# and as we can see, we got the same results.

In [24]:
# Using a lambda function to convert the "long_name" series to upper case
df['long_name'].apply(lambda name: name.upper())

# the lambda argument represents one element of the df['long_name'] series:
# apply() walks through the series and calls the .upper() method on each element, which is a string

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

In [22]:
# and now, as we can see in the result, the names are in uppercase

In [25]:
# alternative with the .str accessor
df['long_name'].str.upper()

# .str gives access to the string methods of the series, so .upper() is applied to every element

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

In [26]:
# and here we can see that we got the full name in uppercase.

In [27]:
df.dtypes

long_name      object
age             int64
dob            object
height_cm       int64
weight_kg       int64
nationality    object
club           object
dtype: object

In [29]:
# 'dob' has the object data type (see df.dtypes above), so we convert it to
# datetime64[ns] with the astype() method and overwrite the column in df;
# pd.to_datetime(df['dob']) is an alternative way to do this conversion
df['dob'] = df['dob'].astype('datetime64[ns]')
# display the dtypes again to confirm that 'dob' changed
df.dtypes

long_name              object
age                     int64
dob            datetime64[ns]
height_cm               int64
weight_kg               int64
nationality            object
club                   object
dtype: object

In [30]:
# and now this is a datetime type, and we can extract the year

In [32]:
# Using a lambda function to extract the year from each element of the "dob" series
df['dob'].apply(lambda birth_date: birth_date.year)

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int64

In [33]:
# and as we can see in the result, we got only the year. So for example,
# Messi was born in 1987, Ronaldo in 1985, Neymar in 1992, and so on..

In [34]:
# alternative with the .dt accessor
# (note: the output below has dtype int32, while the apply() version has dtype int64)
df['dob'].dt.year

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int32

In [35]:
# applying a lambda function to the dataframe to calculate the BMI of each player:
# with axis=1 the lambda receives one row at a time, so columns are accessed by name
df.apply(lambda row: row['weight_kg'] / (row['height_cm'] / 100) ** 2, axis=1)

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64

In [36]:
# and here we got the BMI of each player.

# And that's how the lambda function can be used with the apply() method.