## Pandas: Applying function to series and DF using `.apply(func)`

- For Series: `.apply(func)` runs on every element.
- For DF: `.apply(func)` runs on every cols (`axis=1`) or rows (`axis=0`)

In [51]:
import pandas as pd

df = pd.read_csv("datasets/players_20.csv")
df.set_index("short_name", inplace=True)


In [52]:
df

Unnamed: 0_level_0,sofifa_id,player_url,long_name,age,dob,height_cm,weight_kg,nationality,club,overall,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L. Messi,158023,https://sofifa.com/player/158023/lionel-messi/...,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,94,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2
Cristiano Ronaldo,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus,93,...,65+3,61+3,61+3,61+3,65+3,61+3,53+3,53+3,53+3,61+3
Neymar Jr,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,92,...,66+3,61+3,61+3,61+3,66+3,61+3,46+3,46+3,46+3,61+3
J. Oblak,200389,https://sofifa.com/player/200389/jan-oblak/20/...,Jan Oblak,26,1993-01-07,188,87,Slovenia,Atlético Madrid,91,...,,,,,,,,,,
E. Hazard,183277,https://sofifa.com/player/183277/eden-hazard/2...,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid,91,...,66+3,63+3,63+3,63+3,66+3,61+3,49+3,49+3,49+3,61+3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Shao Shuai,245006,https://sofifa.com/player/245006/shuai-shao/20...,邵帅,22,1997-03-10,186,79,China PR,Beijing Renhe FC,48,...,43+2,42+2,42+2,42+2,43+2,45+2,46+2,46+2,46+2,45+2
Xiao Mingjie,250995,https://sofifa.com/player/250995/mingjie-xiao/...,Mingjie Xiao,22,1997-01-01,177,66,China PR,Shanghai SIPG FC,48,...,44+2,43+2,43+2,43+2,44+2,46+2,47+2,47+2,47+2,46+2
Zhang Wei,252332,https://sofifa.com/player/252332/wei-zhang/20/...,张威,19,2000-05-16,186,75,China PR,Hebei China Fortune FC,48,...,47+2,49+2,49+2,49+2,47+2,47+2,49+2,49+2,49+2,47+2
Wang Haijian,251110,https://sofifa.com/player/251110/haijian-wang/...,汪海健,18,2000-08-02,185,74,China PR,Shanghai Greenland Shenhua FC,48,...,48+2,48+2,48+2,48+2,48+2,48+2,49+2,49+2,49+2,48+2


In [53]:
# Apply a numpy function to a series
import numpy as np

df['age']

short_name
L. Messi             32
Cristiano Ronaldo    34
Neymar Jr            27
J. Oblak             26
E. Hazard            28
                     ..
Shao Shuai           22
Xiao Mingjie         22
Zhang Wei            19
Wang Haijian         18
Pan Ximing           26
Name: age, Length: 18278, dtype: int64

In [54]:
# Example: apply np.sqrt to a function

df['age'].apply(np.sqrt)

short_name
L. Messi             5.656854
Cristiano Ronaldo    5.830952
Neymar Jr            5.196152
J. Oblak             5.099020
E. Hazard            5.291503
                       ...   
Shao Shuai           4.690416
Xiao Mingjie         4.690416
Zhang Wei            4.358899
Wang Haijian         4.242641
Pan Ximing           5.099020
Name: age, Length: 18278, dtype: float64

In [55]:
# Apply own function to a DataFrame

def calculate_bmi(row):
    return row["weight_kg"] / ((row["height_cm"] / 100) ** 2)

def check_dtype(r):
    print(type(r))
    try:
        print(r.name)
    except:
        pass
    print("--")


In [56]:
# Check the input's datatype for each case

# Case 1. Applying function to the whole DF: the function is run on every row (Series).
_ = df.apply(check_dtype, axis=1)

<class 'pandas.core.series.Series'>
L. Messi
--
<class 'pandas.core.series.Series'>
Cristiano Ronaldo
--
<class 'pandas.core.series.Series'>
Neymar Jr
--
<class 'pandas.core.series.Series'>
J. Oblak
--
<class 'pandas.core.series.Series'>
E. Hazard
--
<class 'pandas.core.series.Series'>
K. De Bruyne
--
<class 'pandas.core.series.Series'>
M. ter Stegen
--
<class 'pandas.core.series.Series'>
V. van Dijk
--
<class 'pandas.core.series.Series'>
L. Modrić
--
<class 'pandas.core.series.Series'>
M. Salah
--
<class 'pandas.core.series.Series'>
K. Mbappé
--
<class 'pandas.core.series.Series'>
K. Koulibaly
--
<class 'pandas.core.series.Series'>
H. Kane
--
<class 'pandas.core.series.Series'>
Alisson
--
<class 'pandas.core.series.Series'>
De Gea
--
<class 'pandas.core.series.Series'>
N. Kanté
--
<class 'pandas.core.series.Series'>
G. Chiellini
--
<class 'pandas.core.series.Series'>
S. Agüero
--
<class 'pandas.core.series.Series'>
Sergio Ramos
--
<class 'pandas.core.series.Series'>
L. Suárez
--
<clas

In [57]:
# Case 2. Applying function to a Series
_ = df.dob.apply(check_dtype)

<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>
--
<class 'str'>


In [58]:
# Run BMI function on the DF

df.apply(calculate_bmi, axis=1)

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64

In [60]:
# Use .apply() with lambda function
# example: convert "height_cm" series to meters

df.apply(lambda row: row.height_cm / 100, axis=1)

short_name
L. Messi             1.70
Cristiano Ronaldo    1.87
Neymar Jr            1.75
J. Oblak             1.88
E. Hazard            1.75
                     ... 
Shao Shuai           1.86
Xiao Mingjie         1.77
Zhang Wei            1.86
Wang Haijian         1.85
Pan Ximing           1.82
Length: 18278, dtype: float64

In [62]:
# Convert "long_name" to uppercase
df.apply(lambda row: row.long_name.upper(), axis=1)

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Length: 18278, dtype: object

In [63]:
# Alternative
df.long_name.str.upper()

short_name
L. Messi                  LIONEL ANDRÉS MESSI CUCCITTINI
Cristiano Ronaldo    CRISTIANO RONALDO DOS SANTOS AVEIRO
Neymar Jr                  NEYMAR DA SILVA SANTOS JUNIOR
J. Oblak                                       JAN OBLAK
E. Hazard                                    EDEN HAZARD
                                    ...                 
Shao Shuai                                            邵帅
Xiao Mingjie                                MINGJIE XIAO
Zhang Wei                                             张威
Wang Haijian                                         汪海健
Pan Ximing                                           潘喜明
Name: long_name, Length: 18278, dtype: object

In [71]:
# Lambda function to get the Year part of DOB

df.apply(lambda r: pd.to_datetime(r.dob).year, axis=1)


short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Length: 18278, dtype: int64

In [72]:

# Using dt attribute (faster)
df.dob.astype("datetime64[s]").dt.year

short_name
L. Messi             1987
Cristiano Ronaldo    1985
Neymar Jr            1992
J. Oblak             1993
E. Hazard            1991
                     ... 
Shao Shuai           1997
Xiao Mingjie         1997
Zhang Wei            2000
Wang Haijian         2000
Pan Ximing           1993
Name: dob, Length: 18278, dtype: int32

In [75]:
# Calculate BMI with lambda + .apply()

df.apply(lambda row: row.weight_kg /  (row.height_cm / 100) **2, axis=1 )

short_name
L. Messi             24.913495
Cristiano Ronaldo    23.735308
Neymar Jr            22.204082
J. Oblak             24.615211
E. Hazard            24.163265
                       ...    
Shao Shuai           22.835010
Xiao Mingjie         21.066743
Zhang Wei            21.678807
Wang Haijian         21.621622
Pan Ximing           23.547881
Length: 18278, dtype: float64