Applying a Function on a Pandas Series

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display

# Labelled Series mapping each student's name to a height value
# (these values are converted with cm_to_feet in the following cell).
students = pd.Series({'Vik': 180, 'Mehdi': 175, 'Bella': 168, 'Chriss': 190})
display(students)      # rich notebook rendering of the Series
print(type(students))  # confirm the object is a pandas Series

Vik       180
Mehdi     175
Bella     168
Chriss    190
dtype: int64

<class 'pandas.core.series.Series'>


In [2]:
def cm_to_feet(h):
    """Convert a length from centimetres to feet, rounded to two decimals.

    Parameters
    ----------
    h : number or array-like
        Length in centimetres. Anything that supports division and
        ``np.round`` works (scalars, NumPy arrays, pandas Series).

    Returns
    -------
    Same kind as the input
        The length in feet, rounded to 2 decimal places.
    """
    feet = h / 30.48  # one foot is 30.48 cm
    return np.round(feet, 2)

# Series.apply calls cm_to_feet once per element and returns a new Series.
heights_ft = students.apply(cm_to_feet)
print(heights_ft)

Vik       5.91
Mehdi     5.74
Bella     5.51
Chriss    6.23
dtype: float64


Applying a Function on a Pandas DataFrame

In [3]:
# Employee table, written one record per row so each person's values can be
# read across a single line. Birthdate strings are day-first (DD/MM/YYYY),
# as values such as '14/04/1981' and '16/07/1995' show.
data = pd.DataFrame(
    [
        ('Callen Dunkley', 'Accounting', 2010, 'M', '04/09/1982', 78, 176, 2),
        ('Sarah Rayner', 'Engineering', 2018, 'F', '14/04/1981', 80, 160, 1),
        ('Jeanette Sloan', 'Engineering', 2012, 'F', '06/05/1997', 66, 169, 0),
        ('Kaycee Acosta', 'HR', 2014, 'F', '08/01/1986', 67, 157, 1),
        ('Henri Conroy', 'HR', 2014, 'M', '10/10/1988', 90, 185, 1),
        ('Emma Peralta', 'HR', 2018, 'F', '12/11/1992', 57, 164, 0),
        ('Martin Butt', 'Data Science', 2020, 'M', '10/04/1991', 115, 195, 2),
        ('Alex Jensen', 'Data Science', 2018, 'M', '16/07/1995', 87, 180, 0),
        ('Kim Howarth', 'Accounting', 2020, 'M', '08/10/1992', 95, 174, 3),
        ('Jane Burnett', 'Data Science', 2012, 'F', '11/10/1979', 57, 165, 1),
    ],
    columns=['EmployeeName', 'Department', 'HireDate', 'Sex',
             'Birthdate', 'Weight', 'Height', 'Kids'],
)
display(data)

Unnamed: 0,EmployeeName,Department,HireDate,Sex,Birthdate,Weight,Height,Kids
0,Callen Dunkley,Accounting,2010,M,04/09/1982,78,176,2
1,Sarah Rayner,Engineering,2018,F,14/04/1981,80,160,1
2,Jeanette Sloan,Engineering,2012,F,06/05/1997,66,169,0
3,Kaycee Acosta,HR,2014,F,08/01/1986,67,157,1
4,Henri Conroy,HR,2014,M,10/10/1988,90,185,1
5,Emma Peralta,HR,2018,F,12/11/1992,57,164,0
6,Martin Butt,Data Science,2020,M,10/04/1991,115,195,2
7,Alex Jensen,Data Science,2018,M,16/07/1995,87,180,0
8,Kim Howarth,Accounting,2020,M,08/10/1992,95,174,3
9,Jane Burnett,Data Science,2012,F,11/10/1979,57,165,1


Scenario 1

In [4]:
# Split each full name on whitespace: token 0 becomes FirstName and token 1
# becomes LastName. NOTE(review): indexing [1] assumes every name has at
# least two whitespace-separated parts; a single-word name raises IndexError.
data['FirstName'] = data['EmployeeName'].apply(lambda full_name: full_name.split()[0])
data['LastName'] = data['EmployeeName'].apply(lambda full_name: full_name.split()[1])
display(data)

Unnamed: 0,EmployeeName,Department,HireDate,Sex,Birthdate,Weight,Height,Kids,FirstName,LastName
0,Callen Dunkley,Accounting,2010,M,04/09/1982,78,176,2,Callen,Dunkley
1,Sarah Rayner,Engineering,2018,F,14/04/1981,80,160,1,Sarah,Rayner
2,Jeanette Sloan,Engineering,2012,F,06/05/1997,66,169,0,Jeanette,Sloan
3,Kaycee Acosta,HR,2014,F,08/01/1986,67,157,1,Kaycee,Acosta
4,Henri Conroy,HR,2014,M,10/10/1988,90,185,1,Henri,Conroy
5,Emma Peralta,HR,2018,F,12/11/1992,57,164,0,Emma,Peralta
6,Martin Butt,Data Science,2020,M,10/04/1991,115,195,2,Martin,Butt
7,Alex Jensen,Data Science,2018,M,16/07/1995,87,180,0,Alex,Jensen
8,Kim Howarth,Accounting,2020,M,08/10/1992,95,174,3,Kim,Howarth
9,Jane Burnett,Data Science,2012,F,11/10/1979,57,165,1,Jane,Burnett


Scenario 2

In [6]:
from datetime import datetime, date

def calculate_age(birthdate, today=None, date_format='%d/%m/%Y'):
    """Return the age in completed years for a birthdate given as a string.

    Parameters
    ----------
    birthdate : str
        Date of birth, parsed with ``date_format``.
    today : datetime.date, optional
        Reference date the age is computed against. Defaults to
        ``date.today()``; pass an explicit date for reproducible results.
    date_format : str, optional
        ``strptime`` format of ``birthdate``. The default is day-first
        (``DD/MM/YYYY``), matching the Birthdate values in this notebook
        (e.g. ``'14/04/1981'``).

    Returns
    -------
    int
        Number of full years between ``birthdate`` and ``today``.

    Raises
    ------
    ValueError
        If ``birthdate`` does not match ``date_format``.
    """
    # strptime requires an explicit format; calling it with the string alone
    # raises TypeError.
    born = datetime.strptime(birthdate, date_format)
    if today is None:
        today = date.today()
    # Take one year off when this year's birthday is still ahead. Comparing
    # (month, day) tuples rather than months alone also handles a birthday
    # that falls later in the current month.
    birthday_pending = (today.month, today.day) < (born.month, born.day)
    return today.year - born.year - birthday_pending

In [None]:
# Element-wise apply: calculate_age is called once per Birthdate string and
# the resulting Series is stored as the new Age column.
ages = data['Birthdate'].apply(calculate_age)
data['Age'] = ages
display(data)

In [None]:
# Mean of the Age column over all rows of the frame.
mean_age = data['Age'].mean()
print(mean_age)

Scenario 3

In [8]:
def calc_bmi(weight, height):
    """Compute the body-mass index, rounded to two decimals.

    Parameters
    ----------
    weight : number or array-like
        Body weight (presumably kilograms, per the usual BMI definition).
    height : number or array-like
        Body height in centimetres; it is divided by 100 to get metres.

    Returns
    -------
    Same kind as the inputs
        ``weight / height_in_metres ** 2`` rounded to 2 decimal places.
    """
    height_m = height / 100
    return np.round(weight / height_m ** 2, 2)

In [9]:
# Row-wise apply: with the axis set to columns (same as axis=1) each row is
# handed to the lambda as a Series, and its Weight and Height are passed on
# to calc_bmi.
data['BMI'] = data.apply(
    lambda row: calc_bmi(row['Weight'], row['Height']),
    axis='columns',
)

In [10]:
# Render the frame, which now includes the BMI column added in the previous cell.
display(data)

Unnamed: 0,EmployeeName,Department,HireDate,Sex,Birthdate,Weight,Height,Kids,FirstName,LastName,BMI
0,Callen Dunkley,Accounting,2010,M,04/09/1982,78,176,2,Callen,Dunkley,25.18
1,Sarah Rayner,Engineering,2018,F,14/04/1981,80,160,1,Sarah,Rayner,31.25
2,Jeanette Sloan,Engineering,2012,F,06/05/1997,66,169,0,Jeanette,Sloan,23.11
3,Kaycee Acosta,HR,2014,F,08/01/1986,67,157,1,Kaycee,Acosta,27.18
4,Henri Conroy,HR,2014,M,10/10/1988,90,185,1,Henri,Conroy,26.3
5,Emma Peralta,HR,2018,F,12/11/1992,57,164,0,Emma,Peralta,21.19
6,Martin Butt,Data Science,2020,M,10/04/1991,115,195,2,Martin,Butt,30.24
7,Alex Jensen,Data Science,2018,M,16/07/1995,87,180,0,Alex,Jensen,26.85
8,Kim Howarth,Accounting,2020,M,08/10/1992,95,174,3,Kim,Howarth,31.38
9,Jane Burnett,Data Science,2012,F,11/10/1979,57,165,1,Jane,Burnett,20.94
