# Why use apply() and transform() on a DataFrame?

- Both apply() and transform() are used to manipulate an entire DataFrame or a specific column of a given DataFrame.


# 3 main differences


- transform() can take a function, the name of a function as a string, a list of functions, or a dict. However, apply() only accepts a function.
- transform() cannot produce aggregated results.
- apply() can work with multiple Series at a time, but transform() can only work with a single Series at a time.


In [5]:
import pandas as pd
import numpy as np

In [6]:
remote_url = "https://raw.githubusercontent.com/prodramp/publiccode/master/datasets/titanic.csv"

In [7]:
df = pd.read_csv(remote_url)

In [8]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
df = pd.DataFrame({'A': [1,2,3], 'B': [10,20,30] })

In [5]:
df

Unnamed: 0,A,B
0,1,10
1,2,20
2,3,30


In [6]:
def plus_10(x):
    """Return ``x`` with 10 added to it.

    The addition is delegated to ``x``'s own ``+`` operator, so any operand
    that supports ``x + 10`` is accepted.
    """
    shifted = x + 10
    return shifted

In [7]:
df.apply(plus_10)

Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [8]:
df

Unnamed: 0,A,B
0,1,10
1,2,20
2,3,30


In [9]:
df.transform(plus_10)

Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [10]:
df.apply(lambda x: x+10)


Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [11]:
df

Unnamed: 0,A,B
0,1,10
1,2,20
2,3,30


In [12]:
df.transform(lambda x: x+10)

Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [13]:
df.transform(lambda x: x+10)

Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [14]:
df['B_ap'] = df['B'].apply(plus_10)

In [15]:
df

Unnamed: 0,A,B,B_ap
0,1,10,20
1,2,20,30
2,3,30,40


In [16]:
# The lambda equivalent
df['B_ap'] = df['B'].apply(lambda x: x+10)

In [17]:
df

Unnamed: 0,A,B,B_ap
0,1,10,20
1,2,20,30
2,3,30,40


In [18]:
df['B_tr'] = df['B'].transform(plus_10)

In [19]:
df

Unnamed: 0,A,B,B_ap,B_tr
0,1,10,20,20
1,2,20,30,30
2,3,30,40,40


In [20]:
# The lambda equivalent
df['B_tr'] = df['B'].transform(lambda x: x+10)

In [21]:
df

Unnamed: 0,A,B,B_ap,B_tr
0,1,10,20,20
1,2,20,30,30
2,3,30,40,40


# 3 main differences
- transform() works with a function, the name of a function as a string, a list of functions, or a dict. However, apply() only works with a function.
- transform() cannot produce aggregated results.
- apply() can work with multiple Series at a time, but transform() can only work with a single Series at a time.


In [22]:
df.transform('sqrt')

Unnamed: 0,A,B,B_ap,B_tr
0,1.0,3.162278,4.472136,4.472136
1,1.414214,4.472136,5.477226,5.477226
2,1.732051,5.477226,6.324555,6.324555


In [None]:
df

In [25]:
df.transform([np.sqrt, np.exp])

Unnamed: 0_level_0,A,A,B,B,B_ap,B_ap,B_tr,B_tr
Unnamed: 0_level_1,sqrt,exp,sqrt,exp,sqrt,exp,sqrt,exp
0,1.0,2.718282,3.162278,22026.47,4.472136,485165200.0,4.472136,485165200.0
1,1.414214,7.389056,4.472136,485165200.0,5.477226,10686470000000.0,5.477226,10686470000000.0
2,1.732051,20.085537,5.477226,10686470000000.0,6.324555,2.353853e+17,6.324555,2.353853e+17


In [26]:
df.transform({
    'A': np.sqrt,
    'B': np.exp,
})

Unnamed: 0,A,B
0,1.0,22026.47
1,1.414214,485165200.0
2,1.732051,10686470000000.0


In [27]:
df.apply(lambda x:x.sum())

A        6
B       60
B_ap    90
B_tr    90
dtype: int64

In [29]:
# You will get an Error
#df.transform(lambda x:x.sum())

#Error Message>>> ValueError: transforms cannot produce aggregated results

In [30]:
def subtract_two(x):
    """Return the ``'B'`` entry of ``x`` minus its ``'A'`` entry.

    ``x`` must support lookup by the keys ``'B'`` and ``'A'``.
    """
    b_value = x['B']
    a_value = x['A']
    return b_value - a_value

In [31]:
df.apply(subtract_two, axis=1)

0     9
1    18
2    27
dtype: int64

In [35]:
# You will get an Error
#df.transform(subtract_two, axis=1)

# ValueError: transforms cannot produce aggregated results

In [37]:
# It is working
df.apply(lambda x: x['B'] - x['A'], axis=1)

0     9
1    18
2    27
dtype: int64

In [40]:
# Getting same error
#df.transform(lambda x: x['B'] - x['A'], axis=1)

# ValueError: transforms cannot produce aggregated results