# How to Apply a Function to a Single Column in Pandas

In [1]:
import pandas as pd
import numpy as np
# Demo frame: 5 rows x 4 columns (A-D) filled with random integers drawn from [0, 5).
df = pd.DataFrame(
    np.random.randint(low=0, high=5, size=(5, 4)),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,4,4,0,1
1,0,2,3,4
2,0,3,3,3
3,1,1,2,0
4,0,1,3,3


## Option 1: Pandas apply function to column

In [3]:
def my_function(x):
    """Return the square of ``x`` (``x`` raised to the power 2)."""
    squared = pow(x, 2)
    return squared
    
# Series.apply calls my_function once per value of column A and returns a new Series;
# df itself is not modified.
df['A'].apply(my_function)

0    16
1     0
2     0
3     1
4     0
Name: A, dtype: int64

## Option 2: Pandas apply function to column with `map`

In [4]:
def my_function(x):
    """Square a single value; intended as the element-wise callable for ``Series.map``."""
    return pow(x, 2)
    
# Series.map with a callable applies it to each value of column A and returns a new Series.
df['A'].map(my_function)

0    16
1     0
2     0
3     1
4     0
Name: A, dtype: int64

## Option 3: Pandas apply anonymous function / lambda to column

In [5]:
# Same element-wise squaring as before, but with an inline lambda instead of a named function.
df['A'].map(lambda value: value ** 2)

0    16
1     0
2     0
3     1
4     0
Name: A, dtype: int64

In [6]:
# DataFrame.apply (default axis=0) passes each selected column to the lambda as a whole
# Series, so the squaring here is vectorised per column rather than per scalar value.
df[['A', 'B']].apply(lambda column: column ** 2)

Unnamed: 0,A,B
0,16,16
1,0,4
2,0,9
3,1,1
4,0,1


## Option 4: Speed up Pandas apply function to column

In [7]:
import pandas as pd
import swifter

def fnc(x):
    """Return ``x`` raised to its own power, then halved: ``(x ** x) / 2``.

    ``**`` binds tighter than ``/``, so the exponentiation happens first.
    The true division means the result is a float for integer input.
    """
    self_power = x ** x
    return self_power / 2

# Larger benchmark frame: 100 ** 2 = 10,000 rows x 4 columns (A-D) of random integers in [0, 5).
# Note: this rebinds `df`, replacing the small 5-row frame used in the earlier cells.
df = pd.DataFrame(
    np.random.randint(low=0, high=5, size=(100 ** 2, 4)),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,0,3,0,3
1,1,2,0,3
2,4,3,1,3
3,0,2,3,4
4,0,4,3,3
...,...,...,...,...
9995,4,3,0,1
9996,2,2,3,4
9997,4,0,2,0
9998,3,2,2,1


In [8]:
# Time swifter's apply on column A; the `.swifter` accessor is available because
# `import swifter` ran earlier in the notebook.
%timeit df['A'].swifter.apply(fnc)

719 µs ± 7.11 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [9]:
# Baseline: plain pandas Series.apply with the same function on the same column.
%timeit df["A"].apply(fnc)

3.64 ms ± 20.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
import dask.dataframe as dd
# Wrap the pandas frame in a dask DataFrame split into 2 partitions.
ddf = dd.from_pandas(df, npartitions=2)

In [11]:
%timeit ddf["A"].apply(fnc, meta=('A', 'int64'))

620 µs ± 4.96 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
# Time pandas Series.map with the same function on the same column.
%timeit df["A"].map(fnc)

3.55 ms ± 11.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
