In [1]:
import numpy as np
import pandas as pd

In [2]:
#build a 4x3 frame of standard-normal draws labelled by state (rows) and letter (columns)
#NOTE: no random seed is set, so the values differ every time this cell is run
frame = pd.DataFrame(
    np.random.standard_normal(size=(4, 3)),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)

In [3]:
#display the frame built in the previous cell
frame

Unnamed: 0,b,d,e
Utah,-0.282143,-1.613046,-0.990371
Ohio,-0.900439,-0.832847,-0.459571
Texas,-0.549495,0.241596,-0.731022
Oregon,0.850068,1.854039,0.90598


In [4]:
#NumPy ufuncs (element-wise array functions) also work on pandas objects
#and return a new object that keeps the same index and column labels
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.282143,1.613046,0.990371
Ohio,0.900439,0.832847,0.459571
Texas,0.549495,0.241596,0.731022
Oregon,0.850068,1.854039,0.90598


In [5]:
#np.exp is applied element-wise as well; the row and column labels are preserved
np.exp(frame)

Unnamed: 0,b,d,e
Utah,0.754166,0.19928,0.371439
Ohio,0.406391,0.434809,0.631555
Texas,0.577241,1.273279,0.481417
Oregon,2.339805,6.385561,2.474355


In [6]:
#the ufunc calls above returned new objects; frame itself still holds the original values
frame

Unnamed: 0,b,d,e
Utah,-0.282143,-1.613046,-0.990371
Ohio,-0.900439,-0.832847,-0.459571
Texas,-0.549495,0.241596,-0.731022
Oregon,0.850068,1.854039,0.90598


In [7]:
#binary ufunc: np.ones_like(frame) supplies a 1 for every element of frame,
#so the result is frame with 1 added to each value
np.add(frame, np.ones_like(frame))

Unnamed: 0,b,d,e
Utah,0.717857,-0.613046,0.009629
Ohio,0.099561,0.167153,0.540429
Texas,0.450505,1.241596,0.268978
Oregon,1.850068,2.854039,1.90598


In [8]:
#apply a function to a one-dimensional array (one column or row of the frame)
def f1(x):
    """Return the spread (maximum minus minimum) of a one-dimensional object.

    Parameters
    ----------
    x : pandas.Series or numpy.ndarray
        The values to reduce; ``DataFrame.apply`` passes one column (or row)
        at a time.

    Returns
    -------
    scalar
        ``x.max() - x.min()``.

    Notes
    -----
    The vectorized ``.max()``/``.min()`` reductions are used instead of the
    Python built-ins ``max``/``min``: the built-ins iterate element by element
    and give order-dependent results when NaN is present, whereas the Series
    reductions skip NaN by default.
    """
    return x.max() - x.min()

In [9]:
#by default apply calls f1 once per column, giving one result per column label
frame.apply(f1)

b    1.750506
d    3.467086
e    1.896351
dtype: float64

In [10]:
#apply once per row (i.e. across the columns) with df.apply(func, axis='columns')
frame.apply(f1, axis='columns')

Utah      1.330903
Ohio      0.440868
Texas     0.972617
Oregon    1.003972
dtype: float64

In [11]:
#Functions can return scalar values like above, or they can return Series values
def f2(x: pd.Series) -> pd.Series:
    """Return the minimum and maximum of ``x`` as a labelled two-element Series.

    Parameters
    ----------
    x : pandas.Series
        One column (or row) of a frame, as passed by ``DataFrame.apply``.

    Returns
    -------
    pandas.Series
        The values ``[x.min(), x.max()]`` indexed by ``'min'`` and ``'max'``.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])

In [12]:
#f2 returns a Series for each column, so the results are assembled into a DataFrame
frame.apply(f2)

Unnamed: 0,b,d,e
min,-0.900439,-1.613046,-0.990371
max,0.850068,1.854039,0.90598


In [14]:
#applymap: apply a function to each element of the frame
%time

def my_format(x):
    """Render ``x`` as a string in fixed-point notation with two decimal places."""
    return format(x, '.2f')

#format every element of frame as a two-decimal string
#NOTE: DataFrame.applymap is deprecated as of pandas 2.1 in favour of DataFrame.map
frame.applymap(my_format)

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 10 µs


Unnamed: 0,b,d,e
Utah,-0.28,-1.61,-0.99
Ohio,-0.9,-0.83,-0.46
Texas,-0.55,0.24,-0.73
Oregon,0.85,1.85,0.91


In [17]:
%%time
#applymap is like applying a function to each row like this
#until now I've used this in my pandas and dask projects, time for a change
#since this is much slower
frame.apply(lambda x: x.apply(lambda y: my_format(y)))

CPU times: user 2.06 ms, sys: 95 µs, total: 2.15 ms
Wall time: 2.18 ms


Unnamed: 0,b,d,e
Utah,-0.28,-1.61,-0.99
Ohio,-0.9,-0.83,-0.46
Texas,-0.55,0.24,-0.73
Oregon,0.85,1.85,0.91


In [18]:
%%time
#applymap is also like this
frame.apply(lambda x: x.apply(my_format))

CPU times: user 2 ms, sys: 22 µs, total: 2.02 ms
Wall time: 2.03 ms


Unnamed: 0,b,d,e
Utah,-0.28,-1.61,-0.99
Ohio,-0.9,-0.83,-0.46
Texas,-0.55,0.24,-0.73
Oregon,0.85,1.85,0.91


In [20]:
#DataFrame.applymap gets its name from the Series method map, which is also element-wise
frame['b'].map(my_format)

Utah      -0.28
Ohio      -0.90
Texas     -0.55
Oregon     0.85
Name: b, dtype: object

In [21]:
#same element-wise formatting applied to column 'd'
frame['d'].map(my_format)

Utah      -1.61
Ohio      -0.83
Texas      0.24
Oregon     1.85
Name: d, dtype: object

In [22]:
#same element-wise formatting applied to column 'e'
frame['e'].map(my_format)

Utah      -0.99
Ohio      -0.46
Texas     -0.73
Oregon     0.91
Name: e, dtype: object