In [2]:
import numpy as np
import pandas as pd

# Build a demo DataFrame: 1000 rows, one 'test' column of random floats.
df = pd.DataFrame(data={'test': np.random.sample(size=1000)})
df.head()

Unnamed: 0,test
0,0.115094
1,0.536578
2,0.308461
3,0.963758
4,0.351376


In [3]:
# Let's say we want to add 1 to the 'test' column in every row.

# One way is "vectorized addition", which adds a single number to the whole column at once:
df['test'] = df['test'] + 1
df.head()

Unnamed: 0,test
0,1.115094
1,1.536578
2,1.308461
3,1.963758
4,1.351376


In [4]:
# Or we can try to do it with iteration:
for _, row in df.iterrows(): 
    row.test += 1
df.head()

Unnamed: 0,test
0,2.115094
1,2.536578
2,2.308461
3,2.963758
4,2.351376


In [5]:
# Let's measure how fast both operations are:

# Vectorized: one whole-column addition per timed loop.
# Note: the statement reassigns df.test, so the column grows by 1 on every loop.
%timeit df.test = df.test + 1

275 µs ± 6.56 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [6]:
# Iteration: walk the frame row by row with iterrows(), adding 1 to each row's value.
# NOTE(review): per the pandas docs, iterrows() may yield a copy of each row, so
# `row.test += 1` is not guaranteed to write back to df - confirm on your pandas version.
%timeit for _, row in df.iterrows(): row.test += 1

91.4 ms ± 3.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Mean per-loop timings copied from the two %timeit results, in seconds.
iteration_s = 91.4e-3   # iterrows loop: 91.4 ms
vectorized_s = 275e-6   # vectorized addition: 275 µs

# How many times slower the row-by-row loop is than the vectorized form.
ratio = iteration_s / vectorized_s
ratio

332.3636363636363

In [8]:
# So, as you can see, iteration in this case is around 300 times slower!