In [64]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [65]:
MIN_ROWS = 1e+4

In [118]:
df = pd.DataFrame({
        "nums": [20, 22, 30, 29.1, 20],
        "b": [1, 0, 1, 1, 0],
    })

while df.shape[0] < MIN_ROWS:
    df = pd.concat([df, df], ignore_index=True)

dfr = pd.DataFrame(df)
dfr["result"] = [20] + [None]*(dfr.shape[0] - 1)

display(df)
display(dfr)

Unnamed: 0,nums,b,result
0,20.0,1,20.0
1,22.0,0,
2,30.0,1,
3,29.1,1,
4,20.0,0,
...,...,...,...
10235,20.0,1,
10236,22.0,0,
10237,30.0,1,
10238,29.1,1,


Unnamed: 0,nums,b,result
0,20.0,1,20.0
1,22.0,0,
2,30.0,1,
3,29.1,1,
4,20.0,0,
...,...,...,...
10235,20.0,1,
10236,22.0,0,
10237,30.0,1,
10238,29.1,1,


In [46]:
def some_calc_func(prev_result, prev_num, current_b):
    if current_b == 1:
        return prev_result * prev_num / 2
    else:
        return prev_num + 17

# iterrows

## pure

In [50]:
%%timeit
for index, row in df.iterrows():
    pass

427 ms ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## remember previous

In [None]:
%%timeit
iterator = df.iterrows()
_, prev_row = next(iterator)
for _, row in iterator:
    prev_row = row

# itertuples

## pure

In [107]:
%%timeit
for row in df.itertuples():
    pass

8.21 ms ± 332 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## remember previous

In [108]:
%%timeit
iterator = df.itertuples()
prev_row = next(iterator)
for row in iterator:
    prev_row = row

8.11 ms ± 70.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## assign each value

In [110]:
dfw = dfr.copy()

In [111]:
#%%timeit
iterator = dfw.itertuples()
prev_row = next(iterator)
for row in iterator:
    #print("some_calc_func", prev_row[3], prev_row[1], row[2])
    dfw.loc[row[0], "result"] = some_calc_func(dfw.loc[row[0] - 1, "result"], prev_row[1], row[2])
    prev_row = row
    #if row[0] > 10: break

In [112]:
dfw

Unnamed: 0,nums,b,result
0,20.0,1,20.0
1,22.0,0,37.0
2,30.0,1,407.0
3,29.1,1,6105.0
4,20.0,0,46.1
...,...,...,...
10235,20.0,1,461.0
10236,22.0,0,37.0
10237,30.0,1,407.0
10238,29.1,1,6105.0


## add column at once

In [113]:
dfw = df.copy()

In [114]:
#%%timeit
result = [20]
iterator = dfw.itertuples()
prev_row = next(iterator)
for row in iterator:
    #print("some_calc_func", result[-1], prev_row[1], row[2])
    result.append(some_calc_func(result[-1], prev_row[1], row[2]))
    prev_row = row
    #if row[0] > 10: break
dfw["result"] = result

In [115]:
dfw

Unnamed: 0,nums,b,result
0,20.0,1,20.0
1,22.0,0,37.0
2,30.0,1,407.0
3,29.1,1,6105.0
4,20.0,0,46.1
...,...,...,...
10235,20.0,1,461.0
10236,22.0,0,37.0
10237,30.0,1,407.0
10238,29.1,1,6105.0


# zip

## pure

In [117]:
df.columns

Index(['nums', 'b', 'result'], dtype='object')

In [None]:
%%timeit
for prev_num, curren_b in zip(df['nums'], df.loc[1:, 'b']):
    pass

In [5]:
%%timeit
result = [20]
for prev_num, curren_b in zip(df['nums'], df.loc[1:, 'b']):
    result.append(some_calc_func(df.loc[new_row, 'result'], df.loc[new_row, 'nums'], df.loc[row, 'b']))

1.02 ms ± 21.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
%%timeit
result = [20]
prev_row = df.index[0]
for i in range(1, len(df.index)):
    row = df.index[i]
    result.append(some_calc_func(df.loc[new_row, 'result'], df.loc[new_row, 'nums'], df.loc[row, 'b']))
    prev_row = row

1.5 s ± 61.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
