# Rolling Calculations
**Problem Statement:** Given a dataframe, you'd like to apply a rolling calculation to one (or more) columns, where "rolling" means that the result in one row depends on the previous row. The result of the calculation should be available as an additional column in your dataframe.

In [1]:
import seaborn as sns
import pandas as pd

In [6]:
# Load seaborn's 'flights' example dataset; the functions below read its
# `passengers` column.
df = sns.load_dataset('flights')
# Last expression of the cell: displays the first rows of the dataframe.
df.head()

Unnamed: 0,year,month,passengers
0,1949,Jan,112
1,1949,Feb,118
2,1949,Mar,132
3,1949,Apr,129
4,1949,May,121


In [11]:
def for_loop(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Baseline variant: a plain counter-based ``for`` loop that reads one
    scalar from the ``passengers`` column per iteration and keeps a running
    total.

    Parameters
    ----------
    d : pandas.DataFrame
        Must contain a numeric ``passengers`` column.

    Returns
    -------
    pandas.DataFrame
        New dataframe (``d`` is not modified) whose ``cumsum`` column holds
        the running total of ``passengers`` in row order.
    """
    def calc(d):
        result = []
        previous = 0
        for i in range(len(d)):
            # `i` is a row *position*, so it must be resolved with `.iloc`.
            # Label-based `.loc[i]` only works when the index happens to be
            # the default 0..n-1 range; on a filtered or re-sorted frame it
            # raises KeyError or picks the wrong row.
            previous += d.passengers.iloc[i]
            result.append(previous)
        return result

    return d.assign(cumsum=calc(d))



In [18]:
def for_loop_iterrows(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Variant that walks the frame with ``DataFrame.iterrows`` and accumulates
    the ``passengers`` value of every row into a running total.
    """
    def running_totals(frame):
        totals = []
        acc = 0
        # iterrows yields (index_label, row_series); the label is not needed.
        for _label, record in frame.iterrows():
            acc = acc + record.passengers
            totals.append(acc)
        return totals

    return d.assign(cumsum=running_totals(d))


In [25]:
def for_loop_itertuples(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Variant that walks the frame with ``DataFrame.itertuples`` and reads the
    ``passengers`` field of each row tuple to build a running total.
    """
    def running_totals(frame):
        # Lazily yield the running total after each row.
        total = 0
        for record in frame.itertuples():
            total += record.passengers
            yield total

    return d.assign(cumsum=list(running_totals(d)))

In [27]:
def pandas_cumsum(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Variant that delegates the running total of ``passengers`` to the
    built-in ``Series.cumsum`` instead of looping in Python.
    """
    running_total = d.passengers.cumsum()
    return d.assign(cumsum=running_total)

In [33]:
def numpy_array_iteration(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Variant that pulls the ``passengers`` column out as an array via
    ``.values`` and loops over its elements to build the running total.
    """
    def running_totals(frame):
        totals = []
        acc = 0
        for value in frame.passengers.values:
            acc = acc + value
            totals.append(acc)
        return totals

    # `assign` calls a callable with the dataframe, so the helper can be
    # handed over directly.
    return d.assign(cumsum=running_totals)


In [37]:
def dict_iteration(d):
    """Return a copy of ``d`` with a ``cumsum`` column added.

    Variant that converts the frame to a list of per-row dictionaries with
    ``to_dict("records")`` and accumulates each row's ``"passengers"`` entry.
    """
    def running_totals(frame):
        totals = []
        acc = 0
        for record in frame.to_dict("records"):
            acc = acc + record["passengers"]
            totals.append(acc)
        return totals

    # `assign` calls a callable with the dataframe, so the helper can be
    # handed over directly.
    return d.assign(cumsum=running_totals)


In [19]:
%%timeit
for_loop(df)

8.02 ms ± 2.08 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%%timeit
for_loop_iterrows(df)

11.6 ms ± 1.47 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
%%timeit
for_loop_itertuples(df)

1.27 ms ± 16.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [35]:
%%timeit
pandas_cumsum(df)

876 µs ± 189 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [45]:
%%timeit
numpy_array_iteration(df)

823 µs ± 70.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [40]:
%%timeit
dict_iteration(df)

1.52 ms ± 129 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
# Sanity check: the iterrows variant must produce the same frame as the baseline loop.
pd.testing.assert_frame_equal(for_loop(df), for_loop_iterrows(df))

In [31]:
# Sanity check: the itertuples variant must produce the same frame as the baseline loop.
pd.testing.assert_frame_equal(for_loop(df), for_loop_itertuples(df))

In [32]:
# Sanity check: the built-in cumsum variant must produce the same frame as the baseline loop.
pd.testing.assert_frame_equal(for_loop(df), pandas_cumsum(df))

In [43]:
# Sanity check: the array-iteration variant must produce the same frame as the baseline loop.
pd.testing.assert_frame_equal(for_loop(df), numpy_array_iteration(df))

In [44]:
# Sanity check: the dict-records variant must produce the same frame as the baseline loop.
pd.testing.assert_frame_equal(for_loop(df), dict_iteration(df))