In [65]:
import pandas as pd
import numpy as np
from numba import jit
import time

# Number of one-second observations in the synthetic sample.
periods = 100_000

# Sample DataFrame (for demonstration): two columns of standard-normal noise
# indexed by consecutive one-second timestamps.
np.random.seed(0)  # For reproducibility
# Use the lowercase 's' alias for seconds; the uppercase 'S' spelling is
# deprecated in recent pandas releases.
dates = pd.date_range(start='2023-01-01 00:00:00', periods=periods, freq='s')
df = pd.DataFrame(np.random.randn(periods, 2), index=dates, columns=list('AB'))

# Define a custom function to apply over the rolling window

@jit(nopython=True)
def f(window):
    """Return column 0 of the first row minus column 1 of the last row.

    `window` is indexed as rows first, then columns. Any window that does
    not contain exactly three rows yields NaN.
    """
    # Incomplete (or oversized) windows carry no result.
    if len(window) != 3:
        return np.nan
    # TODO(review): the original note described picking between C1.L-C3.H and
    # C3.L-C1.H depending on whether the mid-point-to-mid-point move is
    # decreasing; only this single difference is implemented -- confirm
    # whether that branch is still wanted.
    first_row = window[0]
    last_row = window[2]
    return first_row[0] - last_row[1]

# Time only the rolling computation. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short durations, unlike the
# wall-clock time.time().
# NOTE: the first run with the Numba engine also pays the JIT compilation
# cost, so this figure is an upper bound on the steady-state run time.
start_time = time.perf_counter()
# Apply the custom function over a rolling window of size 3 with raw=True.
# method='table' passes f the whole window (all columns) at once; f returns
# one scalar per window, which is read back from the 'A' column of the result.
df['diff'] = df.rolling(3, method='table').apply(f, raw=True, engine='numba')['A']
# Stop the clock before printing so display time is not counted.
end_time = time.perf_counter()

# Print the DataFrame
print(df)

print(f'Execution time: {end_time - start_time:.3f} seconds')

                            A         B      diff
2023-01-01 00:00:00  1.764052  0.400157       NaN
2023-01-01 00:00:01  0.978738  2.240893       NaN
2023-01-01 00:00:02  1.867558 -0.977278  2.741330
2023-01-01 00:00:03  0.950088 -0.151357  1.130095
2023-01-01 00:00:04 -0.103219  0.410599  1.456959
...                       ...       ...       ...
2023-01-02 03:46:35  0.890029 -1.371535  1.764317
2023-01-02 03:46:36 -0.134089  1.725577 -1.171629
2023-01-02 03:46:37 -1.751198  0.271827  0.618201
2023-01-02 03:46:38  0.050507  0.667815 -0.801904
2023-01-02 03:46:39 -1.021195 -0.345373 -1.405825

[100000 rows x 3 columns]
Execution time: 0.546 seconds


In [34]:
import pandas as pd
import numpy as np

# Build a small reproducible demo frame: ten consecutive daily rows holding
# two columns of standard-normal draws.
np.random.seed(0)  # fixed seed so the printed values are repeatable
dates = pd.date_range(start='20230101', periods=10)
df = pd.DataFrame(
    data=np.random.randn(10, 2),
    index=dates,
    columns=['A', 'B'],
)

# Define a custom function to apply over the rolling window
def f(window):
    """Return the last row's 'A' minus the first row's 'B' for a 3-row window.

    Any window that does not contain exactly three rows yields NaN.
    """
    # Guard: only full three-row windows produce a value.
    if len(window) != 3:
        return np.nan
    newest_a = window['A'].iloc[-1]
    oldest_b = window['B'].iloc[0]
    return newest_a - oldest_b

# Evaluate f once per row. Each call receives the label slice of df that runs
# from two days before the row's timestamp through the timestamp itself
# (.loc label slices include both endpoints).
some_result = df.apply(
    lambda row, lookback=pd.Timedelta('2D'): f(df.loc[row.name - lookback:row.name]),
    axis=1,
)

# Show the input frame, then the per-row results
print(df)
print(some_result)


                   A         B
2023-01-01  1.764052  0.400157
2023-01-02  0.978738  2.240893
2023-01-03  1.867558 -0.977278
2023-01-04  0.950088 -0.151357
2023-01-05 -0.103219  0.410599
2023-01-06  0.144044  1.454274
2023-01-07  0.761038  0.121675
2023-01-08  0.443863  0.333674
2023-01-09  1.494079 -0.205158
2023-01-10  0.313068 -0.854096
2023-01-01         NaN
2023-01-02         NaN
2023-01-03    1.467401
2023-01-04   -1.290805
2023-01-05    0.874059
2023-01-06    0.295401
2023-01-07    0.350439
2023-01-08   -1.010410
2023-01-09    1.372404
2023-01-10   -0.020607
Freq: D, dtype: float64
