In [1]:
from rolling_ta.data import CSVLoader


import numpy as np
import numba as nb
from numba.types import Array, i4, f8

import pandas as pd



In [2]:
csv_loader = CSVLoader()
btc_data = csv_loader.read_resource()

DEBUG - CSVLoader: Loading from resources/btc_ohlcv.csv


In [3]:
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  10000 non-null  float64
 1   open       10000 non-null  float64
 2   high       10000 non-null  float64
 3   low        10000 non-null  float64
 4   close      10000 non-null  float64
 5   volume     10000 non-null  float64
dtypes: float64(6)
memory usage: 468.9 KB


## Native Python Test
*With pandas / numpy*

In [4]:
# Define function
def sma_native(data: pd.Series, period=14):
    n = data.shape[0]
    sma = list(0 for _ in range(n))
    
    for i in range(n):
        sma[i] = sum(data[i:i + period]) / period
        
    return sma
    

In [5]:
%%timeit
sma_native(btc_data["close"])

129 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Pandas Vectorized Test

In [6]:
def sma_vectorized(data: pd.Series, period=14):
    return data.rolling(period, min_periods=period).sum()

In [7]:
%%timeit
sma_vectorized(btc_data["close"])

204 μs ± 3.4 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## Numba Test
*Jesus take the wheel*

In [8]:
@nb.njit
def sma_numba(data: np.ndarray[f8], period: i4 = 14) -> np.ndarray[f8]:
    sma = np.zeros_like(data, dtype=np.float64)
    
    for i in range(sma.shape[0] - period):
        sma[i + period] = np.sum(data[i:i+period]) / period
    
    return sma

In [10]:
%%timeit
sma_numba(btc_data["close"].values)

65.2 μs ± 1.3 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
