In [3]:
import pandas as pd
import numpy as np
from ml_rf_project.src.talib_map import ta_map, func_map
import re
from functools import partial
from ml_rf_project.src.features_extraction import *

In [9]:
# Load the dataset from the CSV file in the sibling ``data`` directory.
df = pd.read_csv("../data/data.csv")

In [39]:
# Read the feature-name file and break its content on single spaces,
# leaving the resulting list of tokens in ``features``.
with open("../src/feature_names.txt", "r") as f:
    features = f.read().split(" ")

In [4]:
def get_funcs(feature_file="../src/feature_names.txt"):
    """Build the pool of feature functions listed in the feature-name file.

    The file content is scanned with four regular expressions:

    * ``<index>_<NAME>_<period>`` -- interval indicators, configured with
      the period taken from the feature name,
    * ``<index>_CDL...``          -- pattern features, looked up in ``ta_map``,
    * ``<index>_HT_...``          -- ``HT_*`` features, looked up in ``ta_map``,
    * ``<index>_ti...``           -- time features, looked up in ``func_map``.

    Every match is resolved to a callable and stored at position ``<index>``
    of the returned list.

    Args:
        feature_file: Path of the text file holding the feature names.
            Defaults to the path this notebook originally hard-coded.

    Returns:
        list: One slot per regex match; a slot that no match indexes stays
        ``None``.

    Raises:
        KeyError: If a feature name is missing from ``ta_map`` / ``func_map``.
        IndexError: If a feature index is not smaller than the pool length.

    Side effects:
        Prints the length of the pool.
    """
    with open(feature_file, "r") as f:
        features = f.read()
    interval_feats = re.findall(r"(\d+)_(\w+)_(\d+)", features)
    pattern_feats = re.findall(r"(\d+)_(CDL\w+)", features)
    time_feats = re.findall(r"(\d+)_(ti\w+)", features)
    ht_feats = re.findall(r"(\d+)_(HT_\w+)", features)

    # create a list of length n with empty placeholders
    total = (
        len(interval_feats) + len(pattern_feats) + len(ht_feats) + len(time_feats)
    )
    func_pool = [None] * total
    print(len(func_pool))

    for index, name, period in interval_feats:
        period = int(period)
        if name in ("MACDHIST", "MACDSIGNAL"):
            # Both feature names are outputs of the single MACD function.
            function = partial(
                ta_map["MACD"],
                fastperiod=period,
                slowperiod=period * 2,
                signalperiod=period // 2,
            )
        elif name in ("MACDSIGNALFIX", "MACDHISTFIX"):
            function = partial(ta_map["MACDFIX"], signalperiod=period)
        else:
            # Resolve the indicator *before* inspecting its parameters.
            # Previously ``function`` was read here without being bound for
            # the current feature, which raised on the first iteration or
            # silently reused the callable of the previous feature.
            base = ta_map[name]
            parameters = base.parameters
            if "timeperiod" in parameters:
                function = partial(base, timeperiod=period)
            elif "fastperiod" in parameters:
                function = partial(base, fastperiod=period, slowperiod=period * 3)
            elif name == "STOCH":
                function = partial(
                    base,
                    fastk_period=period,
                    slowk_period=period // 2,
                    slowd_period=period // 2,
                )
            elif name == "STOCHF":
                function = partial(
                    base, fastk_period=period, fastd_period=period // 2
                )
            else:
                # No known period parameter: keep the indicator's defaults
                # rather than leaking the previous iteration's callable.
                function = base

        func_pool[int(index)] = function

    for index, name in pattern_feats:
        func_pool[int(index)] = ta_map[name]

    for index, name in ht_feats:
        # Per-output feature names map back to the multi-output function.
        if name in ("HT_PHASORinphase", "HT_PHASORquadrature"):
            function = ta_map["HT_PHASOR"]
        elif name in ("HT_SINEsine", "HT_SINEleadsine"):
            function = ta_map["HT_SINE"]
        else:
            function = ta_map[name]
        func_pool[int(index)] = function

    for index, name in time_feats:
        func_pool[int(index)] = func_map[name]

    return func_pool

In [5]:
# Build the function pool; get_funcs() prints the pool size as a side effect.
func_pool = get_funcs()

105


In [16]:
# Inspect the pool; the recorded output shows functools.partial wrappers
# with the period keywords already bound.
func_pool

[functools.partial({'name': 'ADOSC', 'group': 'Volume Indicators', 'display_name': 'Chaikin A/D Oscillator', 'function_flags': None, 'input_names': OrderedDict([('prices', ['high', 'low', 'close', 'volume'])]), 'parameters': OrderedDict([('fastperiod', 3), ('slowperiod', 10)]), 'output_flags': OrderedDict([('real', ['Line'])]), 'output_names': ['real']}, fastperiod=10, slowperiod=30),
 functools.partial({'name': 'ADXR', 'group': 'Momentum Indicators', 'display_name': 'Average Directional Movement Index Rating', 'function_flags': ['Function has an unstable period'], 'input_names': OrderedDict([('prices', ['high', 'low', 'close'])]), 'parameters': OrderedDict([('timeperiod', 14)]), 'output_flags': OrderedDict([('real', ['Line'])]), 'output_names': ['real']}, timeperiod=10),
 functools.partial({'name': 'ADX', 'group': 'Momentum Indicators', 'display_name': 'Average Directional Movement Index', 'function_flags': ['Function has an unstable period'], 'input_names': OrderedDict([('prices', ['

In [15]:
# Duplicate check: print the pool size, then the number of distinct entries.
# NOTE(review): set() relies on each entry's hash/equality; functools.partial
# objects presumably compare by identity, so two equal-but-separate partials
# would not be reported as duplicates -- confirm.
print(len(func_pool), len(set(func_pool)), sep="\n")

105
104


In [10]:
# Parse the "datetime" column with pandas and overwrite the column in place.
df["datetime"] = pd.to_datetime(df["datetime"])

In [14]:
# Call the last function in the pool on the datetime column.
# NOTE(review): the recorded output (0..59) looks like minute-of-hour, so the
# last slot is presumably a time feature -- confirm against feature_names.txt.
func_pool[-1](df.datetime)

0          0
1          1
2          2
3          3
4          4
          ..
379720    55
379721    56
379722    57
379723    58
379724    59
Name: datetime, Length: 379725, dtype: int64

In [13]:
# Look up the SMA entry of ta_map; the recorded repr lists a default
# ``timeperiod`` of 30.
ta_map["SMA"]

{'name': 'SMA', 'group': 'Overlap Studies', 'display_name': 'Simple Moving Average', 'function_flags': ['Output scale same as input'], 'input_names': OrderedDict([('price', 'close')]), 'parameters': OrderedDict([('timeperiod', 30)]), 'output_flags': OrderedDict([('real', ['Line'])]), 'output_names': ['real']}

In [15]:
# Synthetic price/volume inputs: one array of 100 uniform random draws per
# field, generated in the listed order.
inputs = {
    field: np.random.random(100)
    for field in ("open", "high", "low", "close", "volume")
}

In [17]:
# Run the SMA entry of ta_map on the synthetic inputs, without overriding
# any of its parameters.
output = ta_map["SMA"](inputs)

In [18]:
# Display the SMA result; the recorded array starts with 29 NaN entries
# before the first value.
output

array([       nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan, 0.56379035,
       0.56592234, 0.56354196, 0.56357981, 0.5759606 , 0.57891895,
       0.60568122, 0.6011442 , 0.58969133, 0.59014174, 0.57350315,
       0.57023573, 0.57325048, 0.59691296, 0.59028906, 0.58817202,
       0.6001237 , 0.62128806, 0.63327843, 0.64249598, 0.63900216,
       0.60932516, 0.61402808, 0.62876631, 0.62519055, 0.60609979,
       0.5998485 , 0.59959993, 0.6154019 , 0.59837884, 0.57913937,
       0.57113615, 0.57756787, 0.57938978, 0.56931161, 0.54834979,
       0.52476594, 0.51764841, 0.5154344 , 0.50179562, 0.5002672 ,
       0.50768852, 0.51649726, 0.49683872, 0.51526277, 0.51690

In [5]:
# Probe: ``ta.RSI`` is a plain function without a ``parameters`` attribute;
# the recorded output is the resulting AttributeError.
ta.RSI.parameters

AttributeError: 'function' object has no attribute 'parameters'

In [6]:
from talib import abstract

# The abstract API does expose ``parameters``; the recorded output for SMA is
# an OrderedDict with ``timeperiod`` set to 30.
abstract.SMA.parameters

OrderedDict([('timeperiod', 30)])

In [2]:
# Load the dataset from ``data.csv`` relative to the current working directory.
df = pd.read_csv("data.csv")

In [3]:
# NOTE(review): generate_time_features is presumably provided by the star
# import from features_extraction; no output is recorded for this cell, so
# whether it mutates ``df`` in place needs confirming.
generate_time_features(df)

In [5]:
# Display the datetime column; the recorded output has dtype datetime64[ns].
df.datetime

0        2018-11-27 09:00:00
1        2018-11-27 09:01:00
2        2018-11-27 09:02:00
3        2018-11-27 09:03:00
4        2018-11-27 09:04:00
                 ...        
379720   2023-07-27 22:55:00
379721   2023-07-27 22:56:00
379722   2023-07-27 22:57:00
379723   2023-07-27 22:58:00
379724   2023-07-27 22:59:00
Name: datetime, Length: 379725, dtype: datetime64[ns]

In [5]:
# Read the whole feature-name file (relative to the current working
# directory) into a single string.
with open("feature_names.txt", "r") as f:
    features = f.read()

In [2]:
import re

# Save this for production use!

In [11]:
# Load the raw feature-name text from the working directory.
with open("feature_names.txt", "r") as f:
    features = f.read()

# Split each feature into (name, trailing number) pairs and start from an
# empty pool.
feature_pattern = re.compile(r"(\w+\D)(\d+\b)")
feats = feature_pattern.findall(features)
func_pool = list()

In [12]:
# Build (talib function, lag) pairs from the parsed feature names.
# ``feats`` holds tuples (re.findall with two groups), which are immutable,
# so the name is rebound locally instead of assigning into ``func[0]``
# (that assignment raised TypeError).
for name, lag in feats:
    # These per-output feature names are skipped here.
    if name in (
        "MACDHIST",
        "MACDSIGNAL",
        "MACDSIGNALFIX",
        "MACDHISTFIX",
        "STOCHRSI_k",
        "STOCHRSI_d",
    ):
        continue
    # Per-output HT_* names map back to the talib function that produces them.
    if name in ("HT_PHASORinphase", "HT_PHASORquadrature"):
        name = "HT_PHASOR"
    elif name in ("HT_SINEsine", "HT_SINEleadsine"):
        name = "HT_SINE"
    func_pool.append((getattr(ta, name), int(lag)))

In [25]:
# Probe: try to introspect the first pooled function; the recorded output is
# a ValueError because no signature is found for the built-in ADOSC.
inspect.signature(func_pool[0][0])

ValueError: no signature found for builtin <built-in function ADOSC>

In [15]:
import inspect

In [17]:
# Attempt to collect the parameters of every pooled function via
# inspect.signature; the recorded output shows this fails with ValueError on
# the built-in ADOSC.
for func, lag in func_pool:
    parameters = inspect.signature(func).parameters

ValueError: no signature found for builtin <built-in function ADOSC>

In [5]:
import talib as ta

In [8]:
# Resolve the first parsed feature name to a talib attribute; the recorded
# output is talib's ADOSC function.
getattr(ta, feats[0][0])

<function talib._ta_lib.ADOSC>