In [1]:
from decision_tree import DecisionTree, TreeNode, Rule
import json
import os
import pickle

In [2]:
# Accumulators filled in by get_cols():
#   cols       - predictor names, in the order they are first encountered
#   col_counts - predictor name -> number of tree rules that use it
cols, col_counts = [], {}

In [3]:
def get_cols(node:TreeNode):
    """Record the predictor of every rule in the tree rooted at ``node``.

    Nodes are visited in pre-order (node, then left subtree, then right
    subtree). For each visited node that has a rule, the rule's
    ``predictor`` is appended to the module-level ``cols`` list if it is
    not already there, and its entry in the module-level ``col_counts``
    dict is incremented. A ``None`` node, or a node whose ``rule`` is
    ``None``, ends that branch: its children are not visited.

    Returns nothing; the results live in ``cols`` and ``col_counts``.
    """
    # Explicit stack instead of recursion; the right child is pushed first so
    # that the left subtree is popped (and therefore processed) first.
    pending = [node]
    while pending:
        current = pending.pop()
        if current is None:
            continue
        split = current.rule
        if split is None:
            continue
        name = split.predictor
        if name not in cols:
            cols.append(name)
        col_counts[name] = col_counts.get(name, 0) + 1
        pending.append(current.right)
        pending.append(current.left)

In [4]:
# Start from empty accumulators so that re-running this cell does not add the
# same trees' predictors to the counts a second time.
cols = []
col_counts = {}

# os.listdir() returns names in arbitrary order; sorting makes the first-seen
# order of `cols` reproducible from run to run.
files = [os.path.join('saved_models', file_name) for file_name in sorted(os.listdir('saved_models')) if file_name.startswith('dt')]

for model_path in files:
    # NOTE: pickle.load can execute arbitrary code - only load model files
    # that come from a trusted source.
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    # Accumulate this model's rule predictors into cols / col_counts.
    get_cols(model.tree)

# Drop predictors whose name starts with 'row_num', and keep col_counts
# restricted to (and ordered like) the surviving columns.
cols = [col for col in cols if not col.startswith('row_num')]
col_counts = {col: col_counts[col] for col in cols}

In [5]:
# Display how many tree rules use each retained predictor column.
col_counts

{'Open-Close_ma_1-Open-Close_ma_10_val': 5,
 'Close_slope_1_polarity': 3,
 'Volume_ma_10_val': 4,
 'Open-Close_slope_1_val': 2,
 'MACDs_12_26_9_ma_20_changelen_val': 1,
 'Open-Close_ma_1-Open-Close_ma_50_val': 3,
 'BBP_5_2.0_slope_1_changelen_val': 3,
 'BBM_5_2.0_ma_1-BBM_5_2.0_ma_20_changelen_val': 2,
 'Open-High_ma_5-Open-High_ma_20_changelen_val': 1,
 'BBL_5_2.0_ma_5_slope_1_changelen_polarity': 1,
 'High_n-Low_n_ma_50_changelen_val': 1,
 'High_n-Low_n_ma_20_slope_15_changelen_val': 1,
 'BBL_5_2.0_ma_10_slope_15_val': 2,
 'Open_n-Close_n_ma_10_val': 1,
 'Volume_ma_5_slope_5_val': 1,
 'Close_n_slope_1_polarity': 1,
 'Low_ma_5_slope_3_polarity': 1,
 'Open_n_ma_10_slope_5_changelen_polarity': 1,
 'RSI_14_ma_1_val': 1,
 'Open-Close_polarity': 1,
 'Open-High_ma_1-Open-High_ma_50_val': 1,
 'Open-Close_ma_1-Open-Close_ma_20_val': 1,
 'Close_n_ma_1-Close_n_ma_50_changelen_polarity': 1,
 'High_n_ma_1-High_n_ma_20_changelen_polarity': 1,
 'Open-High_ma_1-Open-High_ma_50_polarity': 1}

In [6]:
# Display the retained predictor columns together with how many there are.
cols, len(cols)

(['Open-Close_ma_1-Open-Close_ma_10_val',
  'Close_slope_1_polarity',
  'Volume_ma_10_val',
  'Open-Close_slope_1_val',
  'MACDs_12_26_9_ma_20_changelen_val',
  'Open-Close_ma_1-Open-Close_ma_50_val',
  'BBP_5_2.0_slope_1_changelen_val',
  'BBM_5_2.0_ma_1-BBM_5_2.0_ma_20_changelen_val',
  'Open-High_ma_5-Open-High_ma_20_changelen_val',
  'BBL_5_2.0_ma_5_slope_1_changelen_polarity',
  'High_n-Low_n_ma_50_changelen_val',
  'High_n-Low_n_ma_20_slope_15_changelen_val',
  'BBL_5_2.0_ma_10_slope_15_val',
  'Open_n-Close_n_ma_10_val',
  'Volume_ma_5_slope_5_val',
  'Close_n_slope_1_polarity',
  'Low_ma_5_slope_3_polarity',
  'Open_n_ma_10_slope_5_changelen_polarity',
  'RSI_14_ma_1_val',
  'Open-Close_polarity',
  'Open-High_ma_1-Open-High_ma_50_val',
  'Open-Close_ma_1-Open-Close_ma_20_val',
  'Close_n_ma_1-Close_n_ma_50_changelen_polarity',
  'High_n_ma_1-High_n_ma_20_changelen_polarity',
  'Open-High_ma_1-Open-High_ma_50_polarity'],
 25)

In [8]:
# Hand-picked column names; stored under the 'cols_to_use' key of cols.pkl.
cols_to_use = [
    'CMO_14',
    'Open',
    'Close',
    'SMA_20',
    'SMA_10',
    'Open_n',
    'Close_n',
    'High_n',
    'Low_n',
    'High',
    'Low',
    'BBM_5_2.0',
    'Volume',
    'RSI_14',
    'MACDs_12_26_9',
    'BBL_5_2.0',
    'BBP_5_2.0',
]

In [9]:
# Read-modify-write of the shared column metadata pickle: load the existing
# dict, overwrite three of its keys, and save it back.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with open('additional_utils/cols.pkl', 'rb') as f:
    d = pickle.load(f)
d['imp_cols'] = cols
d['cols_to_use'] = cols_to_use
d['col_counts'] = col_counts
# Write to a temporary file first and swap it in afterwards: opening the real
# file with 'wb' would truncate it immediately, so a failure during dump would
# lose whatever else was stored in cols.pkl.
cols_pkl_tmp = 'additional_utils/cols.pkl' + '.tmp'
with open(cols_pkl_tmp, 'wb') as f:
    pickle.dump(d, f)
os.replace(cols_pkl_tmp, 'additional_utils/cols.pkl')

In [11]:
# Display col_counts ordered from most-used to least-used predictor; the sort
# is stable, so columns with equal counts keep their original relative order.
dict(sorted(col_counts.items(), key=lambda item: item[1], reverse=True))

{'Open-Close_ma_1-Open-Close_ma_10_val': 5,
 'Volume_ma_10_val': 4,
 'Close_slope_1_polarity': 3,
 'Open-Close_ma_1-Open-Close_ma_50_val': 3,
 'BBP_5_2.0_slope_1_changelen_val': 3,
 'Open-Close_slope_1_val': 2,
 'BBM_5_2.0_ma_1-BBM_5_2.0_ma_20_changelen_val': 2,
 'BBL_5_2.0_ma_10_slope_15_val': 2,
 'MACDs_12_26_9_ma_20_changelen_val': 1,
 'Open-High_ma_5-Open-High_ma_20_changelen_val': 1,
 'BBL_5_2.0_ma_5_slope_1_changelen_polarity': 1,
 'High_n-Low_n_ma_50_changelen_val': 1,
 'High_n-Low_n_ma_20_slope_15_changelen_val': 1,
 'Open_n-Close_n_ma_10_val': 1,
 'Volume_ma_5_slope_5_val': 1,
 'Close_n_slope_1_polarity': 1,
 'Low_ma_5_slope_3_polarity': 1,
 'Open_n_ma_10_slope_5_changelen_polarity': 1,
 'RSI_14_ma_1_val': 1,
 'Open-Close_polarity': 1,
 'Open-High_ma_1-Open-High_ma_50_val': 1,
 'Open-Close_ma_1-Open-Close_ma_20_val': 1,
 'Close_n_ma_1-Close_n_ma_50_changelen_polarity': 1,
 'High_n_ma_1-High_n_ma_20_changelen_polarity': 1,
 'Open-High_ma_1-Open-High_ma_50_polarity': 1}