In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import svm
import joblib

In [4]:
# Path templates for persisted artifacts; the "{}" placeholder is filled with the data set / model name.
STORAGE_PATH_DATA = "../../persisted_data/feather/{}.feather"
STORAGE_PATH_MODELS = "../../persisted_data/models/{}.joblib"


def load_data_set(name):
    """Read the feather file stored under `name` and return the loaded frame.

    `name` is inserted into STORAGE_PATH_DATA to build the file path.
    """
    feather_path = STORAGE_PATH_DATA.format(name)
    return pd.read_feather(feather_path)

In [5]:
# read the persisted stock data set (feather file) that all models below are trained on
all_stocks = load_data_set(name="all_stocks_10spy_20shift_normalized")

In [6]:
# define the used indicators
# Every column whose name contains neither "future" nor "current" (the regex
# alternation below) is treated as an indicator; the remaining columns are excluded.
all_indicators = all_stocks.columns[~all_stocks.columns.str.contains("future|current", regex=True)]

# Hand-written feature list; it is the default `indicators` argument of create_svm_model.
standard_indicators = ["sma10", "sma20", "sma50", "sma100", "lwma10", "lwma20", "lwma50", "lwma100", "lwma200",
                       "ema10", "ema20", "ema50", "ema100", "rate_of_change20", "rate_of_change50", 
                       "horizontal_position20", "horizontal_position50", "horizontal_position100",
                       "regression_position20", "regression_position50", "regression_position100",
                       "bollinger_position20_2", "bollinger_position50_2", "bollinger_position100_2"]

# Alternative feature list; it shares "sma10", "lwma10", "ema10", "regression_position20"
# and "regression_position100" with standard_indicators.
# NOTE(review): the name suggests these columns were picked by an F-test based feature
# selection; that selection step is not part of this notebook -- confirm the provenance.
ftest_selected_indicators = ["macd12_26", "macd_signal12_26", "cci50", "horizontal_lower20", "horizontal_lower50", 
                             "ma_cross50_200", "horizontal_lower200", "regression_threshold20", 
                             "regression_threshold100", "chande100", "horizontal_lower100", "ma_cross20_50", 
                             "cci_threshold50", "lwma10", "regression_position20", "regression_upper20",
                             "regression_position100", "volatility10", "ema10", "aaron_oscillator40", 
                             "bollinger_lower20_2", "rsi20", "aaron_oscillator25", "horizontal_upper20",
                             "volatility20", "aaron_up40", "aaron_oscillator15", "aaron_down40", "sma10", 
                             "aaron_down25"]

In [7]:
# default mappers for the labels
def future_price_mapper(data_set):
    """Map the ``future_price`` column to binary direction labels.

    Parameters
    ----------
    data_set : pandas.DataFrame
        Must contain a ``future_price`` column. The value is compared against 1
        (presumably the price normalized by the current price -- confirm
        against the data preparation step).

    Returns
    -------
    pandas.Series
        1 where ``future_price`` is greater than 1, -1 where it is less than or
        equal to 1. Missing values stay NaN so that they are not silently
        turned into a label.
    """
    direction = np.sign(data_set["future_price"] - 1)
    # np.sign yields 0 for an unchanged price (future_price == 1), which would
    # introduce a third class; fold it into -1 so the labels are binary
    # ("1 if increasing, -1 otherwise").
    return direction.mask(direction == 0, -1)

def future_volatility_mapper(data_set, interval=10):
    """Map a ``future_volatility<interval>`` column to binary direction labels.

    Parameters
    ----------
    data_set : pandas.DataFrame
        Must contain the column ``"future_volatility{interval}"``.
    interval : int, default 10
        Suffix used to build the column name, e.g. 10 -> ``future_volatility10``.

    Returns
    -------
    pandas.Series
        1 where the column value is greater than 1, -1 where it is less than or
        equal to 1. Missing values stay NaN.
    """
    volatility_indicator = "future_volatility{}".format(interval)
    direction = np.sign(data_set[volatility_indicator] - 1)
    # np.sign yields 0 when the value equals 1 exactly; fold that case into -1
    # so the labels are binary ("1 if increasing, -1 otherwise").
    return direction.mask(direction == 0, -1)

# factory that pins the interval so the resulting one-argument mapper can be passed to the data loader
def build_future_volatility_mapper(interval):
    """Return a label mapper that calls future_volatility_mapper with a fixed `interval`."""
    def fixed_interval_mapper(data_set):
        return future_volatility_mapper(data_set, interval)

    return fixed_interval_mapper

In [8]:
def create_svm_model(data_set, indicators=standard_indicators, label_mapper=future_price_mapper,
                     n_samples=10000, file_name=None, random_state=None):
    """Fit a linear-kernel SVM classifier on a random sample of `data_set`.

    Parameters
    ----------
    data_set : pandas.DataFrame
        Source rows; must contain the `indicators` columns and whatever
        columns `label_mapper` reads.
    indicators : list of str
        Column names used as features.
    label_mapper : callable
        Receives the sampled frame and returns one label per row.
    n_samples : int, default 10000
        Number of rows to draw without replacement. If `data_set` has fewer
        rows, all of them are used instead of raising.
    file_name : str or None, default None
        If a string, the fitted classifier is dumped with joblib to
        STORAGE_PATH_MODELS formatted with this name. Any other value skips
        persisting.
    random_state : int, numpy RandomState/Generator or None, default None
        Forwarded to ``DataFrame.sample`` so the drawn training sample (and
        therefore the fitted model) can be reproduced. None keeps the previous
        unseeded behaviour.

    Returns
    -------
    sklearn.svm.SVC
        The fitted classifier.
    """
    # DataFrame.sample raises a ValueError when asked for more rows than exist
    # (sampling without replacement), so cap the request at the frame size.
    sample_size = min(n_samples, len(data_set))
    samples = data_set.sample(sample_size, random_state=random_state)

    # select the indicators as features and derive the labels from the same rows
    features = samples[indicators]
    labels = label_mapper(samples)

    # fit the svm classifier
    classifier = svm.SVC(kernel="linear")
    classifier.fit(features, labels)

    # persist only when an explicit file name was given
    if isinstance(file_name, str):
        joblib.dump(classifier, STORAGE_PATH_MODELS.format(file_name))

    return classifier

In [7]:
# price-direction classifier trained on the standard indicator set
create_svm_model(
    data_set=all_stocks,
    indicators=standard_indicators,
    label_mapper=future_price_mapper,
    n_samples=10000,
    file_name="price_predictor_svm_standard",
)

In [9]:
# price-direction classifier trained on the ftest_selected_indicators set
create_svm_model(
    data_set=all_stocks,
    indicators=ftest_selected_indicators,
    label_mapper=future_price_mapper,
    n_samples=10000,
    file_name="price_predictor_svm_fselected",
)

In [None]:
# volatility-direction classifier (interval 10) trained on the standard indicator set
create_svm_model(
    data_set=all_stocks,
    indicators=standard_indicators,
    label_mapper=build_future_volatility_mapper(10),
    n_samples=10000,
    file_name="volatility10_predictor_svm_standard",
)