In [1]:
import numpy as np
from sklearn.svm import LinearSVR, SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import RegressorChain
import tensorflow as tf
import math, os, sys, datetime
from pandas_datareader import data as pdr
from datetime import date, timedelta
import yfinance as yf
yf.pdr_override()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from functools import reduce
from sklearn import preprocessing
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression
from utils.preprocessstock import preprocess
import tensorflow_probability as tfp
from ta import add_all_ta_features
from ta.utils import dropna
from sklearn.metrics import mean_squared_error  as MSE
from sklearn.metrics import explained_variance_score

# Symbols to download. Add or remove entries here to change which tickers
# the notebook pulls data for.
# NOTE(review): 'FB' has since been renamed 'META' - confirm it still resolves.
tickers = [
    # Symbols without an exchange suffix
    'AAPL', 'AMZN', 'FB', 'GOOG', 'MMM',
    'MSFT', 'NFLX', 'NKE', 'NVDA', 'INTC',
    'CSCO', 'WMT', 'TSLA', 'EBAY', 'ORCL',
    # Symbols carrying the '.BK' suffix
    'CBG.BK', 'OSP.BK', 'BBL.BK', 'SCB.BK', 'LH.BK',
    'MINT.BK', 'PTT.BK', 'BANPU.BK', 'ADVANC.BK', 'TRUE.BK',
    'AOT.BK', 'BEM.BK', 'BTS.BK', 'CPALL.BK', 'CPN.BK',
]
# Shorter '.BK'-only alternative, kept for reference:
# tickers2=['CBG.BK','OSP.BK','BBL.BK','SCB.BK','MINT.BK','PTT.BK','ADVANC.BK',
#          'AOT.BK','CPALL.BK','CPN.BK']

# Date range requested from the data source.
startdate = datetime.datetime(year=2017, month=1, day=13)
enddate = datetime.datetime(year=2021, month=1, day=1)
# Number of consecutive rows grouped into one sample by the training cell.
timesteps = 7

In [2]:
def evaluate(model, test_features, test_labels, getmax, getmin):
    """Score ``model`` on min-max-normalised targets, reporting in original units.

    Predictions and labels are flattened, mapped back to the original scale
    with ``value * (getmax - getmin) + getmin`` and compared. A short report
    (mean absolute error, RMSE, explained variance, accuracy) is printed.

    Args:
        model: Fitted estimator exposing ``predict(test_features)``.
        test_features: Features handed unchanged to ``model.predict``.
        test_labels: Normalised ground-truth targets of any shape. The
            caller's array is never modified.
        getmax: Maximum that was used to normalise the labels. May be a
            scalar or any one-element container (e.g. the Series returned
            by ``DataFrame.max()`` on a single-column frame).
        getmin: Minimum that was used to normalise the labels; same
            accepted forms as ``getmax``.

    Returns:
        ``100 - MAPE`` in percent. A de-normalised label equal to zero
        makes the MAPE (and therefore the result) non-finite.
    """
    # Collapse one-element Series/arrays to plain floats so the arithmetic
    # below stays purely numeric.
    lower = float(np.squeeze(np.asarray(getmin)))
    span = float(np.squeeze(np.asarray(getmax))) - lower

    # Vectorised de-normalisation creates new arrays, so neither the model's
    # output nor the caller's ``test_labels`` is written to.
    predictions = np.asarray(model.predict(test_features), dtype=float).reshape(-1) * span + lower
    actual = np.asarray(test_labels, dtype=float).reshape(-1) * span + lower

    errors = np.abs(predictions - actual)
    # Absolute denominator keeps the percentage error non-negative.
    mape = 100 * np.mean(errors / np.abs(actual))
    accuracy = 100 - mape
    print('Model Performance')
    # NOTE(review): the value is in the labels' original units; the
    # "degrees" wording is kept only so existing output stays comparable.
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('RMSE: {:0.4f}' .format(math.sqrt(MSE(actual, predictions))))
    print('EVS: {:0.4f}' .format(explained_variance_score(actual, predictions)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))

    return accuracy

In [3]:
# A linear-kernel SVR behind a StandardScaler, wrapped in a RegressorChain so
# that each sample's `timesteps` targets are predicted together.
# NOTE(review): the same `wrapper` is re-fitted for every ticker, so after the
# loop it presumably holds only the fit for the last ticker - confirm intent.
model = make_pipeline(StandardScaler(), SVR(kernel='linear', C=1.0, epsilon=0.2))
wrapper = RegressorChain(model)
# Upper bound on the number of columns the sequential selector may keep.
MAX_SELECTED_FEATURES = 5
for stocks in tickers:
    # Load Data
    ticker = yf.Ticker(stocks)
    data = ticker.history(start=startdate, end=enddate)
    # Target: the close `timesteps` rows ahead (tied to the window length
    # instead of a separate hard-coded 7).
    data['next_Close'] = data['Close'].shift(-timesteps)
    data = data.drop(columns=['Dividends', 'Stock Splits'])
    data = dropna(data)
    # Add Indicator
    data = add_all_ta_features(data, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)
    data = data.drop(columns=['Volume', 'Open', 'High', 'Low'])
    # Feature Selection
    y = data['next_Close']
    # columns[1:] leaves out the first remaining column (presumably 'Close' -
    # TODO confirm this exclusion is intended).
    featureScores = pd.DataFrame(data[data.columns[1:]].corr()['next_Close'][:])
    # Keep columns whose absolute correlation with the target exceeds 0.90;
    # a label-based mask avoids positional integer lookups on the Series.
    strongly_correlated = featureScores['next_Close'].abs() > 0.90
    x_list = [name for name in featureScores.index[strongly_correlated] if name != 'next_Close']
    if not x_list:
        # Nothing to select from; the selector cannot run on zero columns.
        print(f'{"="*52}')
        print(f"Stock : {stocks}")
        print('Skipped: no feature has |correlation| > 0.90 with next_Close')
        continue
    X = data[x_list]
    # The selector's upper bound may not exceed the number of candidates.
    max_k = min(MAX_SELECTED_FEATURES, X.shape[1])
    sfs1 = SFS(LinearRegression(), k_features=(1, max_k), forward=True, floating=False, cv=0)
    sfs1.fit(X, y)
    k_feature_names = list(sfs1.k_feature_names_)
    # The selector may keep fewer than the maximum; size everything from the
    # actual count rather than assuming 5.
    n_features = len(k_feature_names)
    features = data[k_feature_names]
    # Preprocess
    # NOTE(review): scaling and feature selection are fitted on the whole
    # series, test windows included - this leaks test information.
    feature_scaler = preprocessing.MinMaxScaler()
    features = feature_scaler.fit_transform(features)
    n_windows = len(features) // timesteps
    # Drop the trailing remainder so the rows split into whole windows.
    features = features[:n_windows * timesteps].reshape((n_windows, timesteps, n_features))

    labels = data[['next_Close']]
    # Plain floats, so de-normalisation downstream is scalar arithmetic.
    getmax = float(labels['next_Close'].max())
    getmin = float(labels['next_Close'].min())
    min_max_scaler = preprocessing.MinMaxScaler()
    labels = min_max_scaler.fit_transform(labels)
    # Reshape straight to 2-D; squeezing a 3-D array would also collapse the
    # window axis when only one window exists.
    labels = labels[:n_windows * timesteps].reshape((n_windows, timesteps))

    train_test_split_factor = .80
    validation_split_factor = .20  # not used in this cell
    # Chronological split: the first 80% of windows train, the rest test.
    split_at = math.floor(n_windows * train_test_split_factor)
    train_x, test_x = features[:split_at], features[split_at:]
    train_y, test_y = labels[:split_at], labels[split_at:]
    # Flatten each window into one row of timesteps * n_features values.
    train_x = train_x.reshape(train_x.shape[0], timesteps * n_features).astype('float32')
    test_x = test_x.reshape(test_x.shape[0], timesteps * n_features).astype('float32')

    wrapper.fit(train_x, train_y)
    print(f'{"="*52}')
    print(f"Stock : {stocks}")
    accuracy = evaluate(wrapper, test_x, test_y, getmax, getmin)


Stock : AAPL
Model Performance
Average Error: 35.9121 degrees.
RMSE: 40.4521
EVS: 0.2595
Accuracy = 66.01%.
Stock : AMZN
Model Performance
Average Error: 1086.0001 degrees.
RMSE: 1158.2753
EVS: 0.2899
Accuracy = 63.08%.
Stock : FB
Model Performance
Average Error: 51.4163 degrees.
RMSE: 57.1946
EVS: 0.4027
Accuracy = 79.66%.
Stock : GOOG
Model Performance
Average Error: 129.9483 degrees.
RMSE: 152.5607
EVS: 0.6263
Accuracy = 91.50%.
Stock : MMM
Model Performance
Average Error: 12.2815 degrees.
RMSE: 14.9173
EVS: 0.5401
Accuracy = 91.48%.
Stock : MSFT
Model Performance
Average Error: 37.6325 degrees.
RMSE: 39.7877
EVS: 0.5805
Accuracy = 81.25%.
Stock : NFLX
Model Performance
Average Error: 101.7334 degrees.
RMSE: 108.3798
EVS: 0.5033
Accuracy = 78.73%.
Stock : NKE
Model Performance
Average Error: 20.5725 degrees.
RMSE: 24.2926
EVS: 0.4266
Accuracy = 82.01%.
Stock : NVDA
Model Performance
Average Error: 216.4232 degrees.
RMSE: 240.9871
EVS: 0.0450
Accuracy = 52.38%.
Stock : INTC
Model Per

In [4]:
# save model
import joblib
# Create the target directory first; dumping into a missing directory fails.
os.makedirs('weights', exist_ok=True)
# NOTE(review): `wrapper` is re-fitted for each ticker in the training loop,
# so this presumably stores only the last ticker's fit - confirm intent.
joblib.dump(wrapper, 'weights/svm.sav')

['weights/svm.sav']