In [1]:
import numpy as np 
from hmmlearn import hmm
import pandas as pd

import os
import tarfile
import pandas as pd
from pandas import errors as pd_errors
from functools import reduce
import logging
import datetime
import sklearn.metrics as metrics

In [2]:
# load data
# Change the format of date

df = pd.read_csv('fb_stock_data.csv')
df['Date']=df['Date'].apply(lambda x: datetime.datetime.strptime(x, "%m/%d/%Y").strftime("%Y-%m-%d"))

#inverse dataframe
df=df.reindex(index=df.index[::-1])
#df.loc[0:5]
df

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
2240,2012-05-18,38.2318,579377500,42.00,45.000,38.00
2239,2012-05-21,34.0300,167943600,36.53,36.660,33.00
2238,2012-05-22,31.0000,101667700,32.61,33.590,30.94
2237,2012-05-23,32.0000,73541150,31.37,32.500,31.36
2236,2012-05-24,33.0300,50208760,32.95,33.210,31.77
...,...,...,...,...,...,...
4,2021-04-09,312.4600,15988570,311.40,314.740,310.33
3,2021-04-12,311.5400,10881940,311.05,312.150,307.93
2,2021-04-13,309.7600,14036930,312.21,314.428,309.32
1,2021-04-14,302.8200,17421390,307.30,308.030,301.95


In [11]:
import warnings
import logging
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Supress warning in hmmlearn
warnings.filterwarnings("ignore")

plt.style.use('ggplot')
 
class StockPredictor(object):
    def __init__(self, test_size=0.33,
                 n_hidden_states=4, n_latency_days=10,
                 n_steps_frac_change=50, n_steps_frac_high=10,
                 n_steps_frac_low=10):
        self._init_logger()
 
        self.n_latency_days = n_latency_days
 
        self.hmm = GaussianHMM(n_components=n_hidden_states)
 
        self._split_train_test_data(test_size)
 
        self._compute_all_possible_outcomes(
            n_steps_frac_change, n_steps_frac_high, n_steps_frac_low)
 
    def _init_logger(self):
        self._logger = logging.getLogger(__name__)
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        handler.setFormatter(formatter)
        self._logger.addHandler(handler)
        self._logger.setLevel(logging.DEBUG)
 
    def _split_train_test_data(self, test_size):
        data = pd.read_csv('fb_stock_data.csv')
        _train_data, test_data = train_test_split(
            data, test_size=test_size, shuffle=False)
 
        self._train_data = _train_data
        self._test_data = test_data
 
    @staticmethod
    def _extract_features(data):
        open_price = np.array(data['Open'])
        close_price = np.array(data['Close/Last'])
        high_price = np.array(data['High'])
        low_price = np.array(data['Low'])
 
        # 計算收盤價、高價和低價的分數變化
        # 這會用到一個特徵
        frac_change = (close_price - open_price) / open_price
        frac_high = (high_price - open_price) / open_price
        frac_low = (open_price - low_price) / open_price
 
        return np.column_stack((frac_change, frac_high, frac_low))
 
    def fit(self):
        self._logger.info('>>> Extracting Features')
        feature_vector = StockPredictor._extract_features(self._train_data)
        self._logger.info('Features extraction Completed <<<')
 
        self.hmm.fit(feature_vector)
 
    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.1, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.1, n_steps_frac_low)
 
        self._possible_outcomes = np.array(list(itertools.product(
            frac_change_range, frac_high_range, frac_low_range)))
 
    def _get_most_probable_outcome(self, day_index):
        previous_data_start_index = max(0, day_index - self.n_latency_days)
        previous_data_end_index = max(0, day_index - 1)
        previous_data = self._test_data.iloc[previous_data_end_index: previous_data_start_index]
        previous_data_features = StockPredictor._extract_features(
            previous_data)
 
        outcome_score = []
        for possible_outcome in self._possible_outcomes:
            total_data = np.row_stack(
                (previous_data_features, possible_outcome))
            outcome_score.append(self.hmm.score(total_data))
        most_probable_outcome = self._possible_outcomes[np.argmax(
            outcome_score)]
 
        return most_probable_outcome
 
    def predict_close_price(self, day_index):
        open_price = self._test_data.iloc[day_index]['Open']
        predicted_frac_change, _, _ = self._get_most_probable_outcome(
            day_index)
        return open_price * (1 + predicted_frac_change)
 
    def predict_close_prices_for_days(self, days, with_plot=False):
        predicted_close_prices = []
        for day_index in tqdm(range(days)):
            predicted_close_prices.append(self.predict_close_price(day_index))
 
        if with_plot:
            test_data = self._test_data[0: days]
            days = np.array(test_data['date'], dtype="datetime64[ms]")
            actual_close_prices = test_data['close']
 
            fig = plt.figure()
 
            axes = fig.add_subplot(111)
            axes.plot(days, actual_close_prices, 'bo-', label="actual")
            axes.plot(days, predicted_close_prices, 'r+-', label="predicted")
            axes.set_title('FB')
 
            fig.autofmt_xdate()
 
            plt.legend()
            plt.show()
 
        return predicted_close_prices

    def evaluation(self, days):
        print("MAE:", metrics.mean_absolute_error(self._test_data['Close/Last'][0:days], self.predicted_close_prices))
        print("MSE:", metrics.mean_squared_error(self._test_data['Close/Last'][0:days], self.predicted_close_prices))
        print("MedAE:", metrics.median_absolute_error(self._test_data['Close/Last'][0:days], self.predicted_close_prices))
        print("RSQ:", metrics.r2_score(self._test_data['Close/Last'][0:days], self.predicted_close_prices))


In [12]:
#prediction days
days=20

stock_predictor = StockPredictor()
stock_predictor.fit()
res=stock_predictor.predict_close_prices_for_days(days, with_plot=True)

2021-05-28 00:44:06,156 __main__     INFO     >>> Extracting Features
2021-05-28 00:44:06,156 __main__     INFO     >>> Extracting Features
2021-05-28 00:44:06,156 __main__     INFO     >>> Extracting Features
2021-05-28 00:44:06,156 __main__     INFO     >>> Extracting Features
2021-05-28 00:44:06,156 __main__     INFO     >>> Extracting Features
2021-05-28 00:44:06,166 __main__     INFO     Features extraction Completed <<<
2021-05-28 00:44:06,166 __main__     INFO     Features extraction Completed <<<
2021-05-28 00:44:06,166 __main__     INFO     Features extraction Completed <<<
2021-05-28 00:44:06,166 __main__     INFO     Features extraction Completed <<<
2021-05-28 00:44:06,166 __main__     INFO     Features extraction Completed <<<
 10%|█         | 2/20 [00:05<00:48,  2.72s/it]


KeyboardInterrupt: 

In [155]:
stock_predictor.evaluation(days)

MAE: 5.192365816326546
MSE: 31.16920934702742
MedAE: 4.805173469387768
RSQ: -0.12364276043760691
