# In [34]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import itertools

from hmmlearn.hmm import GaussianHMM

# Load the augmented Google table, discard the leftover 'Unnamed: 0' column
# and keep only the four OHLC price columns.
df = pd.read_csv('dataset/news_sentiment_augmented_google.csv')
df = df.drop(columns=['Unnamed: 0'])
df = df.loc[:, ['Open', 'High', 'Low', 'Close']]
df.head()

# Disabled plotting snippet, kept as an inert string literal.
"""plt.figure(figsize = (30,9))
plt.plot(range(df.shape[0]), df['Middle'])
plt.xticks(range(0, df.shape[0], 359), df['Date'].loc[::30])
plt.xlabel('Date', fontsize=30)
plt.ylabel('Mid Price', fontsize=30)
plt.show()"""


# Chronological split by row position: rows 0-299 train, rows 300-399
# validate, everything from row 400 onwards tests.
training_data, validation_data, testing_data = (
    df.iloc[:300],
    df.iloc[300:400],
    df.iloc[400:],
)

training_data



class Stock_Predictor(object):
    """Predict daily closing prices with a Gaussian hidden Markov model.

    Each trading day is described by three fractions relative to its opening
    price (see ``fraction_calculator``). The HMM is fitted on those fractions
    for the training rows. To predict a day, every candidate triple from a
    fixed grid is appended to the observations of the preceding days and
    scored by the fitted model; the best-scoring candidate is kept.

    Args:
        days: Number of preceding days used as context for one prediction.
        hidden: Number of hidden states of the ``GaussianHMM``.
    """

    # Location of the input table. The original comment lists its columns as
    # Date, Open, High, Low, Close.
    DATA_PATH = 'dataset/news_sentiment_augmented_google.csv'

    def __init__(self, days=10, hidden=4):
        self.latent_days = days

        # default to 4 hidden states
        self.hmm = GaussianHMM(n_components=hidden)

        # populate self.training_data, self.validation_data, self.testing_data
        self._data_loading_and_spliting()

        # populate self.all_discrete_fracs
        self._discrete_frac_range()

    def _data_loading_and_spliting(self):
        """Read the CSV and split it by row position, without shuffling.

        Rows 0-299 become the training set, rows 300-399 the validation set
        and the remaining rows the test set.
        """
        data = pd.read_csv(self.DATA_PATH).drop(['Unnamed: 0'], axis=1)

        self.training_data = data[:300]
        self.validation_data = data[300:400]
        self.testing_data = data[400:]

    def _discrete_frac_range(self):
        """Build the grid of candidate (frac_change, frac_high, frac_low).

        frac_change takes 20 evenly spaced values in [-0.1, 0.1]; frac_high
        and frac_low each take 10 evenly spaced values in [0, 0.1]. The
        result, stored in ``self.all_discrete_fracs``, is the Cartesian
        product of the three axes: an array of shape (2000, 3).
        """
        frac_change_list = np.linspace(-0.1, 0.1, 20)
        frac_high_list = np.linspace(0, 0.1, 10)
        frac_low_list = np.linspace(0, 0.1, 10)

        self.all_discrete_fracs = np.array(list(itertools.product(
            frac_change_list, frac_high_list, frac_low_list)))

    def fraction_calculator(self, data):
        """Turn OHLC rows into the fractional features used by the model.

        Args:
            data: Table with 'Open', 'High', 'Low' and 'Close' columns.

        Returns:
            Array of shape (len(data), 3) whose columns are
            (close - open) / open, (high - open) / open and
            (open - low) / open.
        """
        open_price = np.array(data['Open'])
        close_price = np.array(data['Close'])
        high_price = np.array(data['High'])
        low_price = np.array(data['Low'])

        frac_change = (close_price - open_price) / open_price
        frac_high = (high_price - open_price) / open_price
        frac_low = (open_price - low_price) / open_price

        return np.column_stack((frac_change, frac_high, frac_low))

    def hmm_fit(self):
        """Fit the HMM on the fractional features of the training rows."""
        frac_vector = self.fraction_calculator(self.training_data)

        self.hmm.fit(frac_vector)

    def most_likely_fracs(self, curr_date):
        """Return the grid candidate the fitted model scores highest.

        Args:
            curr_date: Zero-based row position in the test set of the day
                being predicted.

        Returns:
            Array (frac_change, frac_high, frac_low) taken from
            ``self.all_discrete_fracs``.
        """
        # Context window: up to `latent_days` rows strictly before curr_date.
        # The slice must run start -> end; the reverse order is always empty.
        start_date = max(0, curr_date - self.latent_days)
        previous_data = self.testing_data.iloc[start_date:curr_date]
        previous_fracs = self.fraction_calculator(previous_data)

        # Score the observed window followed by each candidate for today.
        all_probs = [
            self.hmm.score(np.vstack((previous_fracs, one_fracs)))
            for one_fracs in self.all_discrete_fracs
        ]

        return self.all_discrete_fracs[np.argmax(all_probs)]

    def close_price(self, date):
        """Predict the closing price of test row ``date``.

        The prediction is that day's opening price scaled by the most likely
        fractional change.
        """
        open_price = self.testing_data.iloc[date]['Open']
        likely_frac_change, _, _ = self.most_likely_fracs(date)
        return open_price * (1 + likely_frac_change)

    def closing_price_in_range(self, days=None):
        """Predict closing prices for the test set and plot them.

        Args:
            days: Number of leading test rows to predict. Defaults to every
                row of the test set; larger values are clamped to its length.
        """
        total_days = len(self.testing_data)
        days = total_days if days is None else min(days, total_days)

        closing_prices = [self.close_price(date) for date in range(days)]

        print("here")

        real_closing = self.testing_data['Close'].iloc[:days]

        figure = plt.figure()

        axes = figure.add_subplot(111)
        axes.plot(range(days), real_closing, 'bo-', label="Actual")
        axes.plot(range(days), closing_prices, 'r+-', label="Predicted")

        axes.legend()
        plt.show()
    
def main(object=None):
    """Fit the stock predictor and plot its test-set predictions.

    Args:
        object: Unused. Kept, and made optional, so existing calls that pass
            a positional argument keep working.
    """
    print("here")
    stock_predictor = Stock_Predictor()
    stock_predictor.hmm_fit()
    stock_predictor.closing_price_in_range()