In [1]:
import glob
import os
import traceback
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm.classes import SVR
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.stattools import arma_order_select_ic

In [3]:
class InputModel:
    """Window of raw ``date,price`` rows plus the derived pieces used for forecasting.

    Parameters
    ----------
    rawData : list of str
        Text lines whose first comma-separated token is a date and whose
        second token is a price.
    firstDate : str
        Date token of the first row to use. When no row carries this date the
        window starts at the first row of ``rawData``.
    numDateOfData : int
        Maximum number of rows taken from the starting row onwards.
    pTrain : float
        Fraction of the window that forms the base training set.
    lol : int
        Offset added to the base training size; the observation at position
        ``int(pTrain * len(series)) + lol`` is the one to be predicted.
    """

    # Number of preceding observations averaged to estimate the trend.
    TREND_WINDOW = 5

    def __init__(self, rawData, firstDate, numDateOfData, pTrain, lol):
        self.rawData = rawData
        self.firstDate = firstDate
        self.numDateOfData = numDateOfData
        self.pTrain = pTrain
        self.lol = lol

    def _targetPosition(self, series):
        """Return the positional index of the observation to be predicted."""
        return int(self.pTrain * len(series)) + self.lol

    def getSeriesData(self):
        """Parse the selected rows into a float ``pd.Series`` with a daily PeriodIndex."""
        pivot = 0
        for position, line in enumerate(self.rawData):
            # Compare only the date token: a full "date,price\n" row can never
            # be equal to the bare date string.
            if line.split(",")[0].strip() == self.firstDate:
                pivot = position
                break

        dates = []
        prices = []
        for line in self.rawData[pivot:pivot + self.numDateOfData]:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            tokens = line.split(",")
            dates.append(tokens[0].strip())
            # Prices must be numeric; the components are built with division.
            prices.append(float(tokens[1]))

        index = pd.DatetimeIndex(dates).to_period('D')
        return pd.Series(prices, index=index, dtype=float)

    def getTrainData(self):
        """Return every observation that precedes the one to be predicted."""
        series = self.getSeriesData()
        return series.iloc[:self._targetPosition(series)]

    def getComponents(self):
        """Split the training data into multiplicative components.

        Returns
        -------
        (trend, seasonal, residual) : tuple of pd.Series
            All three share the index of the training data minus its first
            ``TREND_WINDOW`` entries. ``trend`` is the mean of the
            ``TREND_WINDOW`` observations preceding each date, ``seasonal`` is
            the mean de-trended value of the date's day of week, and
            ``residual`` is ``trainData / trend / seasonal``.
        """
        trainData = self.getTrainData()
        window = self.TREND_WINDOW
        values = trainData.to_numpy(dtype=float)
        tail = trainData.iloc[window:]

        # trend at position k is the mean of positions k-window .. k-1
        trend = pd.Series(
            [values[i:i + window].mean() for i in range(len(tail))],
            index=tail.index,
            dtype=float,
        )
        deTrendSeries = tail / trend

        # One seasonal factor per day of week, broadcast back onto every date.
        dayOfWeek = np.asarray(deTrendSeries.index.dayofweek)
        seasonal = deTrendSeries.groupby(dayOfWeek).transform("mean")

        residual = deTrendSeries / seasonal
        return trend, seasonal, residual

    def getPriceBefore(self):
        """Return the last known price, i.e. the one just before the target."""
        seriesData = self.getSeriesData()
        return seriesData.iloc[self._targetPosition(seriesData) - 1]

    def getPriceTobePredict(self):
        """Return the real price of the observation to be predicted."""
        seriesData = self.getSeriesData()
        return seriesData.iloc[self._targetPosition(seriesData)]

    def getPredictedDay(self):
        """Return the date of the target observation formatted as ``YYYY-MM-DD``."""
        seriesData = self.getSeriesData()
        return seriesData.index[self._targetPosition(seriesData)].strftime("%Y-%m-%d")

    def getSeasonal(self):
        """Return the seasonal factor matching the target date's day of week.

        Raises
        ------
        ValueError
            If the training data holds no observation for that day of week.
        """
        seriesData = self.getSeriesData()
        date = seriesData.index[self._targetPosition(seriesData)]
        seasonalSeries = self.getComponents()[1]
        # Select by day of week; indexing the series positionally with the
        # weekday number would return the factor of an unrelated date.
        sameDay = seasonalSeries[np.asarray(seasonalSeries.index.dayofweek) == date.dayofweek]
        if sameDay.empty:
            raise ValueError("no seasonal component available for %s" % date)
        return sameDay.iloc[0]

In [1]:
class PreidictTrend:
    """Forecast the next trend value with an SVR tuned by randomized search.

    The trend is min-max scaled, cut into sliding windows of ``WINDOW``
    consecutive values (the features) followed by the next value (the label),
    and an ``SVR`` is fitted on those pairs.

    Parameters
    ----------
    input : object
        Any object whose ``getComponents()`` returns a sequence with the trend
        series as its first element.

    Attributes
    ----------
    samples : np.ndarray, shape (n_windows, WINDOW)
    labels : np.ndarray, shape (n_windows,)
    """

    # Number of consecutive trend values used as features for one sample.
    WINDOW = 5

    def __init__(self, input):
        self.trend = input.getComponents()[0]
        # The scaling bounds must exist before windowingData() normalizes.
        self.minTrend = min(self.trend)
        self.maxTrend = max(self.trend)
        self.labels = []
        self.samples = []
        self.windowingData()

    def normalizeData(self):
        """Return the trend min-max scaled to [0, 1] as a ``pd.Series``."""
        values = np.asarray(self.trend, dtype=float)
        span = self.maxTrend - self.minTrend
        if span == 0:
            # A constant trend has no spread; avoid dividing by zero.
            scaled = np.zeros_like(values)
        else:
            scaled = (values - self.minTrend) / span
        return pd.Series(scaled)

    def windowingData(self):
        """(Re)build ``self.samples`` and ``self.labels`` from the scaled trend."""
        normalized = self.normalizeData().to_numpy()
        width = self.WINDOW
        count = max(len(normalized) - width, 0)
        samples = [normalized[i:i + width] for i in range(count)]
        labels = [normalized[i + width] for i in range(count)]
        self.samples = np.array(samples, dtype=float).reshape(-1, width)
        self.labels = np.array(labels, dtype=float)

    def _latestWindow(self):
        """Return the most recent ``WINDOW`` scaled values, shaped (1, WINDOW)."""
        if len(self.samples) == 0:
            raise ValueError("trend is too short to build a prediction window")
        # Slide the last training window forward by one step: drop its oldest
        # value and append its label.
        window = np.append(self.samples[-1][1:], self.labels[-1])
        return window.reshape(1, -1)

    def predict(self):
        """Fit the tuned SVR and return the next trend value in original units.

        Returns
        -------
        float
        """
        paramDist = {"C": np.logspace(-5, 1, 7),
                     "epsilon": np.logspace(-6, -1, 6),
                     "kernel": ['rbf', 'linear', 'poly'],
                     # SVR requires a non-negative integer polynomial degree.
                     "degree": [1, 2, 3, 4, 5],
                     "shrinking": [True, False],
                     "tol": np.logspace(-4, -2, 3),
                     # NOTE(review): cache_size is given in MB, so this allows
                     # up to ~100 GB of kernel cache - confirm it is intended.
                     "cache_size": [100000.]}
        svr = SVR(max_iter=int(1e6))  # max_iter must be an integer
        nIterSearch = 100
        randomSearch = RandomizedSearchCV(svr,
                                          param_distributions=paramDist,
                                          n_iter=nIterSearch,
                                          n_jobs=1,
                                          random_state=622)
        sampleTobePredicted = self._latestWindow()
        fittingModel = randomSearch.fit(self.samples, self.labels)
        predictValue = fittingModel.predict(sampleTobePredicted)[0]
        # Undo the min-max scaling.
        return float(self.minTrend + predictValue * (self.maxTrend - self.minTrend))


# Correctly spelled alias for the class above.
PredictTrend = PreidictTrend

In [None]:
class PredictResidual:
    """One-step-ahead forecast of the residual component with an ARMA model.

    Parameters
    ----------
    input : object
        Any object whose ``getComponents()`` returns
        ``(trend, seasonal, residual)`` and which provides
        ``getPredictedDay()`` (used for log messages only).
    """

    def __init__(self, input):
        # Keep the input model; predict() needs it for its log messages and
        # must not depend on a notebook-level global of the same name.
        self.input = input
        # Components are ordered (trend, seasonal, residual).
        self.residual = input.getComponents()[2]

    def predict(self):
        """Select the ARMA order by AIC, fit it and return the next residual."""
        icOrder = arma_order_select_ic(self.residual,
                                       ic=['aic'],
                                       trend='c',
                                       max_ar=5,
                                       max_ma=6,
                                       fit_kw={'method': 'mle'})
        aicOrder = icOrder["aic_min_order"]
        model = ARMA(self.residual, order=aicOrder)
        results = model.fit(trend='c', method='mle', disp=-1)
        # forecast() is unpacked as (forecast, standard error, confidence interval).
        predictResult, _sigmaResult, _confInt = results.forecast(steps=1)
        predictedDay = self.input.getPredictedDay()
        print("Date %s ARMA forecast: %s" % (predictedDay, predictResult))
        predictValue = predictResult[0]
        print("Date %s; predict residual %s" % (predictedDay, predictValue))
        return predictValue

In [3]:
class OutPutValidation:
    """Accumulate per-day prediction results, persist them and report accuracy.

    Parameters
    ----------
    baseDir : str, optional
        Directory that holds the ``output`` folder. Defaults to the directory
        of the running script, or to the current working directory when
        ``__file__`` is not defined (as inside a notebook kernel).

    Attributes
    ----------
    correctedPrediction : int
        Number of predictions whose direction (up/down) was right.
    mape : float
        Running SUM of absolute percentage errors; the mean is derived when
        results are printed.
    dates, predictValues, realValues : list
        One entry per recorded prediction.
    """

    def __init__(self, baseDir=None):
        if baseDir is None:
            scriptFile = globals().get("__file__")
            if scriptFile:
                baseDir = os.path.dirname(os.path.abspath(scriptFile))
            else:
                baseDir = os.getcwd()
        self.baseDir = baseDir
        self.correctedPrediction = 0
        self.mape = 0
        self.dates = []
        self.predictValues = []
        self.realValues = []
        self.columns = [
            "Date",
            "RealPrice",
            "PredictPrice",
            "CorrectPrediction",
        ]

    def _outputDir(self):
        """Return the directory holding the per-day result files."""
        return os.path.join(self.baseDir, "output")

    @staticmethod
    def _asScalar(value):
        """Coerce a number or a one-element array to a plain float."""
        return float(np.asarray(value, dtype=float).ravel()[0])

    def update(self, predictPrice, input):
        """Record one prediction and write it to ``output/<date>.txt``.

        Parameters
        ----------
        predictPrice : float or one-element array
            Predicted price for the target day.
        input : object
            Provides ``getPredictedDay()``, ``getPriceTobePredict()`` and
            ``getPriceBefore()``.

        Returns
        -------
        bool
            Always ``True``.
        """
        predictDate = input.getPredictedDay()
        realPrice = self._asScalar(input.getPriceTobePredict())
        lastPrice = self._asScalar(input.getPriceBefore())
        predictPrice = self._asScalar(predictPrice)
        predictError = predictPrice - realPrice

        self.dates.append(predictDate)
        self.predictValues.append(predictPrice)
        self.realValues.append(realPrice)
        self.mape += abs(predictError / realPrice)

        # The direction is right when real and predicted price both moved
        # strictly up, or both strictly down, relative to the last price.
        bothUp = realPrice > lastPrice and predictPrice > lastPrice
        bothDown = realPrice < lastPrice and predictPrice < lastPrice
        correctPrediction = 1 if (bothUp or bothDown) else 0
        self.correctedPrediction += correctPrediction
        print("Result date :%s, error: %s, correctPrediction: %s" %
              (predictDate, predictError, correctPrediction))

        outDir = self._outputDir()
        os.makedirs(outDir, exist_ok=True)
        outFile = os.path.join(outDir, "%s.txt" % (predictDate))
        with open(outFile, "w") as handle:
            handle.write("%s\n" % (",".join(self.columns)))
            handle.write("%s,%s,%s,%s\n" %
                         (predictDate, realPrice, predictPrice, correctPrediction))
        return True

    def printResult(self):
        """Print the number of predictions, direction hits and mean MAPE.

        Does not modify the accumulated state, so it can be called repeatedly.
        """
        n = len(self.dates)
        if n == 0:
            print("Number of predict dates: 0")
            return
        print("Number of predict dates: %s" % n)
        print("Number of trend prediction: %s" % self.correctedPrediction)
        print("MAPE = %s" % (self.mape / n))
        correctPercent = (100 * self.correctedPrediction) / n
        print("Trend prediction result: %s" % correctPercent)

    def drawPrediction(self, title=None):
        """Plot real against predicted prices and save to ``Plots/Prediction.png``.

        Parameters
        ----------
        title : str, optional
            Figure title; a default title is used when omitted.
        """
        fig, ax = plt.subplots(figsize=(12, 6), facecolor='white')
        lw = 2
        positions = np.arange(len(self.dates))
        ax.plot(positions, self.realValues,
                color='darkorange', lw=lw, label='data')
        ax.plot(positions, self.predictValues,
                color='navy', lw=lw, label='RBF model')
        ax.set_xlabel('dates')
        ax.set_ylabel('target')
        ax.set_title(title if title is not None
                     else 'DowJones-Index prediction Using SVM-ARMA')
        ax.legend()
        os.makedirs("Plots", exist_ok=True)
        fig.savefig("Plots/Prediction.png")

    def reloadResultFromFiles(self):
        """Load every ``output/*.txt`` result file, accumulate it and print a summary."""
        predictItems = []
        for path in glob.glob(os.path.join(self._outputDir(), "*.txt")):
            with open(path, "r") as handle:
                # Strip line endings so the last header/token is a clean key.
                lines = [line.strip() for line in handle if line.strip()]
            if not lines:
                continue
            headers = lines[0].split(",")
            for line in lines[1:]:
                predictItems.append(dict(zip(headers, line.split(","))))
        predictItems.sort(key=lambda x: x['Date'])

        for item in predictItems:
            realPrice = float(item["RealPrice"])
            predictPrice = float(item["PredictPrice"])
            correct = int(item["CorrectPrediction"])
            self.dates.append(item["Date"])
            self.predictValues.append(predictPrice)
            self.realValues.append(realPrice)
            self.correctedPrediction += correct
            self.mape += abs(predictPrice - realPrice) / realPrice
            print("Result date %s, real_price: %s, predict_price: %s, correct: %s"
                  % (self.dates[-1], self.realValues[-1], self.predictValues[-1],
                     correct))

        print("Number of predict day: %s" % len(predictItems))
        total = len(self.dates)
        if total == 0:
            return
        correctPercent = (100 * self.correctedPrediction) / total
        print("Number of correct prediction: %s" % self.correctedPrediction)
        print("Accuracy of model: %s" % correctPercent)
        print("MAPE = %s" % (self.mape / total))

In [None]:
# Driver cell: walk forward over the test part of the data, predicting one
# day at a time as trend (SVR) * residual (ARMA) * seasonal factor.
sep = os.path.sep
# `__file__` is not defined inside a notebook kernel; fall back to the
# current working directory in that case.
scriptFile = globals().get("__file__")
absPath = os.path.dirname(os.path.abspath(scriptFile)) if scriptFile else os.getcwd()
dataFile = absPath + sep + "raw_data.txt"
with open(dataFile, "r") as dataHandle:
    rawData = dataHandle.readlines()


predict = 1          # 1: run the walk-forward prediction
numData = 500        # number of rows taken from the raw data
pTrain = 0.8         # fraction of the rows used as the initial training set
maxArmaAttempts = 2  # how often the ARMA fit is tried before a day is skipped
if predict == 1:
    output = OutPutValidation()
    # Integer count of held-out days; `numData*(1-pTrain)` is a float and
    # cannot be passed to range().
    numPredictDays = numData - int(pTrain * numData)
    for i in range(numPredictDays):
        print("Processing date %s" % i)

        inputModel = InputModel(rawData, firstDate="2015-01-01",
                                numDateOfData=numData, pTrain=pTrain, lol=i)
        # NOTE: the trend class is defined in this notebook as `PreidictTrend` (sic).
        svmTrend = PreidictTrend(inputModel)
        armaResidual = PredictResidual(inputModel)

        predictResidual = None
        for attempt in range(maxArmaAttempts):
            try:
                print("Start residual prediction using ARMA for date %s"
                      % (inputModel.getPredictedDay()))
                predictResidual = armaResidual.predict()
                break
            except Exception:
                # Best effort: report the failure and retry / skip the day.
                traceback.print_exc()
        if predictResidual is None:
            print("can't deal")
            continue

        print("Start trend prediction using SVM for date %s" % inputModel.getPredictedDay())
        predictTrend = svmTrend.predict()
        predictSeasonal = inputModel.getSeasonal()

        # Multiplicative recomposition of the three components.
        predictResult = predictTrend * predictResidual * predictSeasonal

        output.update(predictResult, inputModel)
    output.printResult()
    output.drawPrediction()
    print("DONE!")

loadFromFiles = 0    # 1: rebuild the report from previously written result files
if loadFromFiles == 1:
    output = OutPutValidation()
    output.reloadResultFromFiles()
    output.printResult()
    output.drawPrediction()
    print("DONE!")