In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline
from keras.models import Sequential
from keras.layers import Dense
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import LSTM
from sklearn.model_selection import train_test_split


In [4]:
#Generates the a
def generateGraph(train, test, title, xlabel, ylabel, legend, fileTitle):
    ax, fig = plt.subplots(figsize=(10, 8))
    fig.plot(train)
    fig.plot(test)
    fig.set_xlabel(xlabel)
    fig.set_ylabel(ylabel)
    fig.set_title('{} - {}'.format(title, fileTitle))
    fig.legend(legend)
    ax.savefig('./graphs/{}.png'.format(fileTitle))
    

In [28]:
# Reads in the csv file with the given path. If split is 0, then it's assumed not to be analyzing and instead predicting future values. 
def readData(path, split):
    df = pd.read_csv(path)
    print(df.head())
    print("------------------------------------------------------------------")
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df['Close']
    if split > 0:
        train = df.iloc[:-split]
        test = df.iloc[-split:]
        dates = df.index[-split+1:]
        return train, test, dates
    else:
        train = df.shift(future).dropna()
        test = df.shift(-future).dropna()
        dates = pd.date_range(df.index[-1], future)
        return train, test, dates


In [6]:
def scaleData(train, test):
    sc= MinMaxScaler()
    train_sc = sc.fit_transform(train.values.reshape(-1, 1))
    test_sc = sc.transform(test.values.reshape(-1, 1))
    x_train = train_sc[:-1]
    y_train = train_sc[1:]

    x_test = test_sc[:-1]
    y_test = test_sc[1:]
    
    return x_train, y_train, x_test, y_test, sc

In [7]:
def createModel(x_train, y_train):
    x_train_t = x_train[:, None]
    
    K.clear_session()
    
    early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)
    model = Sequential()
    model.add(LSTM(6, input_shape=(1, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train_t, y_train, epochs=100, batch_size=16, verbose=1, callbacks=[early_stop])
    return model

In [8]:
def saveSheet(y_pred, y_test, dates, sc, fileName):
    df = pd.DataFrame()
    y_pred = sc.inverse_transform(y_pred)
    y_test = sc.inverse_transform(y_test)
    df['Date'] = dates
    df['Predicted Close'] = y_pred.round(2)
    df['Actual Close'] = y_test.round(2)
    df['% error'] = ((y_pred - y_test) / y_test*100).round(2)
    print(df)
    df.to_csv('./results/{}.csv'.format(fileName))

In [9]:
def analyze(path, split, fileName):
    train, test, dates = readData(path, split)
    generateGraph(train,
                  test,
                  'Training and Testing Data split',
                  'Dates',
                  'Closing value',
                  ['train', 'test'],
                  '{}_train_test_split'.format(fileName)
                 )
    x_train, y_train, x_test, y_test, sc = scaleData(train, test)
    model = createModel(x_train, y_train)
    x_test_t = x_test[:, None]
    y_pred = model.predict(x_test_t)
    generateGraph(y_test,
                  y_pred,
                  'Model results vs Actual value',
                  'Future days',
                  'Closing Value',
                  ['actual', 'predicted'],
                  '{}_results'.format(fileName)
                 )
    print(y_pred.shape, y_test.shape, dates.shape)
    saveSheet(y_pred, y_test, dates, sc, fileName)
    
    


In [20]:
def predict(path, future, fileName):
    train, test, dates = readData(path, 0, future)
    

In [11]:
def runAnalyze():
    import os
    for filename in os.listdir('./data'):
        analyze('./data/{}'.format(filename), 180, filename)

In [30]:
predict('./data/WIKI_FB.csv', 3, 'WIKI_FB')

         Date   Open   High    Low    Close       Volume  Ex-Dividend  \
0  2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
1  2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2  2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
3  2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
4  2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

   Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
0          1.0      42.05      45.00     38.00     38.2318  573576400.0  
1          1.0      36.53      36.66     33.00     34.0300  168192700.0  
2          1.0      32.61      33.59     30.94     31.0000  101786600.0  
3          1.0      31.37      32.50     31.36     32.0000   73600000.0  
4          1.0      32.95      33.21     31.77     33.0300   50237200.0  
------------------------------------------------------------------
Date
2012-05-23     38.2318
2012-05-24     34.0300

In [31]:
d = pd.DataFrame()
d['Original'] = [1, 2, 3, 4, 5]
d['shift1'] = d['Original'].shift(1)
d['shift-1'] = d['Original'].shift(-1)
d

Unnamed: 0,Original,shift1,shift-1
0,1,,2.0
1,2,1.0,3.0
2,3,2.0,4.0
3,4,3.0,5.0
4,5,4.0,
