In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM


In [2]:
# import packages### Helper Functions:
import numpy as np
import pandas as pd

# to plot charts and figures
import matplotlib.pyplot as plt
%matplotlib inline


In [20]:
# Read the Dataset
def readCSV(filename):
    filename = "Dataset\\" + filename + ".csv"
    df = pd.read_csv(filename)
    df = cleanDate(df)
    return df    


# Changing the date to a standard format [dd-mm-yy]
def cleanDate(df):
    df['Date'] = pd.to_datetime(df.Date,format='%d-%b-%Y')
    df.index = df['Date']
    return df
    

#     Helper function to plot VWAP for stocks
def plotChart(df):
    plt.figure(figsize=(24, 8))
    plt.plot(df['Average Price'], label='VWAP')
    plt.xlabel("Date")
    plt.ylabel("Volume Weighted Average Price")
    plt.legend()
    
    
# Print the entire dataset
def printEntireData(df):
    print(df)
    plotChart(df)
    

# Print a sample of the dataset
def printSampleData(df):
    length = len(df)
    list = [0, 5, 6, 9, 10]
    df1 = df[0:5]
    print(df1[df1.columns[list]])
    print(".\n.\n.")
    df2 = df[987:]
    print(df2[df2.columns[list]])
    print("\nDisplaying 10 out of {} rows.".format(length))
    
    
df = readCSV("BIOCON")
printSampleData(df)


            Symbol  High Price  Low Price  Average Price   Volume
Date                                                             
2015-04-13  BIOCON      491.80     468.15         481.59  2337764
2015-04-15  BIOCON      495.70     474.45         486.89  1367669
2015-04-16  BIOCON      482.40     463.00         470.99   917630
2015-04-17  BIOCON      476.25     463.25         470.79   644312
2015-04-20  BIOCON      469.90     445.40         456.25  1160262
.
.
.
            Symbol  High Price  Low Price  Average Price   Volume
Date                                                             
2019-04-05  BIOCON      615.75     610.10         612.58   622322
2019-04-08  BIOCON      616.50     605.50         609.09   769326
2019-04-09  BIOCON      619.90     603.55         614.15  1624259
2019-04-10  BIOCON      636.75     613.80         627.68  2419841

Displaying 10 out of 991 rows.


In [None]:
data = df

df = pd.DataFrame(index=range(0,len(df)),columns=['Date', 'Average Price'])
for i in range(0, len(data)):
    df['Date'][i] = data['Date'][i].date()
    df['Average Price'][i] = data['Average Price'][i]

df.head()

In [None]:
df.index = df.Date
df.drop('Date', axis=1, inplace=True)

dataset = df.values

train = dataset[0:len(dataset)-60,:]
valid = dataset[len(dataset)-60:,:]

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

x_train, y_train = [], []
for i in range(60,len(train)):
    x_train.append(scaled_data[i-60:i,0])
    y_train.append(scaled_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)

x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))


In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=50))
model.add(Dense(1))


In [None]:
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)


In [None]:
inputs = df[len(df) - len(valid) - 60:].values
inputs = inputs.reshape(-1,1)
inputs  = scaler.transform(inputs)

X_test = []
for i in range(60,inputs.shape[0]):
    X_test.append(inputs[i-60:i,0])
X_test = np.array(X_test)

X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
avgPrice = model.predict(X_test)
avgPrice = scaler.inverse_transform(avgPrice)

In [None]:
rms=np.sqrt(np.mean(np.power((valid-avgPrice),2)))
rms

In [None]:
train = df[:927]
valid = df[927:]

flag = np.zeros(shape=(927,1))
result = np.append(flag, avgPrice)
result[:927] = np.NaN

# valid['Predictions'] = avgPrice
plt.figure(figsize=(24, 10))
plt.plot(train['Average Price'])
plt.plot(valid['Average Price'])
plt.plot(result)
plt.legend()