In [None]:
!pip install pandas numpy matplotlib seaborn keras nltk scikit-learn

In [None]:
!pip install tensorflow

In [None]:
# This cell runs before the notebook's main import cell, so nltk has to be
# imported here; otherwise a fresh kernel fails with NameError.
import nltk

# Fetch the VADER lexicon used by SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

### Importing Required Libraries

In [None]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import unicodedata
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Dense, Activation
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from sklearn import preprocessing, metrics
from sklearn.preprocessing import MinMaxScaler
from nltk.sentiment.vader import SentimentIntensityAnalyzer

### Reading the data and doing the EDA

In [None]:
# Load the raw price history and the raw news headlines from local CSV files.
stock_price = pd.read_csv(filepath_or_buffer='RELIANCE.csv')
news_headline = pd.read_csv(filepath_or_buffer='india-news-headlines.csv')

In [None]:
# Discard the price-file columns that the rest of the notebook does not use.
unused_columns = [
    'Trades', 'Deliverable Volume', '%Deliverble', 'VWAP',
    'Series', 'Symbol', 'Prev Close', 'Turnover',
]
stock_price = stock_price.drop(columns=unused_columns)

In [None]:
# Preview the first rows of the trimmed price table.
stock_price.head()

In [None]:
# Preview the first rows of the raw headline table.
news_headline.head()

In [None]:
# Report, per column, whether either table contains any missing value.
stock_price.isna().any(), news_headline.isna().any()

In [None]:
# Parse the Date column, strip the time-of-day component, then use it as a
# chronologically sorted index.
stock_price = (
    stock_price
    .assign(Date=pd.to_datetime(stock_price['Date']).dt.normalize())
    .set_index('Date')
    .sort_index(axis=0, ascending=True)
)
stock_price

In [None]:
# Build one row per publish date whose text is every headline of that day
# joined with commas, indexed and sorted by date.
news_headline = (
    news_headline
    .drop_duplicates()
    .assign(
        # publish_date is rewritten from its 8-character form to
        # 'YYYY-MM-DD' text before being parsed as a datetime.
        publish_date=lambda frame: pd.to_datetime(
            frame['publish_date']
            .astype(str)
            .apply(lambda raw: '-'.join((raw[0:4], raw[4:6], raw[6:8])))
        ).dt.normalize()
    )
    .filter(['publish_date', 'headline_text'])
    .groupby(['publish_date'])['headline_text']
    .apply(lambda headlines: ','.join(headlines))
    .reset_index()
    .set_index('publish_date')
    .sort_index(axis=0, ascending=True)
)
news_headline

In [None]:
# Place prices and headlines side by side, aligned on their date indexes.
stock_data = pd.concat(objs=[stock_price, news_headline], axis='columns')
# Drop every row that contains a missing value (for example, dates that
# appear in only one of the two sources).
stock_data.dropna(axis='index', inplace=True)
stock_data

In [None]:
# Create the four sentiment columns as empty-string placeholders.
for score_column in ('compound', 'negative', 'neutral', 'positive'):
    stock_data[score_column] = ''
stock_data.head()

### Performing sentiment analysis and calculating compound, positive, neutral, and negative scores

In [None]:
sid = SentimentIntensityAnalyzer()

# Score each day's headline text exactly once. The result is a Series of
# dicts; the four output columns are then read out of those dicts instead of
# re-running the analyzer four times on the same text.
polarity = stock_data['headline_text'].apply(sid.polarity_scores)

# Output column name -> key in the dict returned by polarity_scores.
score_keys = {'compound': 'compound', 'negative': 'neg', 'neutral': 'neu', 'positive': 'pos'}
for score_column, score_key in score_keys.items():
    # score_key is bound as a default argument so each lambda keeps its own key.
    stock_data[score_column] = polarity.apply(lambda scores, key=score_key: scores[key])

stock_data.head()

In [None]:
# Display the merged table with the sentiment columns filled in.
stock_data

In [None]:
# Keep only the modelling columns, in a fixed order (later cells select
# columns by position, so this order matters).
column_order = [
    'Close',
    'compound', 'negative', 'neutral', 'positive',
    'Open', 'High', 'Low', 'Volume',
]
stock_data = stock_data.loc[:, column_order]
stock_data.head()

In [None]:
# Persist the prepared table (index included) to disk.
stock_data.to_csv(path_or_buf='stock_data.csv')

In [None]:
# Reload the saved table. The first CSV column is read back under the name
# 'Unnamed: 0'; rename it to 'Date' and use it as the index.
stock_data = (
    pd.read_csv('stock_data.csv', index_col=False)
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)
stock_data.head()

### Plotting the close price graph

In [None]:
# Line chart of the closing price over the whole table.
fig, ax = plt.subplots(figsize=(16, 10))
stock_data['Close'].plot(ax=ax)
ax.set_title("Close Price")
ax.set_xlabel('Date')
ax.set_ylabel('Close Price (Rs.)')

### Distributing the data for training and testing

In [None]:
# Fraction of the (most recent) rows to use, and the share of those rows
# that goes to training.
percentage_of_data = 1.0
total_data = len(stock_data)
data_to_use = int(percentage_of_data * (total_data - 1))
train_end = int(data_to_use * 0.85)
# First row position of the window that later cells slice out.
start = total_data - data_to_use
print("Number of records in Training Data:", train_end)
# NOTE: this figure is simply "all rows minus training rows"; the exact
# test-set shape is printed after the actual split further down.
print("Number of records in Test Data:", total_data - train_end)

In [None]:
steps_to_predict = 1

# Rows start..total_data of the table; each series below is one column of
# that window, taken by position (0=Close ... 8=Volume).
window = stock_data.iloc[start:total_data]
(
    close_price,
    compound,
    negative,
    neutral,
    positive,
    open_price,
    high,
    low,
    volume,
) = (window.iloc[:, position] for position in range(9))

print("Close Price:")
close_price

In [None]:
# Shift by -1 so every row also carries the following row's close price and
# compound score.
close_price_shifted = close_price.shift(-1)
compound_shifted = compound.shift(-1)

# Assemble the modelling frame; the dict keys become the column names.
# The final row has no "next" values after the shift, so dropna removes it.
data = pd.concat(
    {
        'close_price': close_price,
        'close_price_shifted': close_price_shifted,
        'compound': compound,
        'compound_shifted': compound_shifted,
        'volume': volume,
        'open_price': open_price,
        'high': high,
        'low': low,
    },
    axis=1,
).dropna()
data.head(10)

### Setting the target variable

In [None]:
# Target: the close price of the following row.
y = data.loc[:, 'close_price_shifted']
y

In [None]:
# Feature columns fed to the model, in this order.
cols = [
    'close_price',
    'compound',
    'compound_shifted',
    'volume',
    'open_price',
    'high',
    'low',
]
x = data.loc[:, cols]
x

In [None]:
# Convert features and target to 2-D arrays: (rows, features) and (rows, 1).
x = np.array(x).reshape((len(x), len(cols)))
y = np.array(y).reshape((len(y), 1))

# Fit the scalers on the training rows only (the first `train_end` rows) so
# that the min/max of the held-out rows does not leak into preprocessing,
# then apply the fitted transform to every row. Held-out values may therefore
# fall slightly outside the (-1, 1) range.
scaler_x = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler_x.fit(x[:train_end])
x = scaler_x.transform(x)

scaler_y = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler_y.fit(y[:train_end])
y = scaler_y.transform(y)

# Show a small sample rather than the full arrays.
x[0:5], y[0:5]

### Splitting the data into train and test data

In [None]:
# Chronological split: the first `train_end` rows train the model and every
# remaining row is held out for testing. The test slice starts at `train_end`
# (not `train_end + 1`) so that no row is silently dropped between the sets.
X_train = x[:train_end]
X_test = x[train_end:]
y_train = y[:train_end]
y_test = y[train_end:]
print('Number of rows and columns in the Training set X:', X_train.shape, 'and y:', y_train.shape)
print('Number of rows and columns in the Test set X:', X_test.shape, 'and y:', y_test.shape)

In [None]:
# Append a trailing axis of length 1 so the inputs are 3-D, matching the
# LSTM input_shape of (len(cols), 1).
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
print('Shape of Training set X:', X_train.shape)
print('Shape of Test set X:', X_test.shape)

### Creating the LSTM model for prediction of stock prices

In [None]:
# Seeds NumPy's global random generator.
# NOTE(review): the Keras backend may use its own generator for weight
# initialisation - confirm whether it also needs seeding for reproducibility.
np.random.seed(2016)

# Three stacked 100-unit LSTM layers, each followed by 10% dropout, ending in
# a single-unit dense output.
model = Sequential([
    LSTM(100, return_sequences=True, activation='tanh', input_shape=(len(cols), 1)),
    Dropout(0.1),
    LSTM(100, return_sequences=True, activation='tanh'),
    Dropout(0.1),
    LSTM(100, activation='tanh'),
    Dropout(0.1),
    Dense(1),
])
model.summary()

In [None]:
# Mean-squared-error loss with the Adam optimizer.
model.compile(optimizer='adam', loss='mse')
# Train for 10 epochs in batches of 8, with 20% of the training data set
# aside for validation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=10,
    verbose=1,
    validation_split=0.2,
)

### Saving model 

In [None]:
# Serialise the architecture to JSON and write it to model.json.
model_json = model.to_json()
with open('model.json', 'w') as json_file:
    json_file.write(model_json)
# Store the trained weights separately.
# NOTE(review): recent Keras releases may require weight files to end in
# '.weights.h5' - confirm against the installed version.
model.save_weights('model.h5')
print('Model is saved to the disk')

In [None]:
# Peek at the first five (still scaled) test inputs.
X_test[0:5]

### Predicting the stock prices by putting data in the trained model

In [None]:
# Predict on the test inputs, then map the scaled outputs back to the
# original target scale with the target scaler.
scaled_predictions = model.predict(X_test)
predictions = scaler_y.inverse_transform(
    np.asarray(scaled_predictions).reshape(-1, 1)
)
print('Predictions:')
predictions[0:10]

In [None]:
# Loss of the fitted model on the training and test sets, one sample per batch.
train_loss = model.evaluate(x=X_train, y=y_train, batch_size=1)
test_loss = model.evaluate(x=X_test, y=y_test, batch_size=1)
for label, loss in (('Train Loss =', train_loss), ('Test Loss =', test_loss)):
    print(label, round(loss, 4))

### Checking the root mean square error

In [None]:
# `predictions` has already been mapped back to the original price scale,
# while `y_test` is still min-max scaled at this point. Convert the targets
# to the same scale before comparing; otherwise the error mixes two scales.
y_test_actual = scaler_y.inverse_transform(np.array(y_test).reshape((len(y_test), 1)))
root_mean_square_error = np.sqrt(np.mean(np.power((y_test_actual - predictions), 2)))
print('Root Mean Square Error =', round(root_mean_square_error, 4))

In [None]:
# `predictions` is on the original price scale but `y_test` is still min-max
# scaled here, so bring the targets back to the price scale before scoring.
y_test_in_price_units = scaler_y.inverse_transform(np.array(y_test).reshape((len(y_test), 1)))
# Despite its name, `rmse` holds the mean squared error; the square root is
# taken when printing.
rmse = metrics.mean_squared_error(y_test_in_price_units, predictions)
print('Root Mean Square Error (sklearn.metrics) =', round(np.sqrt(rmse), 4))

In [None]:
# Undo the scaling so the arrays are back in their original units.
# X_test is flattened to (rows, len(cols)) first; the targets to (rows, 1).
# Note: this overwrites the scaled arrays, so run this cell only once.
X_test = scaler_x.inverse_transform(np.asarray(X_test).reshape(-1, len(cols)))
y_train = scaler_y.inverse_transform(np.asarray(y_train).reshape(-1, 1))
y_test = scaler_y.inverse_transform(np.asarray(y_test).reshape(-1, 1))

### Plotting the comparison graph of Close price and Predicted price

In [None]:
# Overlay the predicted and the actual test-set close prices.
fig, ax = plt.subplots(figsize=(16, 10))
ax.plot(predictions, label="Predicted Close Price")
ax.plot(y_test[:, 0], label="Testing Close Price")
# Legend sits just below the axes, laid out in two columns.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=2)
plt.show()