In [1]:
# import libraries
import math
from datetime import date, timedelta
import pandas_datareader as web
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM


In [2]:
# I will be considering dates of stock ranging from current date(today) to 600 days from current day for analysis
today = date.today()
prev_600 = (today - timedelta(600)).strftime("%Y-%m-%d")
print(prev_600, today)

2019-07-16 2021-03-07


In [3]:
# Prediction for stock are based on the following steps:-
# Creating the dataframe by using YAHOO as datasource, today and prev_600 dates
# Split the dataframe into train and test dataset.
# Defining the model. I have used LSTM for this task, which is a variation of Recurrent Neural Network.
# Fit the model by using train dataset
# Predict the stock prices based on test dataset
# Finally store the Date, Actual Price and Prediction price in the database named after Stock

def process_predict_generate_data(stock):
  df = web.DataReader(stock+'.NS', data_source='yahoo', start=prev_600, end=today)
  data = df.filter(['Close'])
  dataset = data.values
  training_data_len = math.ceil(len(dataset)*.8)

  # Scaling the data
  scaler = MinMaxScaler(feature_range=(0,1))
  scaled_data = scaler.fit_transform(dataset)

  # Creating training dataset
  train_data = scaled_data[0:training_data_len, : ]
  x_train = []
  y_train = []
  for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])

  x_train, y_train = np.array(x_train), np.array(y_train)
  x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

  # Building LSTM model
  model = Sequential()
  model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
  model.add(LSTM(50,return_sequences=False))
  model.add(Dense(25))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mean_squared_error')
  model.fit(x_train, y_train, batch_size=1, epochs=1, verbose=1)

  # Creating testing dataset
  test_data = scaled_data[training_data_len - 60: , :]
  x_test = []
  y_test = dataset[training_data_len:, :]
  for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i, 0])
  x_test = np.array(x_test)
  x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1], 1))

  #Get the predictions
  predictions = model.predict(x_test)
  predictions = scaler.inverse_transform(predictions)

  train = data[: training_data_len]
  valid = data[training_data_len: ]
  valid['Predictions'] = predictions
  valid['Date'] = valid.index
  database_filename = stock+'.db'
  engine = create_engine('sqlite:///{}'.format(database_filename))

  table_name = database_filename.split('.')[0]
  valid.to_sql(table_name, engine, index=False, if_exists = 'replace')

In [4]:
# List of stock on which Predictions are done.
# Note: These are stocks in NIFTY50
stock_data = [
  "ADANIPORTS","ASIANPAINT","AXISBANK","BAJAJ-AUTO",
  "BAJFINANCE","BAJAJFINSV","BPCL","BHARTIARTL","BRITANNIA",
  "CIPLA","COALINDIA","DIVISLAB","DRREDDY","EICHERMOT",
  "GAIL","GRASIM","HCLTECH","HDFCBANK","HDFCLIFE","HEROMOTOCO",
  "HINDALCO","HINDUNILVR","HDFC","ICICIBANK","ITC","IOC","INDUSINDBK",
  "INFY","JSWSTEEL","KOTAKBANK","LT","M&M","MARUTI","NTPC","NESTLEIND",
  "ONGC","POWERGRID","RELIANCE","SBILIFE","SHREECEM","SBIN","SUNPHARMA","TCS",
  "TATAMOTORS","TATASTEEL","TECHM","TITAN","UPL","ULTRACEMCO","WIPRO"
]

In [5]:
# Iterate through stock_data and perform processing on each stock 
for stock in stock_data:
  process_predict_generate_data(stock)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [6]:
# Compress the list of database into zip filed named as 'data.zip' and finally download it
!zip -r data.zip .
from google.colab import files

files.download("data.zip")

updating: .config/ (stored 0%)
updating: .config/logs/ (stored 0%)
updating: .config/logs/2021.03.01/ (stored 0%)
updating: .config/logs/2021.03.01/14.34.56.151354.log (deflated 53%)
updating: .config/logs/2021.03.01/14.35.29.900487.log (deflated 54%)
updating: .config/logs/2021.03.01/14.35.09.556851.log (deflated 87%)
updating: .config/logs/2021.03.01/14.34.37.071843.log (deflated 92%)
updating: .config/logs/2021.03.01/14.35.14.752951.log (deflated 54%)
updating: .config/logs/2021.03.01/14.35.29.380461.log (deflated 54%)
updating: .config/.last_update_check.json (deflated 25%)
updating: .config/config_sentinel (stored 0%)
updating: .config/.last_survey_prompt.yaml (stored 0%)
updating: .config/.last_opt_in_prompt.yaml (stored 0%)
updating: .config/active_config (stored 0%)
updating: .config/gce (stored 0%)
updating: .config/configurations/ (stored 0%)
updating: .config/configurations/config_default (deflated 15%)
updating: HINDALCO.db (deflated 82%)
updating: KOTAKBANK.db (deflated 82

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>