<a href="https://colab.research.google.com/github/ngoyal16/ALGO-Trading/blob/master/stock_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install numpy
!pip install pandas
!pip install tensorflow
!pip install keras
!pip install pandas-datareader
!pip install yfinance

Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/31/14/6065cddc70cdf06dc6bb456e6636e64ededa294882e7af0dd21b3d57099f/yfinance-0.1.42.tar.gz
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Stored in directory: /root/.cache/pip/wheels/e0/ac/b0/99fa95a2a224e0de38cd022e7d2c1d71ebce9ceabcaf4c1c11
Successfully built yfinance
Installing collected packages: yfinance
Successfully installed yfinance-0.1.42


In [0]:
#UDACITY Learning

In [0]:
import pandas_datareader.data as pdr
import yfinance as yf
import time

yf.pdr_override()

In [0]:
"""Utility functions"""

import os

def symbol_to_path(symbol, base_dir="data"):
  """Return CSV File path given ticker symbol."""
  return os.path.join(base_dir, "{}.csv".format(str(symbol)))

In [18]:
def get_stock_data(ticker, start_date, end_date):
    """
    Gets historical stock data of given tickers between dates
    :param ticker: company, or companies whose data is to fetched
    :type ticker: string or list of strings
    :param start_date: starting date for stock prices
    :type start_date: string of date "YYYY-mm-dd"
    :param end_date: end date for stock prices
    :type end_date: string of date "YYYY-mm-dd"
    :return: stock_data.csv
    """
    i = 1
    try:
        all_data = pdr.get_data_yahoo(ticker, start_date, end_date)
    except ValueError:
        print("ValueError, trying again")
        i += 1
        if i < 5:
            time.sleep(10)
            get_stock_data(ticker, start_date, end_date)
        else:
            print("Tried 5 times, Yahoo error. Trying after 2 minutes")
            time.sleep(120)
            get_stock_data(ticker, start_date, end_date)
    stock_data = all_data["Adj Close"]
    
    all_data.to_csv(ticker + '.csv')
    stock_data.to_csv("stock_prices.csv")


def get_sp500(start_date, end_date):
    """
    Gets sp500 price data
    :param start_date: starting date for sp500 prices
    :type start_date: string of date "Y-m-d"
    :param end_date: end date for sp500 prices
    :type end_date: string of date "Y-m-d"
    :return: sp500_data.csv
    """
    i = 1
    try:
        sp500_all_data = pdr.get_data_yahoo("SPY", start_date, end_date)
    except ValueError:
        print("ValueError, trying again")
        i += 1
        if i < 5:
            time.sleep(10)
            get_stock_data(start_date, end_date)
        else:
            print("Tried 5 times, Yahoo error. Trying after 2 minutes")
            time.sleep(120)
            get_stock_data(start_date, end_date)
    sp500_data = sp500_all_data["Adj Close"]
    
    sp500_all_data.to_csv("sp500_all_data.csv")
    sp500_data.to_csv("sp500_data.csv")


if __name__ == "__main__":
    # get_stock_data("AAPL", "2018-05-01", "2018-06-01")
    get_sp500(start_date, end_date)

[*********************100%***********************]  1 of 1 downloaded




In [0]:
symbols = ['AAPL', 'IBM', 'TATAMOTORS.NS']
start_date  = "2019-05-01"
end_date = "2019-06-18"

In [6]:
for symbol in symbols:
  get_stock_data(symbol, start_date, end_date)

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded




[*********************100%***********************]  1 of 1 downloaded


In [0]:
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
def get_max_close(symbol):
  """Return the maximum closing value for stock indicated by symbol.
  
  Note: Data for a stock is stored in file: <symbol>.csv
  """
  df = pd.read_csv("{}.csv".format(symbol)) # read in data
  return df['Close'].max() # compute and return

In [0]:
def get_mean_volume(symbol):
  """Return the mean volume for stock indicated by symbol.
  
  Note: Data for a stock is stored in file: <symbol>.csv
  """
  df = pd.read_csv("{}.csv".format(symbol)) # read in data
  return df['Volume'].mean() # compute and return

In [0]:
def plt_adj_close(symbol):
  df = pd.read_csv("{}.csv".format(symbol)) # read in data
  df['Adj Close'].plot()
  plt.show()

In [0]:
def plt_high(symbol):
  df = pd.read_csv("{}.csv".format(symbol)) # read in data
  df['High'].plot()
  plt.show()

In [0]:
def plt_all(symbol):
  df = pd.read_csv("{}.csv".format(symbol)) # read in data
  df[['Open', 'High', 'Low', 'Close', 'Adj Close']].plot()
  plt.show()

In [34]:
# Create a date range
dates = pd.date_range(start_date, end_date)

# Create an empty dataframe
df = pd.DataFrame(index=dates)

# Read SP500 data into temporary dataframe
dfSP500 = pd.read_csv("sp500_all_data.csv", 
  index_col = "Date", 
  parse_dates = True,
  usecols = ['Date', 'Adj Close'],
  na_values = ['nan']
)

# Rename VALUES column to 'SP500' to prevent clash
dfSP500 = dfSP500.rename(columns={
    'Adj Close': 'SP500'
})

# Join the dataframe using DataFrame Join
df = df.join(dfSP500, how="inner")

for symbol in symbols:
  # Read the `SYMBOL` data into temporary dataframe
  df_temp = pd.read_csv('{}.csv'.format(symbol),
    index_col = 'Date',
    parse_dates = True,
    usecols = ['Date', 'Adj Close'],
    na_values = ['nan']
  )
  
  # Rename VALUES column to 'SYMBOL' to prevent clash
  df_temp = df_temp.rename(columns={
      'Adj Close': symbol
  })
  
  # Join dataframe with left join
  df = df.join(df_temp)

print(df)

             SP500    AAPL     IBM  TATAMOTORS.NS
2019-05-01  291.81  209.71  138.91            NaN
2019-05-02  291.18  208.35  137.95         207.30
2019-05-03  294.03  210.94  138.60         209.45
2019-05-06  292.82  207.68  138.73         199.80
2019-05-07  287.93  202.08  136.02         190.95
2019-05-08  287.53  202.12  136.38         185.10
2019-05-09  286.66  199.95  135.34         186.35
2019-05-10  288.10  197.18  135.32         185.90
2019-05-13  280.86  185.72  131.42         180.75
2019-05-14  283.40  188.66  133.31         184.50
2019-05-15  285.06  190.92  134.40         169.45
2019-05-16  287.70  190.08  135.88         175.40
2019-05-17  285.84  189.00  134.32         176.85
2019-05-20  283.95  183.09  135.12         190.15
2019-05-21  286.51  186.60  136.45         176.80
2019-05-22  285.63  182.78  136.35         179.10
2019-05-23  282.14  179.66  132.39         175.15
2019-05-24  282.78  178.97  132.28         182.30
2019-05-28  280.15  178.23  130.46         180.25


In [0]:
for symbol in symbols:
  # print("Max close:", symbol, get_max_close(symbol))
  # print("Mean volume:", symbol, get_mean_volume(symbol))
  # plt_all(symbol)
  # plt_adj_close(symbol)
  # plt_high(symbol)


In [0]:

import numpy as np
import tensorflow as tf

In [0]:
class DataProcessing:
    def __init__(self, file, train):
        self.file = pd.read_csv(file)
        self.train = train
        self.i = int(self.train * len(self.file))
        self.stock_train = self.file[0: self.i]
        self.stock_test = self.file[self.i:]
        self.input_train = []
        self.output_train = []
        self.input_test = []
        self.output_test = []

    def gen_train(self, seq_len):
        """
        Generates training data
        :param seq_len: length of window
        :return: X_train and Y_train
        """
        for i in range((len(self.stock_train)//seq_len)*seq_len - seq_len - 1):
            x = np.array(self.stock_train.iloc[i: i + seq_len, 1])
            y = np.array([self.stock_train.iloc[i + seq_len + 1, 1]], np.float64)
            self.input_train.append(x)
            self.output_train.append(y)
        self.X_train = np.array(self.input_train)
        self.Y_train = np.array(self.output_train)

    def gen_test(self, seq_len):
        """
        Generates test data
        :param seq_len: Length of window
        :return: X_test and Y_test
        """
        for i in range((len(self.stock_test)//seq_len)*seq_len - seq_len - 1):
            x = np.array(self.stock_test.iloc[i: i + seq_len, 1])
            y = np.array([self.stock_test.iloc[i + seq_len + 1, 1]], np.float64)
            self.input_test.append(x)
            self.output_test.append(y)
        self.X_test = np.array(self.input_test)
        self.Y_test = np.array(self.output_test)

In [0]:
def back_test(strategy, seq_len, ticker, start_date, end_date, dim):
    """
    A simple back test for a given date period
    :param strategy: the chosen strategy. Note to have already formed the model, and fitted with training data.
    :param seq_len: length of the days used for prediction
    :param ticker: company ticker
    :param start_date: starting date
    :type start_date: "YYYY-mm-dd"
    :param end_date: ending date
    :type end_date: "YYYY-mm-dd"
    :param dim: dimension required for strategy: 3dim for LSTM and 2dim for MLP
    :type dim: tuple
    :return: Percentage errors array that gives the errors for every test in the given date range
    """
    data = pdr.get_data_yahoo(ticker, start_date, end_date)
    stock_data = data["Adj Close"]
    errors = []
    
    for i in range((len(stock_data)//10)*10 - seq_len - 1):
        x = np.array(stock_data.iloc[i: i + seq_len]).reshape(dim) / 200
        y = np.array(stock_data.iloc[i + seq_len + 1]) / 200
        predict = strategy.predict(x)
        while predict == 0:
            predict = strategy.predict(x)
        error = (predict - y) / 100
        errors.append(error)
    total_error = np.array(errors)
    print(f"Average error = {total_error.mean()}")

In [0]:
start_date = "2003-01-01"
end_date = "2019-01-01"
get_stock_data("AAPL", start_date=start_date, end_date=end_date)

[*********************100%***********************]  1 of 1 downloaded




In [0]:
process = DataProcessing("stock_prices.csv", 0.9)
process.gen_test(10)
process.gen_train(10)

In [0]:
len(process.X_train)

3609

In [0]:
len(process.X_test)

389

In [0]:
X_train = process.X_train.reshape((3609, 10, 1)) / 200
Y_train = process.Y_train / 200

In [0]:
X_test = process.X_test.reshape(389, 10, 1) / 200
Y_test = process.Y_test / 200

In [0]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(20, input_shape=(10, 1), return_sequences=True))
model.add(tf.keras.layers.LSTM(20))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.relu))

In [0]:
model.compile(optimizer="adam", loss="mean_squared_error")

In [0]:
model.fit(X_train, Y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fe2917a1898>

In [0]:
print(model.evaluate(X_test, Y_test))

0.7811904324357491


In [0]:
data = pdr.get_data_yahoo("AAPL", "2018-12-19", "2019-01-04")
stock = data["Adj Close"]
X_predict = np.array(stock).reshape((1, 10, 1)) / 200
print(model.predict(X_predict)*200)

[*********************100%***********************]  1 of 1 downloaded
[[0.]]


[[0.]]


In [0]:
X_train = process.X_train / 200
Y_train = process.Y_train / 200

In [0]:
X_test = process.X_test / 200
Y_test = process.Y_test / 200

In [0]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(100, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(100, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.relu))

In [0]:
model.compile(optimizer="adam", loss="mean_squared_error")

In [0]:
model.fit(X_train, Y_train, epochs=90)

Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
E

<tensorflow.python.keras.callbacks.History at 0x7fe291361e48>

In [0]:
print(model.evaluate(X_test, Y_test))

0.0006206300412484261


In [0]:
data = pdr.get_data_yahoo("AAPL", "2019-01-02", "2019-01-16")
stock = data["Adj Close"]
X_predict = np.array(stock).reshape((1, 10)) / 200
print(model.predict(X_predict)*200)

[*********************100%***********************]  1 of 1 downloaded
[[154.00316]]


In [0]:
back_test(strategy=model, seq_len=10, ticker='AAPL', start_date=start_date, end_date=end_date, dim=(1, 10))

[*********************100%***********************]  1 of 1 downloaded
Average error = -1.8484941392671317e-05


In [0]:
back_test(strategy=model, seq_len=10, ticker='AAPL', start_date=start_date, end_date=end_date, dim=(1, 10))

[*********************100%***********************]  1 of 1 downloaded
Average error = 4.990389061276801e-05


In [0]:
from datetime import datetime, timedelta
import calendar
import os.path
import numpy as np
import pandas as pd
from itertools import groupby

TYPE_LINE_UNDEFINED = 0
TYPE_LINE_BOOK_DATA_STK = 1
TYPE_LINE_BOOK_DATA_FUT = 2
TYPE_LINE_BOOK_OPTION = 3
TYPE_LINE_TRADED_VOLUME = 4

def checkDate(lineItem):
    try:
        datetime.strptime(lineItem, '%Y/%m/%d')
        return True
    except ValueError:
        return False


def checkTimestamp(lineItem):
    return True


# Returns the type of lineItems
def validateLineItem(lineItems):
    if len(lineItems) < 4:
        return TYPE_LINE_UNDEFINED
    if checkDate(lineItems[0]) and checkTimestamp(lineItems[1]) and lineItems[2] == "Book":
        if lineItems[4][-3:] == "-10":
            return TYPE_LINE_BOOK_DATA_FUT
        else:
            return TYPE_LINE_BOOK_DATA_STK
    if len(lineItems) == 7 and lineItems[3] == '|':
        return TYPE_LINE_BOOK_OPTION
    if checkDate(lineItems[0]) and checkTimestamp(lineItems[1]) and lineItems[2] == "TradeInfo":
        return TYPE_LINE_TRADED_VOLUME
    return TYPE_LINE_UNDEFINED

def parseBookDataOptionLine(lineItems):
    if (len(lineItems) < 7):
        return None
    bidVol = float(lineItems[1])
    bidPrice = float(lineItems[2])
    askPrice = float(lineItems[4])
    askVol = float(lineItems[5])
    return {'bidVolume': bidVol,
            'bidPrice': bidPrice,
            'askPrice': askPrice,
            'askVolume': askVol}


def get_exp_date(trade_date, holiday_dates):
    date = max(week[-4] for week in calendar.monthcalendar(trade_date.year, trade_date.month))
    if date >= trade_date.day:
        exp_date = datetime(year=trade_date.year, month=trade_date.month, day=date)
    else:
        if trade_date.month != 12:
            date = max(week[-4] for week in calendar.monthcalendar(trade_date.year, 1 + trade_date.month))
            exp_date = datetime(year=trade_date.year, month=1 + trade_date.month, day=date)
        else:
            date = max(week[-4] for week in calendar.monthcalendar(1 + trade_date.year, 1))
            exp_date = datetime(year=1 + trade_date.year, month=1, day=date)
    if datetime.strftime(exp_date, '%Y%m%d') in holiday_dates:
        exp_date = exp_date + timedelta(days=-1)
    return exp_date.replace(hour=15, minute=30)

def groupAndSortByTimeUpdates(instrumentUpdates):
    instrumentUpdates.sort(key=lambda x: x['timeOfUpdate'])
    groupedInstruments = []
    # groupby only works on already sorted elements, so we sorted first
    for timeOfUpdate, sameTimeInstruments in groupby(instrumentUpdates, lambda x: x['timeOfUpdate']):
        instruments = []
        for sameTimeInstrument in sameTimeInstruments:
            instruments.append(sameTimeInstrument)
        groupedInstruments.append([timeOfUpdate, instruments])
    return groupedInstruments

class InstrumentsFromFile():
    def __init__(self, fileName, expiryTime):
        self.fileName = fileName
        self.expiryTime = expiryTime
        self.currentInstrumentSymbol = None
        self.currentTimeOfUpdate = None
        self.currentBookData = None
        self.currentFutureBookData = None
        self.futureFlag = False

    def processLine(self, line):
        lineItems = line.split()
        lineItemType = validateLineItem(lineItems)
        if (lineItemType == TYPE_LINE_BOOK_DATA_STK):
            inst = None
            if self.currentInstrumentSymbol is not None:
                inst = {'stockInstrumentId' : self.currentInstrumentSymbol,
                        'tradeSymbol' : self.currentInstrumentSymbol,
                        'timeOfUpdate' : self.currentTimeOfUpdate,
                        'bookData' : self.currentBookData,
                        'expiryTime' : self.expiryTime,
                        'futureBookData' : self.currentFutureBookData}
            self.currentTimeOfUpdate = datetime.strptime(lineItems[0] + ' ' + lineItems[1], "%Y/%m/%d %H:%M:%S:%f")
            self.currentInstrumentSymbol = lineItems[4]
            self.currentBookData = None
            self.currentFutureBookData = None
            self.futureFlag = False
            return inst
        elif(lineItemType == TYPE_LINE_BOOK_OPTION):
            parsedOption = parseBookDataOptionLine(lineItems)
            if not self.futureFlag:
                if self.currentBookData is None:
                    self.currentBookData = {}
                    self.currentBookData['bidVolume'] = np.array([parsedOption['bidVolume']])
                    self.currentBookData['bidPrice'] = np.array([parsedOption['bidPrice']])
                    self.currentBookData['askPrice'] = np.array([parsedOption['askPrice']])
                    self.currentBookData['askVolume'] = np.array([parsedOption['askVolume']])
                else:
                    self.currentBookData['bidVolume'] = np.append(self.currentBookData['bidVolume'], parsedOption['bidVolume'])
                    self.currentBookData['bidPrice'] = np.append(self.currentBookData['bidPrice'], parsedOption['bidPrice'])
                    self.currentBookData['askPrice'] = np.append(self.currentBookData['askPrice'], parsedOption['askPrice'])
                    self.currentBookData['askVolume'] = np.append(self.currentBookData['askVolume'], parsedOption['askVolume'])
            else:
                if self.currentFutureBookData is None:
                    self.currentFutureBookData = {}
                    self.currentFutureBookData['bidVolume'] = np.array([parsedOption['bidVolume']])
                    self.currentFutureBookData['bidPrice'] = np.array([parsedOption['bidPrice']])
                    self.currentFutureBookData['askPrice'] = np.array([parsedOption['askPrice']])
                    self.currentFutureBookData['askVolume'] = np.array([parsedOption['askVolume']])
                else:
                    self.currentFutureBookData['bidVolume'] = np.append(self.currentFutureBookData['bidVolume'], parsedOption['bidVolume'])
                    self.currentFutureBookData['bidPrice'] = np.append(self.currentFutureBookData['bidPrice'], parsedOption['bidPrice'])
                    self.currentFutureBookData['askPrice'] = np.append(self.currentFutureBookData['askPrice'], parsedOption['askPrice'])
                    self.currentFutureBookData['askVolume'] = np.append(self.currentFutureBookData['askVolume'], parsedOption['askVolume'])
        elif(lineItemType == TYPE_LINE_TRADED_VOLUME):
            if not self.futureFlag:
                self.currentBookData['total_traded_value'] = lineItems[6]
                self.currentBookData['total_traded_size'] = lineItems[8]
            else:
                self.currentFutureBookData['total_traded_value'] = lineItems[6]
                self.currentFutureBookData['total_traded_size'] = lineItems[8]
        elif(lineItemType == TYPE_LINE_BOOK_DATA_FUT):
            self.futureFlag = True

    def processLinesIntoInstruments(self):
        with open(self.fileName, "r") as ins:
            instruments = []
            for line in ins:
                inst = self.processLine(line)
                if inst is not None:
                    instruments.append(inst)
            return instruments


class DataSource(object):
    def __init__(self, folderName, instrumentIds, startDateStr, endDateStr):
        self.startDate = datetime.strptime(startDateStr, "%Y%m%d")
        self.endDate = datetime.strptime(endDateStr, "%Y%m%d")
        self.folderName = folderName
        self.instrumentIds = instrumentIds
        self.currentDate = self.startDate

    def getFileName(self, date):
        dateStr = date.strftime("%Y%m%d")
        return '%s/%s/data' % (self.folderName, dateStr)

    def emitInstrumentUpdate(self, holidays):
        while (self.currentDate <= self.endDate):
            allInstrumentUpdates = []
            fileName = self.getFileName(self.currentDate)
            if not os.path.isfile(fileName):
                continue
            expiryTime = get_exp_date(self.currentDate, holidays)
            fileHandler = InstrumentsFromFile(fileName=fileName, expiryTime=expiryTime)
            instrumentUpdates = fileHandler.processLinesIntoInstruments()
            allInstrumentUpdates = allInstrumentUpdates + instrumentUpdates
            groupedInstrumentUpdates = groupAndSortByTimeUpdates(allInstrumentUpdates)
            for timeOfUpdate, instrumentUpdates in groupedInstrumentUpdates:
                yield([timeOfUpdate, instrumentUpdates])
            self.currentDate = self.currentDate + timedelta(days=1)


def getvwap(stockData):
    bid_vol, ask_vol, bid_price, ask_price = stockData['bidVolume'], stockData['askVolume'], stockData['bidPrice'], stockData['askPrice']
    volume = (np.sum(bid_vol) + np.sum(ask_vol))
    if volume > 0:
        price = (np.sum(bid_price * ask_vol) + np.sum(ask_price * bid_vol)) / (volume)  # Calculated for a vol = 0.12353
    else:
        price = (np.sum(bid_price) + np.sum(ask_price)) / (len(bid_price))
    return price

def getbidp(stockData):
    bid_price=stockData['bidPrice']
    return np.max(bid_price)

def getaskp(stockData):
    ask_price=stockData['askPrice']
    return np.max(ask_price)

def get_totalv(stockData):
    total_value=stockData['total_traded_value']
    return total_value
    #print(total_value)

def get_totals(stockData):
    total_size=stockData['total_traded_size']
    return total_size



def writecsv(csv_dir, results, m):
    # results = results.sort_index(axis=0, ascending=False)
    print('writing %s%s.csv' % (csv_dir, m))
    fileName = '%s%s.csv' % (csv_dir, m)
    if os.path.exists(fileName):
        csv_file = open(fileName, 'a')
        results.to_csv(csv_file, header=False)
    else:
        csv_file = open(fileName, 'w ')
        results.to_csv(csv_file, header=True)
    csv_file.close()


if __name__ == "__main__":
    folderName = 'spare/local/cjain/NSEDATA/'
    fileName = 'stocklist'
    holiday_dates = ['20160706', '20160815', '20160905', '20160913', '20161011', '20161012', '20161031', '20161114', 
                '20161225', '20170101', '20170126', '20170224', '20170313', '20170404', '20170414', '20170501',
                '20170626', '20170815', '20170825', '20171002', '20171019', '20171020', '20171225', 
                '20180101', '20180126', '20180213', '20180302', '20180329', '20180330', '20180501', 
                '20180815', '20180822', '20180913', '20180920', '20181002', '20181018', '20181107', '20181225']

    dates =  next(os.walk(folderName))[1]
    dates.sort()
    print(dates)
    instrumentIds = []
    with open(fileName, "r") as f:
        for line in f:
            lineItems = line.split()
            instrumentIds.append(lineItems[0])

    for date in dates:
        all_data = {}
        startDateStr = date
        endDateStr = date
        for instrumentId in instrumentIds:
            all_data[instrumentId] = pd.DataFrame(index=[pd.date_range(startDateStr + ' 09:16:00', periods=375, freq='60s')],
                                                  columns=['stockVWAP', 'futureVWAP','bidPrice','askPrice', 'total_size', 'total_value'])

        dataParser = DataSource(folderName, instrumentIds, startDateStr, endDateStr)
        groupedInstrumentUpdates = dataParser.emitInstrumentUpdate(holiday_dates)
        for timeOfUpdate, instrumentUpdates in groupedInstrumentUpdates:
            print(timeOfUpdate)
            for instrumentUpdate in instrumentUpdates:
                instrumentId = instrumentUpdate['tradeSymbol']
                if timeOfUpdate in all_data[instrumentId].index:
                    stockData = instrumentUpdate['bookData']
                    futureData = instrumentUpdate['futureBookData']
                    stockData['bidPrice'] = stockData['bidPrice'] / 100.0
                    stockData['askPrice'] = stockData['askPrice'] / 100.0
                    futureData['bidPrice'] = futureData['bidPrice'] / 100.0
                    futureData['askPrice'] = futureData['askPrice'] / 100.0
                    all_data[instrumentId].loc[timeOfUpdate, 'stockVWAP'] = getvwap(stockData)
                    all_data[instrumentId].loc[timeOfUpdate, 'futureVWAP'] = getvwap(futureData)
                    all_data[instrumentId].loc[timeOfUpdate, 'bidPrice'] = getbidp(stockData)
                    all_data[instrumentId].loc[timeOfUpdate, 'askPrice'] = getaskp(stockData)
                    all_data[instrumentId].loc[timeOfUpdate, 'total_value'] = get_totalv(stockData)
                    all_data[instrumentId].loc[timeOfUpdate, 'total_size'] = get_totals(stockData)




        for instrumentId in instrumentIds:
            writecsv('stock_data_new/', all_data[instrumentId], instrumentId)
            #writecsv('parsedData/', futureData['askPrice'], instrumentId)



StopIteration: ignored