In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
import os
import sys
import datetime
import numpy as np
import re
import logging

# Root-logger setup: every record shows a timestamp and the line number of
# the logging call that emitted it.
logging.basicConfig(
    format="Date-Time : %(asctime)s : Line No. : %(lineno)d - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger()
from joblib import Parallel, delayed

# Passed as n_jobs to joblib.Parallel by the loader cells below.
num_cores = 4
# NOTE(review): tqdm is imported but not used in the cells visible here.
from tqdm import tqdm

# Folder whose entries are listed and parsed by the equity loader cell.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
rawBhavCopyDataFldr = (
    r"C:\Users\ksdee\Documents\PersonalFinance\Trading\getbhavcopy\data"
)
# Sub-folder of the folder above, listed and parsed by the futures loader cell.
rawBhavCopyFutureDataFldr = (
    r"C:\Users\ksdee\Documents\PersonalFinance\Trading\getbhavcopy\data\futures"
)
# Cumulative CSVs that the loader cells read, extend and write back.
bhavCopyToolFl = r"C:\Users\ksdee\Documents\PersonalFinance\Trading\Trading_Data\NSEData\bhavCopyTool.csv"
bhavCopyToolFutureFl = r"C:\Users\ksdee\Documents\PersonalFinance\Trading\Trading_Data\NSEData\bhavCopyToolFuture.csv"

In [3]:
# Column layout shared by the cumulative equity CSV and the frames returned
# by readBhavCopyFiles.
cols = [
    "filename",
    "symbol",
    "date",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "delivery",
]

# Load the rows parsed on earlier runs so already-ingested files are skipped.
try:
    df = pd.read_csv(bhavCopyToolFl)
    df["date"] = pd.to_datetime(df["date"]).dt.date
except FileNotFoundError:
    # First run: the cumulative file does not exist yet.
    df = pd.DataFrame(columns=cols)
except Exception:
    # Still best-effort (start from an empty table), but say why instead of
    # hiding the failure; a bare except would also swallow KeyboardInterrupt.
    logger.exception(
        f"could not load {bhavCopyToolFl}; starting from an empty table"
    )
    df = pd.DataFrame(columns=cols)

# Names of the raw files whose rows are already present in df.
existingFiles = list(df.filename.values)


def readBhavCopyFiles(fl):
    """Parse one raw equity bhavcopy file into a DataFrame with columns ``cols``.

    The first line of the file is skipped as a header. Every other line is
    split on "," and read positionally: symbol, date (``%Y%m%d``), open,
    high, low, close, volume, delivery. Lines that cannot be parsed are
    skipped; their count is logged once per file.

    Reads the module-level ``cols``, ``existingFiles``,
    ``rawBhavCopyDataFldr`` and ``logger``.

    Parameters
    ----------
    fl : str
        Entry name inside ``rawBhavCopyDataFldr``.

    Returns
    -------
    pandas.DataFrame or None
        One row per parsed line; an empty frame when ``fl`` is already listed
        in ``existingFiles``; ``None`` when ``fl`` is not a regular file (for
        example a sub-folder).
    """
    logger.info(f"processing for {fl}")
    path = rawBhavCopyDataFldr + os.path.sep + fl
    if not os.path.isfile(path):
        # Kept as None for callers that rely on pd.concat dropping None items.
        return None
    if fl in existingFiles:
        return pd.DataFrame(columns=cols)

    records = []
    skipped = 0
    # The context manager closes the handle even if parsing raises.
    with open(path, "rb") as ff:
        for lineNo, ln in enumerate(ff):
            if lineNo == 0:
                continue  # header line
            try:
                # Decode the bytes instead of parsing str(bytes): the repr
                # form b'...' put "b'" in front of the symbol and a quote
                # after the last field, and that quote was then turned into
                # an extra "0" digit by the substitution below.
                x = ln.strip().decode("utf-8", errors="replace").split(",")
                records.append(
                    {
                        "filename": fl,
                        "symbol": x[0],
                        "date": datetime.datetime.strptime(x[1], "%Y%m%d"),
                        "open": float(x[2]),
                        "high": float(x[3]),
                        "low": float(x[4]),
                        "close": float(x[5]),
                        "volume": int(re.sub("[^0-9]", "0", x[6])),
                        # An empty delivery field is read as 0.
                        "delivery": int(re.sub("[^0-9]", "0", x[7]) or "0"),
                    }
                )
            except (ValueError, IndexError):
                skipped += 1
    if skipped:
        logger.warning(f"skipped {skipped} unparseable line(s) in {fl}")
    # Build the frame once; appending one row at a time is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(records, columns=cols)


# Parse every entry of the raw folder that is not yet in the cumulative
# table, using a pool of threads.
flist = pd.Series(os.listdir(rawBhavCopyDataFldr), dtype="object")
flist = flist[~flist.isin(existingFiles)]
result = Parallel(n_jobs=num_cores, backend="threading")(
    delayed(readBhavCopyFiles)(fl) for fl in flist
)

# The loader yields None for non-file entries and may yield empty frames;
# neither contributes rows.
newFrames = [frame for frame in result if frame is not None and not frame.empty]
if newFrames:
    try:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        df = pd.concat([df] + newFrames)
        df.to_csv(bhavCopyToolFl, index=False)
    except Exception:
        # Same message as before, now with the traceback attached.
        logger.exception("error 1")
else:
    logger.info("no new equity rows to append")

Date-Time : 2022-04-22 20:44:34,774 : Line No. : 23 - processing for 2022-04-22-NSE-EQ.txt
Date-Time : 2022-04-22 20:44:34,786 : Line No. : 23 - processing for futures


In [4]:
# Column layout shared by the cumulative futures CSV and the frames returned
# by the futures loader.
colFtr = ["filename", "symbol", "date", "open", "high", "low", "close", "volume", "OI"]

# NOTE: df and existingFiles are rebound here; from this cell on they refer
# to the futures table, not the equity table.
try:
    df = pd.read_csv(bhavCopyToolFutureFl)
    df["date"] = pd.to_datetime(df["date"]).dt.date
except FileNotFoundError:
    # First run: the cumulative futures file does not exist yet.
    df = pd.DataFrame(columns=colFtr)
except Exception:
    # Still best-effort (start from an empty table), but say why instead of
    # hiding the failure; a bare except would also swallow KeyboardInterrupt.
    logger.exception(
        f"could not load {bhavCopyToolFutureFl}; starting from an empty table"
    )
    df = pd.DataFrame(columns=colFtr)

# Names of the raw futures files whose rows are already present in df.
existingFiles = list(df.filename.values)


def readBhavCopyFiles(fl):
    """Parse one raw futures bhavcopy file into a DataFrame with columns ``colFtr``.

    NOTE: this definition rebinds the name of the equity loader defined in an
    earlier cell; after this cell runs, ``readBhavCopyFiles`` reads futures
    files only.

    The first line of the file is skipped as a header. Every other line is
    split on "," and read positionally: symbol, date (``%Y%m%d``), open,
    high, low, close, volume, open interest. For volume and open interest
    only the part before the first "." is used. Lines that cannot be parsed
    are logged and skipped.

    Reads the module-level ``colFtr``, ``existingFiles``,
    ``rawBhavCopyFutureDataFldr`` and ``logger``.

    Parameters
    ----------
    fl : str
        Entry name inside ``rawBhavCopyFutureDataFldr``.

    Returns
    -------
    pandas.DataFrame or None
        One row per parsed line; an empty frame when ``fl`` is already listed
        in ``existingFiles``; ``None`` when ``fl`` is not a regular file.
    """
    path = rawBhavCopyFutureDataFldr + os.path.sep + fl
    if not os.path.isfile(path):
        # Kept as None for callers that rely on pd.concat dropping None items.
        return None
    if fl in existingFiles:
        return pd.DataFrame(columns=colFtr)

    logger.info(f"processing for {fl}")
    records = []
    # The context manager closes the handle even if parsing raises.
    with open(path, "rb") as ff:
        for lineNo, ln in enumerate(ff):
            if lineNo == 0:
                continue  # header line
            try:
                # Decode the bytes rather than parsing the b'...' repr that
                # str(bytes) produces.
                x = ln.strip().decode("utf-8", errors="replace").split(",")
                records.append(
                    {
                        "filename": fl,
                        "symbol": x[0],
                        "date": datetime.datetime.strptime(x[1], "%Y%m%d"),
                        "open": float(x[2]),
                        "high": float(x[3]),
                        "low": float(x[4]),
                        "close": float(x[5]),
                        "volume": int(x[6].split(".")[0]),
                        "OI": int(re.sub("[^0-9]", "", x[7].split(".")[0])),
                    }
                )
            except (ValueError, IndexError):
                logger.error(f"Partse Error {ln}")
    # Build the frame once; appending one row at a time is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(records, columns=colFtr)


# Parse every entry of the raw futures folder that is not yet in the
# cumulative table, using a pool of threads.
flist = pd.Series(os.listdir(rawBhavCopyFutureDataFldr), dtype="object")
flist = flist[~flist.isin(existingFiles)]
result = Parallel(n_jobs=num_cores, backend="threading")(
    delayed(readBhavCopyFiles)(fl) for fl in flist
)

# The loader yields None for non-file entries and may yield empty frames;
# neither contributes rows.
newFrames = [frame for frame in result if frame is not None and not frame.empty]
if newFrames:
    try:
        result = pd.concat(newFrames)
        # Keep only the part of the symbol before the first "-".
        result["symbol"] = result["symbol"].str.split("-").str[0].str.strip()
        # Collapse all rows sharing (file, symbol, date) into one: prices are
        # averaged, volume and open interest are summed.
        result = (
            result.groupby(["filename", "symbol", "date"])
            .agg(
                {
                    "open": "mean",
                    "high": "mean",
                    "low": "mean",
                    "close": "mean",
                    "volume": "sum",
                    "OI": "sum",
                }
            )
            .reset_index()
        )

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        df = pd.concat([df, result])
        df.to_csv(bhavCopyToolFutureFl, index=False)
    except Exception:
        # Same message as before, now with the traceback attached.
        logger.exception("error 1")
else:
    logger.info("no new futures rows to append")

Date-Time : 2022-04-22 20:45:06,048 : Line No. : 18 - processing for 2022-04-22-NSE-FO.txt


In [5]:
def getNifty500(nList="nifty500"):
    """Download an index list CSV from the NSE archive as a DataFrame.

    Parameters
    ----------
    nList : str, default "nifty500"
        List name inserted into the URL
        ``https://archives.nseindia.com/content/indices/ind_<nList>list.csv``.
        The default reproduces the previously hard-coded list.

    Returns
    -------
    pandas.DataFrame
        The CSV as parsed by ``pd.read_csv``; network or parse errors raised
        by ``pd.read_csv`` propagate to the caller.
    """
    url = "https://archives.nseindia.com/content/indices/ind_" + nList + "list.csv"
    return pd.read_csv(url)


# Downloads the list when this cell runs (network access required); a later
# cell adds index rows to it and renames its columns.
currNifty500 = getNifty500()

In [6]:
# Download the symbol-change table and give its four columns fixed names:
# company name, old symbol, new symbol, date the change applies from.
symbolChange = pd.read_csv(
    "https://www1.nseindia.com/content/equities/symbolchange.csv",
    engine="python",
    encoding="ISO-8859-1",
)
symbolChange.columns = [
    "SYMB_COMPANY_NAME",
    "SM_KEY_SYMBOL",
    "SM_NEW_SYMBOL",
    "SM_APPLICABLE_FROM",
]
# Store the applicable-from values as plain datetime.date objects.
applicableFrom = pd.to_datetime(symbolChange["SM_APPLICABLE_FROM"])
symbolChange["SM_APPLICABLE_FROM"] = applicableFrom.dt.date

In [7]:
# Re-read both cumulative CSVs from disk and turn their "date" columns into
# plain datetime.date objects.
future = pd.read_csv(bhavCopyToolFutureFl).assign(
    date=lambda frame: pd.to_datetime(frame["date"]).dt.date
)
dfStocks = pd.read_csv(bhavCopyToolFl).assign(
    date=lambda frame: pd.to_datetime(frame["date"]).dt.date
)

In [8]:
# Keep only the renames already in effect today and apply them oldest first,
# so that a chain of renames (A -> B, later B -> C) ends on the latest symbol.
symbolChange = symbolChange.loc[
    symbolChange.SM_APPLICABLE_FROM <= datetime.date.today(), :
].sort_values(by=["SM_APPLICABLE_FROM"], ascending=True)

for oldSymbol, newSymbol in zip(
    symbolChange["SM_KEY_SYMBOL"], symbolChange["SM_NEW_SYMBOL"]
):
    # Assign the result back to the column. Calling replace(inplace=True) on
    # `frame.symbol` is a chained in-place update: pandas 2.x warns about it
    # and with Copy-on-Write it no longer changes the frame.
    dfStocks["symbol"] = dfStocks["symbol"].replace(
        to_replace=oldSymbol, value=newSymbol
    )
    future["symbol"] = future["symbol"].replace(
        to_replace=oldSymbol, value=newSymbol
    )

In [9]:
# Futures symbol -> symbol used for the same index in the equity table, so
# the two tables can be joined on "symbol".
dirMap = {
    "FINNIFTY": "NIFTYFINSERVICE",
    "MIDCPNIFTY": "MIDCAP50",
    "NIFTY": "NSENIFTY",
    "BANKNIFTY": "BANKNIFTY",
}
for ky, val in dirMap.items():
    # Assign the result back to the column. Calling replace(inplace=True) on
    # `future.symbol` is a chained in-place update: pandas 2.x warns about it
    # and with Copy-on-Write it no longer changes the frame.
    future["symbol"] = future["symbol"].replace(to_replace=ky, value=val)

In [10]:
# Attach open interest from the futures table to every equity row; equity
# rows without a futures match keep a missing OI (left join).
ohlcData = dfStocks[
    ["symbol", "date", "open", "high", "low", "close", "volume", "delivery"]
].merge(
    future[["symbol", "date", "OI"]],
    on=["symbol", "date"],
    how="left",
)

In [11]:
# Index symbols to keep alongside the downloaded constituents.
indexes = [
    "NIFTY200",
    "NIFTY100",
    "NSE500",
    "NSENIFTY",
    "MIDCAP50",
    "BANKNIFTY",
    "NIFTYFINSERVICE",
    "NIFTYFMGC",
    "NSEIT",
    "NIFTYMEDIA",
    "NIFTYPVTBANK",
    "NIFTYPSUBANK",
    "NIFTYENERGY",
]

# One synthetic row per index, built in a single frame instead of calling
# DataFrame.append (removed in pandas 2.0) once per index.
indexRows = pd.DataFrame(
    {
        "company": [f"IND_{ind}" for ind in indexes],
        "industry": ["INDEX"] * len(indexes),
        "symbol": indexes,
    }
)

# Rename by label before selecting: rename() ignores labels that are absent,
# so running this cell a second time works and drop_duplicates removes the
# index rows that were already added.
currNifty500 = pd.concat(
    [
        currNifty500.rename(
            columns={
                "Company Name": "company",
                "Industry": "industry",
                "Symbol": "symbol",
            }
        )[["company", "industry", "symbol"]],
        indexRows,
    ]
).drop_duplicates()

In [12]:
# Keep only symbols present in currNifty500 (inner join) and write the
# result, without the index, to the output CSV.
ohlcData.merge(currNifty500, on="symbol", how="inner").to_csv(
    r"C:\Users\ksdee\Documents\PersonalFinance\Trading\Trading_Data\NSEData\ohlcWithDelOI.csv",
    index=False,
)

In [13]:
# Inner join of the OHLC rows with currNifty500 on "symbol", kept in memory
# for the checks in the following cells.
temp = ohlcData.merge(currNifty500, how="inner", on="symbol")

In [14]:
# Sanity check: most recent date present in dfStocks for one symbol.
dfStocks.loc[dfStocks.symbol == "LEMONTREE", "date"].max()

datetime.date(2022, 4, 22)

In [15]:
# Sanity check: most recent date present in the merged frame.
temp.date.max()

datetime.date(2022, 4, 22)