In [146]:
import numpy as np
import pandas as pd

In [147]:
import requests
base_url = "http://localhost:7999"
upstox_url = "https://api.upstox.com/v2"

### NSE Tickers Wrangling

In [148]:
instruments = pd.read_csv("data/upstox/NSE.csv")
instruments.head()

Unnamed: 0,instrument_key,exchange_token,tradingsymbol,name,last_price,expiry,strike,tick_size,lot_size,instrument_type,option_type,exchange
0,NSE_INDEX|Nifty 50,17.0,,Nifty 50,21995.85,,,,,INDEX,,NSE_INDEX
1,NSE_INDEX|NIFTY100 EQL Wgt,,,NIFTY100 EQL Wgt,29240.4,,,,,INDEX,,NSE_INDEX
2,NSE_INDEX|NIFTY50 EQL Wgt,,,NIFTY50 EQL Wgt,27648.65,,,,,INDEX,,NSE_INDEX
3,NSE_INDEX|NiftyM150Momntm50,,,NiftyM150Momntm50,54049.45,,,,,INDEX,,NSE_INDEX
4,NSE_INDEX|Nifty Auto,48.0,,Nifty Auto,21520.75,,,,,INDEX,,NSE_INDEX


In [149]:
equities = instruments.query('instrument_type == "EQUITY" and exchange == "NSE_EQ"')
equities.head()

Unnamed: 0,instrument_key,exchange_token,tradingsymbol,name,last_price,expiry,strike,tick_size,lot_size,instrument_type,option_type,exchange
77,NSE_EQ|DUMMYSAN005,14747.0,011NSETEST,011NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
78,NSE_EQ|DUMMYSAN006,14751.0,021NSETEST,021NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
79,NSE_EQ|DUMMYSAN007,14753.0,031NSETEST,031NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
80,NSE_EQ|DUMMYSAN008,14755.0,041NSETEST,041NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
81,NSE_EQ|DUMMYSAN009,14758.0,051NSETEST,051NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ


In [150]:
indexes = instruments.query('instrument_type == "INDEX" and exchange == "NSE_INDEX"')
indexes.head()

Unnamed: 0,instrument_key,exchange_token,tradingsymbol,name,last_price,expiry,strike,tick_size,lot_size,instrument_type,option_type,exchange
0,NSE_INDEX|Nifty 50,17.0,,Nifty 50,21995.85,,,,,INDEX,,NSE_INDEX
1,NSE_INDEX|NIFTY100 EQL Wgt,,,NIFTY100 EQL Wgt,29240.4,,,,,INDEX,,NSE_INDEX
2,NSE_INDEX|NIFTY50 EQL Wgt,,,NIFTY50 EQL Wgt,27648.65,,,,,INDEX,,NSE_INDEX
3,NSE_INDEX|NiftyM150Momntm50,,,NiftyM150Momntm50,54049.45,,,,,INDEX,,NSE_INDEX
4,NSE_INDEX|Nifty Auto,48.0,,Nifty Auto,21520.75,,,,,INDEX,,NSE_INDEX


GOAL ~ 2400 NSE listed companies
Remove:
- Government issued instruments
- Anything with %
- NSETEST

In [151]:
randoms = ["%", "NSETEST"]
goi_issues = ["GOI STRIPS", "GOI TBILL", "GOI LOAN"]

def remove_names(name):
    return all(i not in name for i in randoms + goi_issues)

equities = equities[equities["name"].apply(lambda x: remove_names(x))]
equities["exchange"] = equities["exchange"].str.replace("NSE_EQ", "NSE")

equities.shape


(2894, 12)

Remove Indexes with no exchange token

In [152]:
indexes.replace(np.nan, None, inplace=True)
indexes.dropna(subset=["exchange_token"], inplace=True)
indexes["exchange"] = indexes["exchange"].str.replace("NSE_INDEX", "NSE")

indexes.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  indexes.replace(np.nan, None, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  indexes.dropna(subset=["exchange_token"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  indexes["exchange"] = indexes["exchange"].str.replace("NSE_INDEX", "NSE")


(43, 12)

### POST Equities to BE

In [153]:
rename_cols = {
    "tradingsymbol": "ticker",
    "instrument_key": "upstox_instrument_key"
}
cols = ["exchange", "exchange_token", "name", "ticker", "instrument_type", "upstox_instrument_key", "lot_size"]


In [60]:
def post_tickers(tickers):
    s, f = 0, 0
    failed = []
    for p in tickers:
        resp = requests.post(base_url + "/tickers/", json=p)
        if resp.status_code == 201:
            s += 1
        else:
            f += 1
            failed.append(p)
            print(resp.json())
    return s, f, failed

In [62]:
equities.rename(columns=rename_cols, inplace=True)
payload = equities[cols].to_dict(orient="records")

s, f, failed = post_tickers(payload)

print(f"Success: {s}, Failure: {f}")
print(f"Failed {failed}")


Success: 2894, Failure: 0
Failed []


In [None]:
# indexes.rename(columns=rename_cols, inplace=True)
# payload = indexes[cols].to_dict(orient="records")

# s, f, failed = post_tickers(payload[0:5])

# print(f"Success: {s}, Failure: {f}")
# print(f"Failed {failed}")

04.05 - 43 Indexes + 6563 equities posted

05.05 - Deleted and posted only relevant equities 2894 (indexes have overlapping exchange tokens, do NOT post yet)

### Fetch candle stick data and POST to Equities

In [154]:
import calendar
from datetime import datetime

equities.rename(columns=rename_cols, inplace=True)
payload = equities[cols].to_dict(orient="records")

years = ["2023", "2022", "2021", "2020", "2019", "2018", "2017", "2016"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

def get_last_day_of_month(year: int, month: int) -> int:
    return calendar.monthrange(year, month)[1]

def format_date_path(date: datetime) -> str:
    return date.strftime("%Y-%m-%d")


s, f, p = 0, 0, 0
failed = []
passive = []
for equity in payload:
    exchange_token = int(equity["exchange_token"])
    instrument_key = equity["upstox_instrument_key"]

    for year in years:
        for month in months:
            f = True
            last_day = get_last_day_of_month(int(year), int(month))
            from_date = f"{year}-{month}-01"
            to_date = f"{year}-{month}-{last_day}"

            try:
                resp = requests.get(upstox_url + f"/historical-candle/{instrument_key}/day/{to_date}/{from_date}")
                if resp.status_code == 200:
                    candles = resp.json()["data"]["candles"]
                    if len(candles) > 0:
                        request = { "month": month, "year": year }

                        # Daily candles
                        daily = {}
                        running_high, running_low = 0, 0
                        for candle in candles:
                            timestamp = format_date_path(datetime.fromisoformat(candle[0]))
                            high = candle[2]
                            low = candle[3]
                            
                            daily[timestamp] = {
                                "open": candle[1],
                                "high": candle[2],
                                "low": candle[3],
                                "close": candle[4],
                                "volume": candle[5],
                                "open_interest": candle[6]
                            }
                        request["daily"] = daily

                        # Monthly candle
                        monthly_high = max(entry["high"] for entry in daily.values())
                        monthly_low = min(entry["low"] for entry in daily.values())
                        monthly_volume = sum(entry["volume"] for entry in daily.values())
                        monthly_open = candles[-1][1]
                        monthly_close = candles[0][4]
                        request["monthly"] = {
                            "open": monthly_open,
                            "high": monthly_high,
                            "low": monthly_low,
                            "close": monthly_close,
                            "volume": monthly_volume
                        }

                        internal_resp = requests.post(base_url + f"/tickers/{exchange_token}/candles", json=request)
                        if internal_resp.status_code == 201:
                            s += 1
                        else:
                            failed.append(equity)
                            print("BE failed", internal_resp.status_code, internal_resp.json())
                    else:
                        f = False
                        passive.append(equity)
                        break
                else:
                    failed.append(equity)
                    print("Upstox failed", resp.status_code, resp.json())

            except Exception as e:
                failed.append(equity)
                print("Exception", e)
        if not f:
            break

months_per_equity = len(months) * len(years)
print(f"Success: {s / months_per_equity}, Failure: {len(failed)}, Passive: {len(passive)}")


In [156]:
# 1st iteration | 25m55s | s = 1869 | f = 0 | p = 1822 | x = 37

last_stock_id = 5549
x1 = next((i for (i, d) in enumerate(payload) if int(d["exchange_token"]) == last_stock_id), None) # no of stocks posted
s , len(failed), len(passive), x1


(1869, 0, 1822, 37)

We have understood, it is unfeasible to scrape all the data. 

Every stock took 45 seconds in the 1st iteration.
45 * 3000 = 135,000 = ~33 hours

Even after refactoring to optimse this by a factor of 2 to 22 seconds, it will take ~15 hours

Even if it is optimised to an unrealistic 10s, it will take ~8 hours

Instead, we deploy this code as a CF. Find only the equities we need from the Smallcases and invoke the CF to fetch data for that specific stock. This should reduce the equity universe by at least 80%.

22 * 0.2 * 3000 = 13,200 = ~3 hours which is much more manageable

## External API

In [124]:
import requests

url = "https://api.upstox.com/v2/historical-candle/NSE_EQ|INE144J01027/month/2023-02-01/2023-01-01"

payload={}
headers = {
  'Accept': 'application/json'
}

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)

{"status":"success","data":{"candles":[["2023-02-01T00:00:00+05:30",81.4,82.45,66.65,67.3,1564023,0],["2023-01-01T00:00:00+05:30",91.05,95.3,77,80.5,2362341,0]]}}


In [142]:
import requests

url = "https://api.upstox.com/v2/historical-candle/NSE_EQ|INE253B01015/day/2022-12-31/2022-01-01"

payload={}
headers = {
  'Accept': 'application/json'
}

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)

{"status":"success","data":{"candles":[["2022-12-30T00:00:00+05:30",22,22.55,22,22.55,61746,0],["2022-12-29T00:00:00+05:30",22.7,22.7,22.1,22.15,6770,0],["2022-12-28T00:00:00+05:30",22.75,23.2,22.3,22.35,13629,0],["2022-12-27T00:00:00+05:30",22.75,22.75,22.05,22.75,6910,0],["2022-12-26T00:00:00+05:30",22.5,22.75,22.3,22.35,3949,0],["2022-12-23T00:00:00+05:30",22.75,22.75,22.75,22.75,1243,0],["2022-12-22T00:00:00+05:30",23.2,23.6,23.2,23.2,3832,0],["2022-12-21T00:00:00+05:30",24.2,24.2,23.35,23.65,13508,0],["2022-12-20T00:00:00+05:30",23.85,23.85,22.95,23.8,15752,0],["2022-12-19T00:00:00+05:30",23.4,23.4,23.4,23.4,4397,0],["2022-12-16T00:00:00+05:30",22.95,22.95,22.95,22.95,4014,0],["2022-12-15T00:00:00+05:30",22.5,22.8,22.3,22.5,3979,0],["2022-12-14T00:00:00+05:30",22.45,22.8,22.2,22.5,5431,0],["2022-12-13T00:00:00+05:30",22.45,22.9,22.2,22.45,5174,0],["2022-12-12T00:00:00+05:30",22.6,23.35,22.45,22.55,9161,0],["2022-12-09T00:00:00+05:30",22.6,23.05,22.35,22.9,6192,0],["2022-12-08T00:0