### Read Smallcases

In [216]:
import difflib
import re

import numpy as np
import pandas as pd
import requests

In [217]:
# Connect to services
import os
import sys

# Make the sibling ``backend`` directory importable so that ``app.internal``
# resolves. The membership check keeps the cell idempotent: re-running it no
# longer appends a duplicate entry to sys.path each time.
backend_path = os.path.abspath(os.path.join(os.getcwd(), '../backend'))
if backend_path not in sys.path:
    sys.path.append(backend_path)

from app.internal.firebase import db, bucket

In [218]:
# Known smallcases: the id sent to the smallcase API as ``scid`` and the
# display name.
ids = [
    {"id": "SCAW_0001", "name": "All Weather Investing"},
    {"id": "SCMO_0015", "name": "Safe Haven"},
    {"id": "SCMO_0026", "name": "Growth & Income"},
    {"id": "SCMO_0016", "name": "The Naked Trader"},
    {"id": "SCMO_0024", "name": "The PE List"},
    {"id": "SCMO_0013", "name": "Dividend Stars"},
    {"id": "SCMO_0014", "name": "Dividend Aristocrats"},
    {"id": "SCNM_0025", "name": "Electric Mobility"},
    {"id": "SCNM_0012", "name": "Rising Rural Demand"},
    {"id": "SCTR_0014", "name": "House of Tata"},
    {"id": "SCTR_0017", "name": "House of Murugappa"},
    {"id": "SCTR_0010", "name": "Realty Tracker"},
]

# Pick the smallcase by name rather than by list position, so reordering or
# extending `ids` cannot silently switch which smallcase the notebook processes.
# An unknown name fails immediately with a KeyError naming it.
selected_smallcase_name = "Rising Rural Demand"
smallcase_id = {entry["name"]: entry["id"] for entry in ids}[selected_smallcase_name]
# Open the timeline workbook once; the same handle is reused when the index
# values sheet is read further down.
smallcase_sheet_path = f"data/smallcases/{smallcase_id}_Timeline.xlsx"
smallcase_sheet = pd.ExcelFile(smallcase_sheet_path)

# Constituents and weightage per date range.
constituents = smallcase_sheet.parse(sheet_name="Historical Constituents")
constituents.head()

Unnamed: 0,Date Range,Constituents,Weightage
0,2016-04-04 to 2016-05-31,Mahindra and Mahindra Financial Services Ltd,0.06
1,,Bharat Financial Inclusion Ltd,0.06
2,,Satin Creditcare Network Ltd,0.06
3,,ITC Ltd,0.06
4,,Emami Ltd,0.06


In [219]:
# Index values per date, read from the already-open workbook.
indexes = smallcase_sheet.parse(sheet_name="Historical Index Values")
indexes.head()

Unnamed: 0,Date,Rising Rural Demand,NIFTY 100,Rebalance Occured
0,2016-04-04,100.0,100.0,
1,2016-04-05,98.29,98.07,
2,2016-04-06,99.06,98.29,
3,2016-04-07,98.87,97.48,
4,2016-04-08,99.64,97.65,


### Fetch and POST Smallcase and Stats

In [220]:
smallcase_base_url = "https://api.smallcase.com"

# Let requests build and encode the query string, and bound the wait so a
# stalled connection cannot hang the kernel indefinitely.
resp = requests.get(
    f"{smallcase_base_url}/smallcases/smallcase",
    params={"scid": smallcase_id},
    timeout=30,
)

resp.status_code

200

In [221]:
# Local backend that receives the POSTs below.
local_base_url = "http://localhost:7999"

# The smallcase document sits under the "data" key of the API response;
# its "stats" section is referenced often enough to get its own name.
response_body = resp.json()
smallcase = response_body["data"]
stats = smallcase["stats"]

def remove_p_tag(text: str):
    """Return ``text`` with opening and closing ``<p>``, ``<ul>`` and ``<li>`` tags removed.

    Only bare tags are matched (no attributes); any other markup is left as is.
    """
    list_and_paragraph_tags = re.compile(r"</?(p|ul|li)>")
    return list_and_paragraph_tags.sub("", text)

# Methodology entries with the list/paragraph markup stripped from their text.
methodologies = [
    {"key": section["key"], "details": remove_p_tag(section["content"])}
    for section in smallcase["methodology"]
]

# Only the strategy keys are kept.
investment_strategies = [
    strategy["key"] for strategy in smallcase["info"]["investmentStrategy"]
]

# Named handles for the nested sections of the API response read below.
info = smallcase["info"]
flags = smallcase["flags"]
ratios = stats["ratios"]
benchmark = smallcase["benchmark"]

# Body for the local backend's smallcase-creation endpoint.
payload = {
    "id": smallcase_id,
    "name": info["name"],
    "slug": info["slug"],
    "description": info["shortDescription"],
    "volatility": ratios["riskLabel"],
    "popularity_rank": flags["popular"]["rank"],
    "contains_etf": flags["containsEtf"],
    "contains_stock": flags["containsStock"],
    "constituent_count": smallcase["constituentsCount"],
    "growth_since_launch": {
        "cagr": ratios["cagr"],
        "returns": stats["returns"]["sinceInception"],
        "duration": ratios["cagrDuration"],
    },
    "benchmark": {
        "id": benchmark["id"],
        "index": benchmark["index"],
        "details": benchmark["msg"],
    },
    "methodologies": methodologies,
    "launch_date": info["uploaded"],
    "inception_date": info["created"],
    "last_rebalance_date": info["lastRebalanced"],
    "next_rebalance_date": info["nextUpdate"],
    "investment_strategies": investment_strategies,
    "rebalance_frequency": info["rebalanceSchedule"],
}

# The timeout keeps an unresponsive local server from blocking the kernel forever.
local_resp = requests.post(f"{local_base_url}/smallcases/", json=payload, timeout=30)
local_resp.status_code == 201

True

In [222]:
# Period under which this statistics snapshot is posted.
year = "2024"
month = "05"

# Named handles for the two stats sections read repeatedly below.
ratios = stats["ratios"]
returns = stats["returns"]

# Body for the local backend's per-smallcase statistics endpoint.
payload = {
    "year": year,
    "month": month,
    "min_sip_amount": stats["minSipAmount"],
    "investor_count": stats["investorCount"],
    "subscriber_count": stats["subscriberCount"],
    "cagr": {
        "one_year": ratios["cagr1y"],
        "three_year": ratios["cagr3y"],
        "five_year": ratios["cagr5y"],
    },
    "returns": {
        "monthly": returns["monthly"],
        "quarterly": returns["quarterly"],
        "half_year": returns["halfyearly"],
        "one_year": returns["yearly"],
        "three_year": returns["threeYear"],
        "five_year": returns["fiveYear"],
    },
    "ratios": {
        "dividend_yield": ratios["divYield"],
        "dividend_yield_differential": ratios["divYieldDifferential"],
        "risk": ratios["risk"],
        "pe": ratios["pe"],
        "pb": ratios["pb"],
        "beta": ratios["beta"],
        "sharpe": ratios["sharpeRatio"],
    },
    "weightage": {
        "large_cap": ratios["largeCapPercentage"],
        "mid_cap": ratios["midCapPercentage"],
        "small_cap": ratios["smallCapPercentage"],
        "market_category": ratios["marketCapCategory"],
    },
}

# The timeout keeps an unresponsive local server from blocking the kernel forever.
local_resp = requests.post(
    f"{local_base_url}/smallcases/{smallcase_id}/statistics/",
    json=payload,
    timeout=30,
)
local_resp.status_code == 201

True

### Upload XLSX to bucket

In [223]:
# Store the source workbook next to the smallcase's other objects in the bucket.
timeline_object_name = f"smallcases/{smallcase_id}/timeline.xlsx"
blob = bucket.blob(timeline_object_name)
blob.upload_from_filename(smallcase_sheet_path)

### Map and POST Smallcase Name to Upstox Instruments

In [224]:
# Distinct constituent names across all date ranges. Blank cells in the sheet
# come back as NaN; they are dropped here because the matching loop calls
# ``.upper()`` on every entry and would fail on a float NaN.
postable_constituents = constituents["Constituents"].dropna().unique()
len(postable_constituents)

56

In [225]:
successful_constitents = []
failed_constitents = []

async def query_ticker_name(name: str):
    return await db.collection("tickers").where("name", ">=", name).where("name", "<=", name + '\uf8ff').get()

for c in postable_constituents:
    q = c.upper()
    docs = await query_ticker_name(q)
    if len(docs) == 0:
        failed_constitents.append(c)
    else:
        for e in docs:
            await db.collection("tickers").document(e.id).update({
                "smallcase_name": c
            })
            successful_constitents.append((c, e.id))

  return query.where(field_path, op_string, value)
  return await db.collection("tickers").where("name", ">=", name).where("name", "<=", name + '\uf8ff').get()


In [226]:
# Progress so far: (ticker documents tagged, constituents with no match yet).
len(successful_constitents), len(failed_constitents)

(16, 40)

In [227]:
# # All return upper

def replace_ltd(s):
    """Upper-case ``s`` and expand every occurrence of "LTD" to "LIMITED"."""
    upper_name = s.upper()
    return upper_name.replace("LTD", "LIMITED")

# def replace_technologies(s):
#     return s.upper().replace("TECHNOLOGIES", "TECHNO.")

# def replace_company(s):
#     return s.upper().replace("COMPANY", "CO")

# def replace_corporation(s):
#     return s.upper().replace("CORPORATION", "CORP")

# Ltd -> Limited
for f in failed_constitents:
    q = replace_ltd(f)
    docs = await query_ticker_name(q)
    if len(docs) != 0:
        for e in docs:
            await db.collection("tickers").document(e.id).update({
                "smallcase_name": f
            })
            successful_constitents.append((f, e.id))
            failed_constitents.remove(f)

  return await db.collection("tickers").where("name", ">=", name).where("name", "<=", name + '\uf8ff').get()


In [228]:
# Progress so far: (ticker documents tagged, constituents with no match yet).
len(successful_constitents), len(failed_constitents)

(27, 29)

In [229]:
# Upstox instrument dump for NSE; keep only the cash-equity rows.
nse = pd.read_csv("data/upstox/NSE.csv")
is_equity = nse["instrument_type"] == "EQUITY"
on_nse_eq = nse["exchange"] == "NSE_EQ"
equities = nse[is_equity & on_nse_eq]
equities.head()

Unnamed: 0,instrument_key,exchange_token,tradingsymbol,name,last_price,expiry,strike,tick_size,lot_size,instrument_type,option_type,exchange
77,NSE_EQ|DUMMYSAN005,14747.0,011NSETEST,011NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
78,NSE_EQ|DUMMYSAN006,14751.0,021NSETEST,021NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
79,NSE_EQ|DUMMYSAN007,14753.0,031NSETEST,031NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
80,NSE_EQ|DUMMYSAN008,14755.0,041NSETEST,041NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ
81,NSE_EQ|DUMMYSAN009,14758.0,051NSETEST,051NSETEST,,,,0.05,1.0,EQUITY,,NSE_EQ


In [230]:
# Fuzzy fallback: for every constituent that is still unmatched, pick the NSE
# equity whose name is most similar (difflib ratio, case-insensitive).
#
# The matches are collected as plain records and turned into a frame once,
# instead of growing a DataFrame with pd.concat inside the loop, writing a
# scratch column onto the filtered frame, and assigning into a head(1) slice.
# `difflib` is imported in the notebook's import cell.
candidates = equities.filter(['name', "exchange_token"])
match_columns = ['name', 'exchange_token', 'similarity', 'smallcase_name']

best_matches = []
for f in failed_constitents:
    wanted = f.lower()
    similarity = candidates["name"].apply(
        lambda x: difflib.SequenceMatcher(None, x.lower(), wanted).ratio()
    )
    if similarity.empty:
        # No candidate instruments at all: nothing to match against.
        continue
    # Positional lookup, so the result does not depend on the frame's index
    # labels; on a tie the first candidate in file order wins.
    best_pos = int(similarity.to_numpy().argmax())
    best = candidates.iloc[best_pos]
    best_matches.append({
        'name': best['name'],
        'exchange_token': best['exchange_token'],
        'similarity': similarity.iloc[best_pos],
        'smallcase_name': f,
    })

# Passing the columns explicitly keeps the frame well-formed (and usable by the
# following cells) even when there was nothing left to match.
tickers = pd.DataFrame(best_matches, columns=match_columns)
tickers

Unnamed: 0,name,exchange_token,similarity,smallcase_name
0,DCM FINANCIAL SERVICES LT,8704.0,0.695652,Mahindra and Mahindra Financial Services Ltd
1,BHARATIYA GLOBAL INFO LTD,11371.0,0.654545,Bharat Financial Inclusion Ltd
2,SATIN CREDIT NET LTD,10453.0,0.833333,Satin Creditcare Network Ltd
3,KAVERI SEED CO. LTD.,14972.0,0.837209,Kaveri Seed Company Ltd
4,COROMANDEL INTERNTL. LTD,739.0,0.884615,Coromandel International Ltd
5,CHAMBAL FERTILIZERS LTD,637.0,0.733333,Chambal Fertilisers and Chemicals Ltd
6,ESCORTS KUBOTA LIMITED,958.0,0.9,Escorts Kubota Ltd
7,EVEREST INDUSTRIES LTD,993.0,0.784314,Eveready Industries India Ltd
8,MAHINDRA & MAHINDRA LTD,2031.0,0.916667,Mahindra and Mahindra Ltd
9,THE RAMCO CEMENTS LIMITED,2043.0,0.913043,Ramco Cements Limited


In [213]:
# Row labels of `tickers` whose fuzzy match is wrong.
# NOTE(review): presumably chosen by inspecting the match table above — confirm.
incorrect = [0, 1, 7, 10, 24]
# Row label -> correct exchange_token, for the wrong rows that can be fixed.
# Keys and tokens are integers so they compare directly with the row labels and
# keep the exchange_token column numeric.
corrections = {
    7: 11782,
}

# Wrong rows with no known replacement; these are dropped below.
uncorrectable = [i for i in incorrect if i not in corrections]
cannot_correct = len(uncorrectable)

for i in incorrect:
    if i not in corrections:
        continue
    if i not in tickers.index:
        # Assigning through .loc to a missing label would silently append a row.
        raise KeyError(f"correction given for row {i}, which is not in tickers")
    correct_token = corrections[i]
    instrument = equities.loc[equities["exchange_token"] == correct_token]
    if instrument.empty:
        raise ValueError(f"exchange_token {correct_token} not found in equities")
    tickers.loc[i, ['exchange_token', 'name', 'similarity']] = [
        correct_token,
        instrument.iloc[0]['name'],
        1.0,
    ]

# Rows are addressed by label throughout (never by position), and labels that
# are already gone are ignored, so re-running this cell is harmless.
tickers = tickers.drop(index=uncorrectable, errors="ignore")
cannot_correct, tickers.shape

(1, (11, 4))

Entries 2 and 15 for Safe Haven no longer exist (Polaris Consulting & Monsanto were merged with other companies).
Trade-off:

We cannot fetch market data for these stocks, so we do not add them to the equities list.
When rebalancing for Kelly, we will ignore these stocks, maintain their original weightage, and assess the implications.

In [214]:
payload = tickers[['exchange_token', 'smallcase_name']].to_dict(orient='records')
for p in payload:
    id = str(int(p['exchange_token']))
    await db.document(f"tickers/{id}").update({"smallcase_name": p["smallcase_name"]})
    successful_constitents.append((p["smallcase_name"], id))

In [215]:

# Count check: tagged entries should equal the distinct constituents minus the
# ones that could not be corrected. This compares counts only; a constituent
# that matched several ticker documents contributes several entries.
len(successful_constitents), len(successful_constitents) == (len(postable_constituents) - cannot_correct)

(29, True)

All Weather = 5

Safe Haven = 73

Growth & Income = 102 (~10 corrections)

The Naked Trader = 115 (~12 corrections)

The PE List = 37 (7 corrections, 3 possible)

Dividend Stars = 38

Dividend Aristocrats = 31 (3 corrections, 2 possible)

Electric Mobility = 29 (1 correction)

