In [1]:
import os
from urllib.parse import quote

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials
# NOTE(review): on Windows the OS normally defines USERNAME and environment
# lookups are case-insensitive there; load_dotenv() does not override variables
# that already exist by default, so `username` may resolve to the Windows login
# rather than the value in .env -- confirm on the target machine.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Fail early with a readable message: os.getenv() returns None for an unset
# variable, which would otherwise be formatted into the URL as the text "None".
missing = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if missing:
    raise RuntimeError(
        'Missing required environment variable(s): {}'.format(', '.join(missing))
    )

# connection
# The user name and password are percent-encoded so that characters with a
# meaning inside a URL (@ : / % + ...) are not mis-parsed. Store the raw,
# un-encoded values in .env. `server` and `database` are passed through as-is.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote(username, safe=''), quote(password, safe=''), server, database
    )
)

# establish connection (shared by the query cells below)
connection = engine.connect()

### Top Bonds

In [3]:
# Monthly retail vs. institutional activity for trades that match a row in
# BondReturns_topBonds on (CusipId, end-of-month of the execution date).
#
# Size buckets used by the inner query:
#   institutional: EntrdVolQt >= 500000
#   retail:        EntrdVolQt <  250000
# Trades with 250000 <= EntrdVolQt < 500000 fall into neither bucket.
#
# The fraction denominators are wrapped in NULLIF(..., 0): a month in which
# every trade sits in the unclassified middle band would otherwise raise a
# divide-by-zero error and fail the whole query. Such months now yield NULL
# fractions (NaN in the DataFrame, an empty field in the CSV).
#
# NOTE(review): if BondReturns_topBonds can hold more than one row per
# (CusipId, TrdExctnDtEOM), the inner join counts each trade once per matching
# row -- confirm that key is unique.
query = '''
    SELECT
        *,
        1.0 * RetailVolume / NULLIF(InstitutionalVolume + RetailVolume, 0) AS VolumeFraction,
        1.0 * RetailTrades / NULLIF(InstitutionalTrades + RetailTrades, 0) AS TradesFraction
    FROM (
        SELECT
            EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN EntrdVolQt ELSE 0 END) AS InstitutionalVolume,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN EntrdVolQt ELSE 0 END) AS RetailVolume,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN 1 ELSE 0 END) AS InstitutionalTrades,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN 1 ELSE 0 END) AS RetailTrades,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt >= 500000 THEN A.CusipId END)) AS InstitutionalCusips,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt < 250000 THEN A.CusipId END)) AS RetailCusips
        FROM
            Trace_filteredWithRatings A
        INNER JOIN
            BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        WHERE
            A.RatingNum <> 0
        GROUP BY
            EOMONTH(A.TrdExctnDt)
    ) A
    ORDER BY
        TrdExctnDtEOM
'''

# read sql
df = pd.read_sql_query(query, connection)

# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs('source', exist_ok=True)
df.to_csv('source/retailShare-topBonds.csv', index=False)

### Non-Top Bonds

In [4]:
# Monthly retail vs. institutional activity for trades that have NO matching
# row in BondReturns_topBonds on (CusipId, end-of-month of the execution date).
# The LEFT JOIN combined with `B.CusipId IS NULL` acts as an anti-join: only
# trades without a match survive the WHERE clause.
#
# Size buckets used by the inner query:
#   institutional: EntrdVolQt >= 500000
#   retail:        EntrdVolQt <  250000
# Trades with 250000 <= EntrdVolQt < 500000 fall into neither bucket.
#
# The fraction denominators are wrapped in NULLIF(..., 0): a month in which
# every trade sits in the unclassified middle band would otherwise raise a
# divide-by-zero error and fail the whole query. Such months now yield NULL
# fractions (NaN in the DataFrame, an empty field in the CSV).
query = '''
    SELECT
        *,
        1.0 * RetailVolume / NULLIF(InstitutionalVolume + RetailVolume, 0) AS VolumeFraction,
        1.0 * RetailTrades/ NULLIF(InstitutionalTrades + RetailTrades, 0) AS TradesFraction
    FROM (
        SELECT
            EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN EntrdVolQt ELSE 0 END) AS InstitutionalVolume,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN EntrdVolQt ELSE 0 END) AS RetailVolume,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN 1 ELSE 0 END) AS InstitutionalTrades,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN 1 ELSE 0 END) AS RetailTrades,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt >= 500000 THEN A.CusipId END)) AS InstitutionalCusips,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt < 250000 THEN A.CusipId END)) AS RetailCusips
        FROM
            Trace_filteredWithRatings A
        LEFT JOIN 
            BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        WHERE
            A.RatingNum <> 0
            AND B.CusipId IS NULL
        GROUP BY
            EOMONTH(A.TrdExctnDt)
    ) A
    ORDER BY
        TrdExctnDtEOM
'''

# read sql
df = pd.read_sql_query(query, connection)

# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs('source', exist_ok=True)
df.to_csv('source/retailShare-nonTopBonds.csv', index=False)