In [1]:
import os
import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials
# NOTE(review): load_dotenv() does not override variables that already exist in
# the process environment by default, so an OS-level `username` variable (e.g.
# the Windows account name) may take precedence over the .env value -- confirm
# on the machine that runs this notebook.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Fail early with a readable message instead of building a URL that contains
# the literal text "None" for an unset variable. Values are never echoed.
missing = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if missing:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing)
    )

# connection
# Only the non-secret part (host / database / driver) is parsed from a string;
# the credentials are attached through URL.set() so that characters such as
# '@', '/', ':' or '%' in the username or password are escaped correctly
# rather than being interpreted as URL delimiters.
connection_url = db.engine.make_url(
    'mssql://{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        server, database
    )
).set(username=username, password=password)
engine = db.create_engine(connection_url)

# establish connection
connection = engine.connect()

### Top Bonds

In [5]:
# Monthly institutional vs. retail activity for the "top" bonds.
#
# How the query is built (innermost subquery first):
#   1. Per (IssuerId, CusipId, month-end): sum EntrdVolQt over trades with
#      RatingNum <> 0, EntrdVolQt >= 500000, a non-NULL PrincipalAmt and a
#      TrdExctnDt later than (month-end - 5 days).
#   2. DENSE_RANK those sums within each (IssuerId, month-end), largest first,
#      and keep ranks 1-3. Because DENSE_RANK is used, tied volumes share a
#      rank, so more than three CUSIPs per issuer/month can be kept.
#   3. Join the kept (CusipId, month-end) pairs back to the full table on
#      CusipId and month, then aggregate per month-end. The outer level has no
#      WHERE clause, so it covers every trade of the month for those CUSIPs,
#      not only the trades that passed the filters in step 1.
#
# Output columns: trades with EntrdVolQt >= 500000 feed the Institutional*
# columns and trades with EntrdVolQt < 250000 feed the Retail* columns; trades
# in between are counted in neither group.
#
# NOTE(review): `TrdExctnDt <= EOMONTH(TrdExctnDt)` is always true if TrdExctnDt
# is a DATE column; if it is a DATETIME it would drop intraday trades on the
# last day of the month -- confirm the column type.
query = '''
    SELECT
        *
    FROM (
        SELECT
            EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN EntrdVolQt ELSE 0 END) AS InstitutionalVolume,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN EntrdVolQt ELSE 0 END) AS RetailVolume,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN 1 ELSE 0 END) AS InstitutionalTrades,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN 1 ELSE 0 END) AS RetailTrades,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt >= 500000 THEN A.CusipId END)) AS InstitutionalCusips,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt < 250000 THEN A.CusipId END)) AS RetailCusips
        FROM
            Trace_filtered_withRatings A
        INNER JOIN (
            SELECT
                CusipId,
                TrdExctnDtEOM
            FROM (
                SELECT
                    *,
                    DENSE_RANK() OVER (PARTITION BY IssuerId, TrdExctnDtEOM ORDER BY Volume DESC) AS VolumeRanking
                FROM (
                    SELECT
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
                        SUM(EntrdVolQt) AS Volume
                    FROM
                        Trace_filtered_withRatings
                    WHERE
                        RatingNum <> 0
                        AND EntrdVolQt >= 500000 -- institunional
                        AND PrincipalAmt IS NOT NULL
                        AND TrdExctnDt <= EOMONTH(TrdExctnDt) AND TrdExctnDt > DATEADD(DAY, -5, EOMONTH(TrdExctnDt))
                    GROUP BY
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt)
                ) A
            ) B
            WHERE
                VolumeRanking <= 3
        ) B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        GROUP BY
            EOMONTH(A.TrdExctnDt)
    ) A
    ORDER BY
        TrdExctnDtEOM
'''

# read sql
df = pd.read_sql_query(query, connection)

# to_csv() does not create missing parent directories; make sure the target
# folder exists so the notebook also runs on a fresh checkout.
os.makedirs('source', exist_ok=True)
df.to_csv('source/informationAsymmetry-topBonds.csv', index=False)

### Non-Top Bonds

In [6]:
# Monthly institutional vs. retail activity for the "non-top" bonds.
#
# The outer aggregation matches the top-bonds query: per month-end, trades
# with EntrdVolQt >= 500000 feed the Institutional* columns, trades with
# EntrdVolQt < 250000 feed the Retail* columns, and trades in between are
# counted in neither group. Only the set of (CusipId, month-end) pairs joined
# in differs; it is the UNION (duplicates removed) of two branches:
#   1. CUSIPs whose summed >= 500000 volume in the window after
#      (month-end - 5 days) has a DENSE_RANK above 3 within (IssuerId, month).
#   2. Every CUSIP with at least one trade below 500000 in the same window
#      (same RatingNum / PrincipalAmt filters). This branch applies no ranking
#      filter; its `NULL AS VolumeRanking` column is not selected further out.
#
# NOTE(review): because branch 2 has no ranking filter, a CUSIP that is ranked
# in the top 3 by the top-bonds query is also included here whenever it has a
# sub-500000 trade in the window, so the two CSV outputs may overlap -- confirm
# that this is intended.
# NOTE(review): `TrdExctnDt <= EOMONTH(TrdExctnDt)` is always true if TrdExctnDt
# is a DATE column; if it is a DATETIME it would drop intraday trades on the
# last day of the month -- confirm the column type.
query = '''
    SELECT
        *
    FROM (
        SELECT
            EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN EntrdVolQt ELSE 0 END) AS InstitutionalVolume,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN EntrdVolQt ELSE 0 END) AS RetailVolume,
            SUM(CASE WHEN EntrdVolQt >= 500000 THEN 1 ELSE 0 END) AS InstitutionalTrades,
            SUM(CASE WHEN EntrdVolQt < 250000 THEN 1 ELSE 0 END) AS RetailTrades,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt >= 500000 THEN A.CusipId END)) AS InstitutionalCusips,
            COUNT(DISTINCT (CASE WHEN EntrdVolQt < 250000 THEN A.CusipId END)) AS RetailCusips
        FROM
            Trace_filtered_withRatings A
        INNER JOIN (

            SELECT
                CusipId,
                TrdExctnDtEOM
            FROM (
                SELECT
                    *,
                    DENSE_RANK() OVER (PARTITION BY IssuerId, TrdExctnDtEOM ORDER BY Volume DESC) AS VolumeRanking
                FROM (
                    SELECT
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
                        SUM(EntrdVolQt) AS Volume
                    FROM
                        Trace_filtered_withRatings
                    WHERE
                        RatingNum <> 0
                        AND EntrdVolQt >= 500000 -- institunional
                        AND PrincipalAmt IS NOT NULL
                        AND TrdExctnDt <= EOMONTH(TrdExctnDt) AND TrdExctnDt > DATEADD(DAY, -5, EOMONTH(TrdExctnDt))
                    GROUP BY
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt)
                ) A
            ) B
            WHERE
                VolumeRanking > 3

            UNION

            SELECT
                CusipId,
                TrdExctnDtEOM
            FROM (
                SELECT
                    *,
                    NULL AS VolumeRanking
                FROM (
                    SELECT
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
                        SUM(EntrdVolQt) AS Volume
                    FROM
                        Trace_filtered_withRatings
                    WHERE
                        RatingNum <> 0
                        AND EntrdVolQt < 500000
                        AND PrincipalAmt IS NOT NULL
                        AND TrdExctnDt <= EOMONTH(TrdExctnDt) AND TrdExctnDt > DATEADD(DAY, -5, EOMONTH(TrdExctnDt))
                    GROUP BY
                        IssuerId,
                        CusipId,
                        EOMONTH(TrdExctnDt)
                ) A
            ) B

        ) B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        GROUP BY
            EOMONTH(A.TrdExctnDt)
    ) A
    ORDER BY
        TrdExctnDtEOM
'''

# read sql
df = pd.read_sql_query(query, connection)

# to_csv() does not create missing parent directories; make sure the target
# folder exists so the notebook also runs on a fresh checkout.
os.makedirs('source', exist_ok=True)
df.to_csv('source/informationAsymmetry-nonTopBonds.csv', index=False)