In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials (read from the process environment, which load_dotenv()
# populates from a .env file)
# NOTE(review): load_dotenv() does not override variables that already exist in the
# environment by default, and Windows predefines USERNAME -- confirm that the
# 'username' value picked up here is the database login and not the OS account.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Fail early with a readable message instead of building a URL that contains
# the literal text "None" and failing later inside the ODBC driver.
if None in (server, database, username, password):
    raise RuntimeError(
        "Missing one or more of the 'server', 'database', 'username', 'password' "
        "settings; check the .env file / environment."
    )

# connection
# Credentials are percent-encoded so that characters such as '@', ':' or '/'
# inside the username/password cannot be misread as URL delimiters.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote_plus(username), quote_plus(password), server, database
    )
)

# establish connection (shared by every query function below)
connection = engine.connect()

### Daily Distinct Cusips and Total Volume by Retail / Institutional grouping

In [3]:
def distinctCusips_totalVolume_by_retail_institutional(year_start, year_end):
    """Daily distinct-CUSIP count and total volume, split by trade-size bucket.

    Each row of ``TraceBond_filtered`` is labelled ``'R'`` when ``EntrdVolQt`` is
    below 100000 and ``'IN'`` otherwise, then rows are aggregated per
    ``TrdExctnDt`` and label.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``TrdExctnDt``, ``RetailThreshold``,
        ``DistinctCusips`` and ``TotalVolume``, ordered by date then label.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    query_template = '''
        SELECT 
            TrdExctnDt,
            RetailThreshold,
            COUNT(DISTINCT CusipId) AS DistinctCusips,
            SUM(EntrdVolQt) AS TotalVolume
        FROM (
            SELECT
                TrdExctnDt,
                CASE WHEN EntrdVolQt < 100000 THEN 'R' ELSE 'IN' END AS RetailThreshold,
                CusipId,
                EntrdVolQt
            FROM
                TraceBond_filtered
            WHERE
                TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
        ) A
        GROUP BY
            TrdExctnDt,
            RetailThreshold
        ORDER BY
            TrdExctnDt, 
            RetailThreshold
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [4]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_totalVolume_by_retail_institutional(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_totalVolume_by_retail_institutional_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,TrdExctnDt,RetailThreshold,DistinctCusips,TotalVolume
0,2002-07-26,IN,985,6760576000.0
1,2002-07-26,R,1368,110150400.0
2,2002-07-29,IN,1088,11071520000.0
3,2002-07-29,R,1339,120445300.0
4,2002-07-30,IN,1292,12708750000.0


### Daily Distinct Cusips and Total Volume

In [5]:
def distinctCusips_totalVolume(year_start, year_end):
    """Daily distinct-CUSIP count and total entered volume.

    Aggregates ``TraceBond_filtered`` per ``TrdExctnDt``.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``TrdExctnDt``, ``DistinctCusips`` and
        ``TotalVolume``, ordered by date.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    query_template = '''
        SELECT 
            TrdExctnDt,
            COUNT(DISTINCT CusipId) AS DistinctCusips,
            SUM(EntrdVolQt) AS TotalVolume
        FROM
            TraceBond_filtered
        WHERE
            TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
        GROUP BY
            TrdExctnDt
        ORDER BY
            TrdExctnDt
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [6]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_totalVolume(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_totalVolume_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,TrdExctnDt,DistinctCusips,TotalVolume
0,2002-07-26,1847,6870726000.0
1,2002-07-29,1881,11191960000.0
2,2002-07-30,2076,12848100000.0
3,2002-07-31,2082,14801400000.0
4,2002-08-26,1841,3843934000.0


### Daily Total Volume by Side (Buy/Sell) 

In [7]:
def totalVolume_by_side(year_start, year_end):
    """Daily total entered volume per reported side code.

    Sums ``EntrdVolQt`` from ``TraceBond_filtered`` for every
    (``TrdExctnDt``, ``RptSideCd``) pair.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``TrdExctnDt``, ``RptSideCd`` and
        ``TotalVolume``, ordered by date then side code.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    query_template = '''
        SELECT 
            TrdExctnDt,
            RptSideCd,
            SUM(EntrdVolQt) as TotalVolume
        FROM
            TraceBond_filtered
        WHERE
            TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
        GROUP BY
            TrdExctnDt, 
            RptSideCd
        ORDER BY
            TrdExctnDt, 
            RptSideCd
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [8]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = totalVolume_by_side(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/totalVolume_by_side_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,TrdExctnDt,RptSideCd,TotalVolume
0,2002-07-26,B,3316937000.0
1,2002-07-26,S,3553790000.0
2,2002-07-29,B,7469907000.0
3,2002-07-29,S,3722058000.0
4,2002-07-30,B,5393040000.0


### Industry analysis on Cusips and Issuers

In [9]:
def industry_analysis_on_cusips_issuers(year_start, year_end):
    """Distinct CUSIP and issuer counts per industry code over the whole window.

    Groups ``TraceBond_filtered`` by ``IndustryCode`` and counts the distinct
    ``CusipId`` and ``IssuerID`` values seen in the period.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``IndustryCode``, ``DistinctCusips`` and
        ``DistinctIssuers``, ordered by industry code.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    query_template = '''
        SELECT
            IndustryCode,
            DistinctCusips,
            DistinctIssuers
        FROM (
            SELECT 
                IndustryCode,
                COUNT(DISTINCT CusipId) AS DistinctCusips,
                COUNT(DISTINCT IssuerID) AS DistinctIssuers
            FROM
                TraceBond_filtered
            WHERE
                TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
            GROUP BY
                IndustryCode 
        ) A
        ORDER BY
            IndustryCode
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [10]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = industry_analysis_on_cusips_issuers(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/industry_analysis_on_cusips_issuers_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,IndustryCode,DistinctCusips,DistinctIssuers
0,10,5694,869
1,11,1442,258
2,12,1560,290
3,13,78,11
4,14,1087,154


### Daily Distinct Cusips and Total Volume by Issuer

In [11]:
def distinctCusips_totalVolume_by_issuer(year_start, year_end):
    """Daily distinct-CUSIP count and total entered volume per issuer.

    Aggregates ``TraceBond_filtered`` for every (``TrdExctnDt``, ``IssuerId``)
    pair.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``TrdExctnDt``, ``IssuerId``, ``DistinctCusips``
        and ``TotalVolume``, ordered by date then issuer.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    query_template = '''
        SELECT
            TrdExctnDt,
            IssuerId,
            COUNT(DISTINCT CusipId) AS DistinctCusips,
            SUM(EntrdVolQt) AS TotalVolume
        FROM 
            TraceBond_filtered
        WHERE
            TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
        GROUP BY
            TrdExctnDt, 
            IssuerId
        ORDER BY
            TrdExctnDt, 
            IssuerId
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [12]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_totalVolume_by_issuer(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_totalVolume_by_issuer_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,TrdExctnDt,IssuerId,DistinctCusips,TotalVolume
0,2002-07-26,11,6,45716000.0
1,2002-07-26,20,2,1704000.0
2,2002-07-26,26,13,23449000.0
3,2002-07-26,34,1,50000.0
4,2002-07-26,41,6,5132000.0


### Daily Distinct Cusips by Rating

In [13]:
def distinctCusips_by_rating(year_start, year_end):
    """Daily distinct-CUSIP count per rating category.

    The query works in three layers:

    1. For every (``CusipId``, ``TrdExctnDt``) in ``TraceBond_filtered`` it finds
       the latest ``BondRatings.RatingDate`` that is on or before the trade date
       and has a non-NULL ``RatingCategory``.
    2. It joins back to ``BondRatings`` on that date and takes the smallest
       ``RatingCategory`` recorded for the bond on it (``MinimumRating``).
    3. It counts distinct CUSIPs per (``TrdExctnDt``, ``MinimumRating``).

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``TrdExctnDt``, ``MinimumRating`` and
        ``DistinctCusips``, ordered by date then rating.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
        NOTE(review): step 1 uses a LEFT JOIN, but the INNER JOIN in step 2
        discards rows whose ``MaxRatingDate`` is NULL, so bonds with no rating
        on or before the trade date do not appear in the result -- confirm this
        is intended.
    """
    query_template = '''
        SELECT
            TrdExctnDt,
            MinimumRating,
            COUNT(DISTINCT CusipId) AS DistinctCusips
        FROM (
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                MIN(B.RatingCategory) AS MinimumRating
            FROM (
                SELECT
                    A.CusipId, 
                    A.TrdExctnDt, 
                    MAX(B.RatingDate) AS MaxRatingDate
                FROM 
                    TraceBond_filtered A
                LEFT JOIN 
                    BondRatings B ON A.CusipId = B.CompleteCusip 
                    AND B.RatingDate <= A.TrdExctnDt 
                    AND B.RatingCategory IS NOT NULL
                WHERE
                    A.TrdExctnDt >= '{}-01-1' AND A.TrdExctnDt < '{}-01-01'
                GROUP BY
                    A.CusipId,
                    A.TrdExctnDt
            ) A
            INNER JOIN 
                BondRatings B ON B.CompleteCusip = A.CusipId AND RatingDate = MaxRatingDate
            GROUP BY
                A.CusipId,
                A.TrdExctnDt
        ) B
        GROUP BY
            TrdExctnDt,
            MinimumRating
        ORDER BY
            TrdExctnDt,
            MinimumRating
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [14]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_by_rating(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_by_rating_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,TrdExctnDt,MinimumRating,DistinctCusips
0,2002-07-26,1,61
1,2002-07-26,2,63
2,2002-07-26,3,111
3,2002-07-26,4,176
4,2002-07-26,5,220


### Distinct Cusips per Maturity Band

In [15]:
def distinctCusips_by_maturity(year_start, year_end):
    """Distinct-CUSIP count per maturity band over the whole window.

    The band is derived from the absolute day difference between ``Maturity``
    and ``OfferingDate`` divided by 360:

    * band 1 -- less than 5
    * band 2 -- less than 15
    * band 3 -- everything else

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``MaturityBand`` and ``DistinctCusips``, ordered
        by band.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
        NOTE(review): actual calendar days are divided by 360 rather than 365
        -- confirm this day-count choice is intended.
    """
    query_template = '''
        SELECT
            MaturityBand,
            COUNT(DISTINCT CusipId) AS DistinctCusips
        FROM (
            SELECT
                CASE 
                    WHEN ABS(DATEDIFF(DAY, Maturity, OfferingDate)) * 1.0 / 360 < 5 THEN 1
                    WHEN ABS(DATEDIFF(DAY, Maturity, OfferingDate)) * 1.0 / 360 < 15 THEN 2
                    ELSE 3
                END AS MaturityBand,
                CusipId
            FROM
                TraceBond_filtered
            WHERE
                TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
        ) A
        GROUP BY
            MaturityBand
        ORDER BY
            MaturityBand
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [16]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_by_maturity(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_by_maturity_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,MaturityBand,DistinctCusips
0,1,10506
1,2,19802
2,3,4359


### Distinct Cusips per Investment Grade

In [17]:
def distinctCusips_by_investmentGrade(year_start, year_end):
    """Distinct-CUSIP count per investment-grade flag over the whole window.

    For each CUSIP the query takes its first ``TrdExctnDt`` inside the window,
    then the smallest non-NULL ``BondRatings.RatingCategory`` dated on or before
    that first trade, and maps it to a flag:

    * ``'Y'``  -- category below 11
    * ``'N'``  -- category below 25
    * ``'NR'`` -- everything else, including bonds with no matching rating

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        DataFrame with columns ``InvestmentGrade`` and ``DistinctCusips``,
        ordered by flag.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
        NOTE(review): the rating used is the minimum over *all* ratings up to
        the first trade date, not the most recent one -- confirm this is the
        intended definition.
    """
    query_template = '''
        SELECT 
            InvestmentGrade,
            COUNT(DISTINCT CusipId) AS DistinctCusips
        FROM (
            SELECT
                CusipId,
                CASE
                    WHEN RatingCategory < 11 THEN 'Y'
                    WHEN RatingCategory < 25 THEN 'N'
                    ELSE 'NR'
                END AS InvestmentGrade
            FROM (
                SELECT
                    A.CusipId,
                    MIN(B.RatingCategory) AS RatingCategory
                FROM (
                    SELECT
                        CusipId,
                        MIN(TrdExctnDt) AS FirstTradeExecutionDate
                    FROM 
                        TraceBond_filtered
                    WHERE
                        TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
                    GROUP BY
                        CusipId
                ) A
                LEFT JOIN 
                    BondRatings B ON A.CusipId = B.CompleteCusip 
                    AND B.RatingDate <= A.FirstTradeExecutionDate 
                    AND B.RatingCategory IS NOT NULL
                GROUP BY
                    A.CusipId
            ) B
        ) C
        GROUP BY
            InvestmentGrade
        ORDER BY
            InvestmentGrade
    '''
    # The upper bound is exclusive, so it is the first day of the following year.
    query = query_template.format(year_start, year_end + 1)
    return pd.read_sql(query, connection)

In [18]:
# Reporting window in calendar years (both ends inclusive).
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctCusips_by_investmentGrade(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctCusips_by_investmentGrade_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,InvestmentGrade,DistinctCusips
0,N,3409
1,NR,11353
2,Y,19905


### Distinct Trading Days

In [20]:
def distinctTradingDays(year_start, year_end):
    """Count the distinct trade execution dates inside the window.

    Args:
        year_start: First calendar year of the window (inclusive).
        year_end: Last calendar year of the window (inclusive); the query uses
            January 1st of ``year_end + 1`` as an exclusive upper bound.

    Returns:
        Single-row DataFrame with one column, ``DistinctTradingDays``, holding
        the number of distinct ``TrdExctnDt`` values in ``TraceBond_filtered``.

    Note:
        Runs on the module-level ``connection``. The lower-bound literal is
        written as ``'<year>-01-1'`` (day not zero-padded).
    """
    # The aggregate is aliased so the DataFrame (and the CSV written from it)
    # gets a real column header instead of an empty/unnamed one.
    base_query = '''
        SELECT
            COUNT(DISTINCT TrdExctnDt) AS DistinctTradingDays
        FROM
            TraceBond_filtered
        WHERE
            TrdExctnDt >= '{}-01-1' AND TrdExctnDt < '{}-01-01'
    '''.format(
        year_start,
        year_end + 1
    )

    df = pd.read_sql(base_query, connection)

    return df

In [21]:
# Reporting window in calendar years (both ends inclusive).
# MIN = 2002-07-01 | MAX = 2022-09-30
start, end = 2002, 2022

# Run the query, persist the result as CSV, then preview the first rows.
df = distinctTradingDays(year_start=start, year_end=end)
df.to_csv(
    'data/output/bonds/distinctTradingDays_{}-{}.csv'.format(start, end),
    index=False,
)
df.head()

Unnamed: 0,Unnamed: 1
0,1007
