In [1]:
import os
from urllib.parse import quote

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials
# NOTE(review): load_dotenv() does not override variables that already exist in
# the process environment by default, so an OS-defined variable with the same
# name (e.g. USERNAME on Windows) may shadow the .env value -- confirm.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Stop with a readable message instead of building a URL that contains the
# literal text "None" for any setting that is not defined.
_unset = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if _unset:
    raise RuntimeError(
        'Missing database settings in the environment/.env file: ' + ', '.join(_unset)
    )

# connection
# The credentials are percent-encoded so that characters such as '@', ':', '/'
# or '%' inside them cannot be mistaken for URL delimiters.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote(username, safe=''), quote(password, safe=''), server, database
    )
)

# establish connection (shared by every query function below)
connection = engine.connect()

### Daily Distinct Cusips and Total Volume by Retail / Institutional grouping

In [21]:
def distinctCusips_totalVolume_by_retail_institutional(year_start, year_end, discard_nr):
    """Count distinct CUSIPs and sum traded volume per date and trade-size bucket.

    ``BondReturns`` is joined to ``BondIssues`` and ``BondIssuers``. Each row is
    tagged ``'R'`` when ``TDvolume < 100000`` and ``'IN'`` otherwise, then the
    rows are aggregated per ``Date`` and tag.

    Rows are kept only when ``IndustryGroup <> 4``, ``CountryDomicile = 'USA'``,
    ``OfferingDate < Maturity``, ``IndustryCode`` is outside 40-45 and ``Date``
    falls in ``[year_start-01-01, (year_end + 1)-01-01)``.

    Parameters
    ----------
    year_start : int
        First calendar year included. Anything ``int()`` accepts is allowed.
    year_end : int
        Last calendar year included. Anything ``int()`` accepts is allowed.
    discard_nr : bool
        When truthy, additionally require ``RatingNum <> 0`` (presumably this
        drops non-rated bonds -- confirm against the schema).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``RetailThreshold``, ``DistinctCusips`` and
        ``TotalVolume``, ordered by ``Date`` then ``RetailThreshold``.

    Raises
    ------
    ValueError, TypeError
        If a year cannot be converted to an integer.

    Notes
    -----
    The query runs on the module-level ``connection``.
    """
    # Coercing to int guarantees that only digits reach the SQL text, so the
    # values cannot break out of the quoted date literals.
    first_year = int(year_start)
    year_after_last = int(year_end) + 1

    # Optional predicate; it is placed on its own line inside the WHERE clause
    # so its validity does not depend on surrounding whitespace.
    rating_filter = 'AND RatingNum <> 0' if discard_nr else ''

    query = '''
        SELECT
            Date,
            RetailThreshold,
            COUNT(DISTINCT Cusip) AS DistinctCusips,
            SUM(TDvolume) AS TotalVolume
        FROM (
            SELECT
                A.Date,
                Cusip,
                TDvolume,
                CASE WHEN TDvolume < 100000 THEN 'R' ELSE 'IN' END AS RetailThreshold
            FROM
                BondReturns A
            INNER JOIN
                BondIssues B ON A.Cusip = B.CompleteCusip
            INNER JOIN
                BondIssuers C ON B.IssuerId = C.IssuerId
            WHERE
                C.IndustryGroup <> 4 -- governemt
                AND C.CountryDomicile = 'USA'
                AND B.OfferingDate < B.Maturity
                AND Date >= '{first_year}-01-01' AND Date < '{year_after_last}-01-01'
                AND C.IndustryCode NOT IN (40, 41, 42, 43, 44, 45)
                {rating_filter}
        ) A
        GROUP BY
            Date,
            RetailThreshold
        ORDER BY
            Date,
            RetailThreshold
    '''.format(
        first_year=first_year,
        year_after_last=year_after_last,
        rating_filter=rating_filter,
    )

    return pd.read_sql(query, connection)

In [22]:
# Sample window, expressed as first and last calendar year (both included).
start = 2002
end = 2022

df = distinctCusips_totalVolume_by_retail_institutional(
    year_start=start,
    year_end=end,
    discard_nr=True,
)
# Uncomment to persist the result:
# df.to_csv('data/output/bonds/distinctCusips_totalVolume_by_retail_institutional_{}-{}.csv'.format(start, end), index=False)
df.head()

Unnamed: 0,Date,RetailThreshold,DistinctCusips,TotalVolume
0,2002-07-31,IN,4256,385560261016
1,2002-07-31,R,658,24776768
2,2002-08-31,IN,4230,295426171487
3,2002-08-31,R,685,25476884
4,2002-09-30,IN,4231,277854057153


In [17]:
def industry_analysis_on_cusips_issuers(year_start, year_end, discard_nr):
    """Count distinct CUSIPs and distinct issuers per ``IndustryCode``.

    ``BondReturns`` is joined to ``BondIssues`` and ``BondIssuers`` and the
    surviving rows are grouped by ``IndustryCode``.

    Rows are kept only when ``IndustryGroup <> 4``, ``CountryDomicile = 'USA'``,
    ``OfferingDate < Maturity``, ``IndustryCode`` is outside 40-45 and ``Date``
    falls in ``[year_start-01-01, (year_end + 1)-01-01)``.

    Parameters
    ----------
    year_start : int
        First calendar year included. Anything ``int()`` accepts is allowed.
    year_end : int
        Last calendar year included. Anything ``int()`` accepts is allowed.
    discard_nr : bool
        When truthy, additionally require ``RatingNum <> 0`` (presumably this
        drops non-rated bonds -- confirm against the schema).

    Returns
    -------
    pandas.DataFrame
        Columns ``IndustryCode``, ``DistinctCusips`` and ``DistinctIssuers``,
        ordered by ``IndustryCode``.

    Raises
    ------
    ValueError, TypeError
        If a year cannot be converted to an integer.

    Notes
    -----
    The query runs on the module-level ``connection``.
    """
    # Coercing to int guarantees that only digits reach the SQL text, so the
    # values cannot break out of the quoted date literals.
    first_year = int(year_start)
    year_after_last = int(year_end) + 1

    # Optional predicate; it is placed on its own line inside the WHERE clause
    # so its validity does not depend on surrounding whitespace.
    rating_filter = 'AND RatingNum <> 0' if discard_nr else ''

    query = '''
        SELECT
            IndustryCode,
            DistinctCusips,
            DistinctIssuers
        FROM (
            SELECT
                IndustryCode,
                COUNT(DISTINCT Cusip) AS DistinctCusips,
                COUNT(DISTINCT C.IssuerID) AS DistinctIssuers
            FROM
                BondReturns A
            INNER JOIN
                BondIssues B ON A.Cusip = B.CompleteCusip
            INNER JOIN
                BondIssuers C ON B.IssuerId = C.IssuerId
            WHERE
                C.IndustryGroup <> 4 -- governemt
                AND C.CountryDomicile = 'USA'
                AND B.OfferingDate < B.Maturity
                AND Date >= '{first_year}-01-01' AND Date < '{year_after_last}-01-01'
                AND C.IndustryCode NOT IN (40, 41, 42, 43, 44, 45)
                {rating_filter}
            GROUP BY
                IndustryCode
        ) A
        ORDER BY
            IndustryCode
    '''.format(
        first_year=first_year,
        year_after_last=year_after_last,
        rating_filter=rating_filter,
    )

    return pd.read_sql(query, connection)

In [18]:
# Sample window, expressed as first and last calendar year (both included).
start = 2002
end = 2022

df = industry_analysis_on_cusips_issuers(
    year_start=start,
    year_end=end,
    discard_nr=True,
)
# Uncomment to persist the result:
# df.to_csv('data/output/bonds/industry_analysis_on_cusips_issuers_{}-{}.csv'.format(start, end), index=False)
df.head()

Unnamed: 0,IndustryCode,DistinctCusips,DistinctIssuers
0,10,5767,852
1,11,1409,233
2,12,1576,294
3,13,82,11
4,14,1089,147


In [20]:
# Adds up the DistinctCusips column of whichever `df` was assigned most recently.
# NOTE(review): presumably the per-industry frame from the cell above -- confirm,
# because `df` is reused by several cells and the execution counts are not sequential.
df['DistinctCusips'].sum()

26575

In [25]:
def distinctCusips_totalVolume(year_start, year_end, discard_nr):
    """Count distinct CUSIPs and sum traded volume per ``Date``.

    ``BondReturns`` is joined to ``BondIssues`` and ``BondIssuers`` and the
    surviving rows are grouped by ``Date``.

    Rows are kept only when ``IndustryGroup <> 4``, ``CountryDomicile = 'USA'``,
    ``OfferingDate < Maturity``, ``IndustryCode`` is outside 40-45 and ``Date``
    falls in ``[year_start-01-01, (year_end + 1)-01-01)``.

    Parameters
    ----------
    year_start : int
        First calendar year included. Anything ``int()`` accepts is allowed.
    year_end : int
        Last calendar year included. Anything ``int()`` accepts is allowed.
    discard_nr : bool
        When truthy, additionally require ``RatingNum <> 0`` (presumably this
        drops non-rated bonds -- confirm against the schema).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``DistinctCusips`` and ``TotalVolume``, ordered by
        ``Date``.

    Raises
    ------
    ValueError, TypeError
        If a year cannot be converted to an integer.

    Notes
    -----
    The query runs on the module-level ``connection``.
    """
    # Coercing to int guarantees that only digits reach the SQL text, so the
    # values cannot break out of the quoted date literals.
    first_year = int(year_start)
    year_after_last = int(year_end) + 1

    # Optional predicate; it is placed on its own line inside the WHERE clause
    # so its validity does not depend on surrounding whitespace.
    rating_filter = 'AND RatingNum <> 0' if discard_nr else ''

    query = '''
        SELECT
            Date,
            COUNT(DISTINCT Cusip) AS DistinctCusips,
            SUM(TDvolume) AS TotalVolume
        FROM
            BondReturns A
        INNER JOIN
            BondIssues B ON A.Cusip = B.CompleteCusip
        INNER JOIN
            BondIssuers C ON B.IssuerId = C.IssuerId
        WHERE
            C.IndustryGroup <> 4 -- governemt
            AND C.CountryDomicile = 'USA'
            AND B.OfferingDate < B.Maturity
            AND Date >= '{first_year}-01-01' AND Date < '{year_after_last}-01-01'
            AND C.IndustryCode NOT IN (40, 41, 42, 43, 44, 45)
            {rating_filter}
        GROUP BY
            Date
        ORDER BY
            Date
    '''.format(
        first_year=first_year,
        year_after_last=year_after_last,
        rating_filter=rating_filter,
    )

    return pd.read_sql(query, connection)

In [26]:
# Sample window, expressed as first and last calendar year (both included).
start = 2002
end = 2022

df = distinctCusips_totalVolume(
    year_start=start,
    year_end=end,
    discard_nr=True,
)
# Uncomment to persist the result:
# df.to_csv('data/output/bonds/distinctCusips_totalVolume_{}-{}.csv'.format(start, end), index=False)
df.head()

Unnamed: 0,Date,DistinctCusips,TotalVolume
0,2002-07-31,4914,385585037784
1,2002-08-31,4915,295451648371
2,2002-09-30,4892,277880823060
3,2002-10-31,4928,341567507441
4,2002-11-30,4845,278020812470


In [32]:
def distinctCusips_by_maturity(year_start, year_end, discard_nr):
    """Count distinct CUSIPs per maturity band.

    ``BondReturns`` is joined to ``BondIssues`` and ``BondIssuers``. Each row
    gets a ``MaturityBand`` from the absolute day difference between
    ``Maturity`` and ``OfferingDate`` divided by 360: band 1 when the result is
    below 5, band 2 when it is below 15, band 3 otherwise.

    Rows are kept only when ``IndustryGroup <> 4``, ``CountryDomicile = 'USA'``,
    ``OfferingDate < Maturity``, ``IndustryCode`` is outside 40-45 and ``Date``
    falls in ``[year_start-01-01, (year_end + 1)-01-01)``.

    Parameters
    ----------
    year_start : int
        First calendar year included. Anything ``int()`` accepts is allowed.
    year_end : int
        Last calendar year included. Anything ``int()`` accepts is allowed.
    discard_nr : bool
        When truthy, additionally require ``RatingNum <> 0`` (presumably this
        drops non-rated bonds -- confirm against the schema).

    Returns
    -------
    pandas.DataFrame
        Columns ``MaturityBand`` and ``DistinctCusips``, ordered by
        ``MaturityBand``.

    Raises
    ------
    ValueError, TypeError
        If a year cannot be converted to an integer.

    Notes
    -----
    The query runs on the module-level ``connection``.
    """
    # Coercing to int guarantees that only digits reach the SQL text, so the
    # values cannot break out of the quoted date literals.
    first_year = int(year_start)
    year_after_last = int(year_end) + 1

    # Optional predicate; it is placed on its own line inside the WHERE clause
    # so its validity does not depend on surrounding whitespace.
    rating_filter = 'AND RatingNum <> 0' if discard_nr else ''

    # NOTE(review): the band expression reads Maturity/OfferingDate through
    # alias A (BondReturns) while the WHERE clause reads them through alias B
    # (BondIssues) -- confirm both tables carry the same values.
    query = '''
        SELECT
            MaturityBand,
            COUNT(DISTINCT Cusip) AS DistinctCusips
        FROM (
            SELECT
                Cusip,
                CASE
                    WHEN ABS(DATEDIFF(DAY, A.Maturity, A.OfferingDate)) * 1.0 / 360 < 5 THEN 1
                    WHEN ABS(DATEDIFF(DAY, A.Maturity, A.OfferingDate)) * 1.0 / 360 < 15 THEN 2
                    ELSE 3
                END AS MaturityBand
            FROM
                BondReturns A
            INNER JOIN
                BondIssues B ON A.Cusip = B.CompleteCusip
            INNER JOIN
                BondIssuers C ON B.IssuerId = C.IssuerId
            WHERE
                C.IndustryGroup <> 4 -- governemt
                AND C.CountryDomicile = 'USA'
                AND B.OfferingDate < B.Maturity
                AND Date >= '{first_year}-01-01' AND Date < '{year_after_last}-01-01'
                AND C.IndustryCode NOT IN (40, 41, 42, 43, 44, 45)
                {rating_filter}
        ) A
        GROUP BY
            MaturityBand
        ORDER BY
            MaturityBand
    '''.format(
        first_year=first_year,
        year_after_last=year_after_last,
        rating_filter=rating_filter,
    )

    return pd.read_sql(query, connection)

In [33]:
# Sample window, expressed as first and last calendar year (both included).
start = 2002
end = 2022

df = distinctCusips_by_maturity(
    year_start=start,
    year_end=end,
    discard_nr=True,
)
# Uncomment to persist the result:
# df.to_csv('data/output/bonds/distinctCusips_by_maturity_{}-{}.csv'.format(start, end), index=False)
df.head()

Unnamed: 0,MaturityBand,DistinctCusips
0,1,4649
1,2,17412
2,3,4514
