In [1]:
import os
import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# connection
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        username, password, server, database
    )
)

# establish connection
connection = engine.connect()

### Herding Analysis

In [3]:
def herding_bondMarket(year_start, year_end, discard_nr, select_retail_institunional='None'):
    
    base_query = '''
        SELECT
            Date,
            Rm,
            ABS(Rm) AS AbsoluteRm,
            POWER(Rm, 2) AS SquaredRm,
            Sum / Count AS Csad,
            CASE 
                WHEN Rm <= [dbo].[leftTailBondReturns]() THEN 1
                ELSE 0
            END AS LeftTail,
            CASE 
                WHEN Rm >= [dbo].[rightTailBondReturns]() THEN 1
                ELSE 0
            END AS RightTail
        FROM (
            SELECT
                Date,
                Rm,
                ABS(SUM(RetEom) - Rm) AS Sum,
                COUNT(DISTINCT Cusip) AS Count
            FROM (
                SELECT
                    A.Date,
                    A.Cusip,
                    A.RetEom,
                    B.Rm
                FROM
                    BondReturns A
                INNER JOIN (
                    -- MONTHLY RM
                    SELECT
                        Date,
                        SUM(RetEom / 100 * TDvolume) / SUM(TDvolume) AS Rm
                    FROM (
                        SELECT
                            Date,
                            RetEom,
                            TDvolume,
                            RatingNum,
                            CASE
                                WHEN TDvolume < 100000 THEN 'R'
                                ELSE 'I'
                            END AS RetailThreshold
                        FROM
                            BondReturns
                        WHERE Cusip IN (SELECT DISTINCT CusipId FROM Trace_filtered_withRatings)
                    ) A
                    WHERE
                        Date >= '{}-01-01' AND Date < '{}-01-01'
    '''.format(
        year_start,
        year_end + 1
    )
    
    if discard_nr:
        base_query += 'AND RatingNum IS NOT NULL\n'
        
    if select_retail_institunional == 'ret':
        base_query += 'AND RetailThreshold = \'R\''
    elif select_retail_institunional == 'int':
        base_query += 'AND RetailThreshold = \'I\''
    
    base_query += '''
                    GROUP BY
                        Date
                ) B ON A.Date = B.Date
            ) A
            GROUP BY
                Date,
                Rm
        ) B
        ORDER BY
            Date
    '''.format(
        year_start,
        year_end + 1
    )
    
    df = pd.read_sql(base_query, connection)
    
    return df

In [4]:
start, end = 2002, 2022

for x in ['int', 'ret', 'None']:
    df = herding_bondMarket(year_start=start, year_end=end, discard_nr=True, select_retail_institunional=x)
    df.to_csv('data/output/bonds/herding_bondMarket_{}_{}-{}.csv'.format(x, start, end), index=False)
    
df.head()

Unnamed: 0,Date,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail
0,2002-07-31,-0.002323,0.002323,5e-06,0.072513,0,0
1,2002-08-31,0.034931,0.034931,0.00122,1.890931,0,1
2,2002-09-30,0.002131,0.002131,5e-06,0.987137,0,0
3,2002-10-31,-0.006728,0.006728,4.5e-05,0.820465,0,0
4,2002-11-30,0.04555,0.04555,0.002075,3.016086,0,1


In [10]:
def herding_bondMarket_v2(year_start, year_end, discard_nr, select_retail_institunional='None'):
    
    base_query = '''
        SELECT
            Date,
            Rm,
            ABS(Rm) AS AbsoluteRm,
            POWER(Rm, 2) AS SquaredRm,
            Sum / Count AS Csad
        FROM (
            SELECT
                Date,
                Rm,
                ABS(SUM(R) - Rm) AS Sum,
                COUNT(DISTINCT Cusip) AS Count
            FROM (    
                SELECT
                    A.TrdExctnDt AS Date,
                    A.CusipId AS Cusip,
                    A.R,
                    B.Rm
                FROM
                    BondReturns_fromTrace A
                INNER JOIN (
                    SELECT
                        Date,
                        SUM(RetEod * EntrdVolQt) / SUM(EntrdVolQt) AS Rm
                    FROM (
                        SELECT
                            TrdExctnDt AS Date,
                            R AS RetEod,
                            EntrdVolQt
                        FROM
                            BondReturns_fromTrace
                        WHERE
                            Rated = 1
                            AND TrdExctnDt >= '{}-01-01' AND TrdExctnDt < '{}-01-01'
                    ) A
                    GROUP BY 
                        Date
                ) B ON A.TrdExctnDt = B.Date
            ) C
            GROUP BY
                Date,
                Rm
        ) D
        ORDER BY
            Date
    '''.format(
        year_start,
        year_end + 1
    )
    
    df = pd.read_sql(base_query, connection)
    
    return df

In [11]:
start, end = 2002, 2022

df = herding_bondMarket_v2(year_start=start, year_end=end, discard_nr=True)
df.to_csv('data/output/bonds/herding_bondMarket_{}-{}.csv'.format(start, end), index=False)
    
df.head()

Unnamed: 0,Date,Rm,AbsoluteRm,SquaredRm,Csad
0,2002-07-02,-0.0461,0.0461,0.002125,0.022339
1,2002-07-03,0.018747,0.018747,0.000351,0.11289
2,2002-07-08,-0.010717,0.010717,0.000115,0.159018
3,2002-07-09,0.019075,0.019075,0.000364,0.020062
4,2002-07-10,-0.103931,0.103931,0.010802,0.00889
