In [1]:
import os
from urllib.parse import quote

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
# Load key/value pairs from a local .env file into the process environment.
load_dotenv()

# config and credentials
# NOTE(review): on Windows the OS already defines USERNAME and environment
# variable names are case-insensitive there; load_dotenv() does not override
# existing variables by default, so 'username' may resolve to the OS login
# instead of the .env value -- confirm on the target platform.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Fail early with a clear message instead of trying to connect with the
# literal string 'None' substituted into the URL.
_missing = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if _missing:
    raise RuntimeError(
        'Missing required environment variable(s): {}'.format(', '.join(_missing))
    )

# connection
# Credentials are percent-encoded so that characters such as '@', ':', '/',
# '%' or '?' in the username/password cannot be mistaken for URL delimiters.
# safe='' also encodes '/', and spaces become '%20' (not '+'), which decodes
# correctly whichever unquoting rule the URL parser applies.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote(username, safe=''), quote(password, safe=''), server, database
    )
)

# establish connection (shared by every query cell below)
connection = engine.connect()

### Daily

In [3]:
# Daily sample: one candidate trade per CUSIP and trade date, differenced
# against the previous row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - The candidate trade time is the earliest TrdExctnTm of the day among rows
#     with CntraMpId = 'C' whose EntrdVolQt differs from 5,000,000
#     (RatingNum <= 10) or 1,000,000 (RatingNum >= 11). Rows with a NULL
#     RatingNum never satisfy that comparison. Only CUSIPs with at least 50
#     rows in Trace_filteredWithRatings are considered.
#   - TimeId = 1 keeps only rows whose previous row is exactly one calendar
#     day earlier (DATEDIFF(DAY, ...)), so rows following a weekend or any
#     other gap are dropped.
# NOTE(review): the join back to Trace_filteredWithRatings matches every row
# at that CUSIP/date/time, not only rows passing the CntraMpId/EntrdVolQt
# filter; if several rows share the timestamp, the LAG order among them is
# not defined by ORDER BY TrdExctnDt alone -- confirm this is intended.
query = '''
    SELECT
        CusipId,
        TrdExctnDt,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(DAY, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                A.RptdPr,
                A.EntrdVolQt,
                A.RptSideCd,
                CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
            FROM
                Trace_filteredWithRatings A
            INNER JOIN (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    MIN(A.TrdExctnTm) AS MinTrdExctnTm
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId
                    FROM
                        Trace_filteredWithRatings
                    GROUP BY
                        CusipId
                    HAVING
                        COUNT(*) >= 50
                ) B ON A.CusipId = B.CusipId
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                GROUP BY
                    A.CusipId,
                    A.TrdExctnDt
            ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.TrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
        ) A
    ) B
    WHERE
        TimeId = 1
        AND	Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-daily.csv', index=False)

#### Top Bonds

In [4]:
# Daily sample restricted to top bonds: same construction as the plain daily
# query (earliest qualifying TrdExctnTm per CUSIP and date, signed Qt/QtVt,
# one-calendar-day differences via TimeId = 1), but a row is kept only when
# BondReturns_topBonds has a match on CusipId and
# EOMONTH(TrdExctnDt) = TrdExctnDtEOM.
# The LAG columns are computed after that restriction, so "previous row"
# means the previous row that is itself matched in BondReturns_topBonds.
# NOTE(review): the join back to Trace_filteredWithRatings matches every row
# at that CUSIP/date/time, not only rows passing the CntraMpId/EntrdVolQt
# filter -- confirm this is intended.
query = '''
    SELECT
        CusipId,
        TrdExctnDt,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(DAY, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        A.CusipId,
                        A.TrdExctnDt,
                        MIN(A.TrdExctnTm) AS MinTrdExctnTm
                    FROM
                        Trace_filteredWithRatings A
                    INNER JOIN (
						SELECT
							CusipId
						FROM
							Trace_filteredWithRatings
						GROUP BY
							CusipId
						HAVING
							COUNT(*) >= 50
					) B ON A.CusipId = B.CusipId
                    WHERE
                        CntraMpId = 'C'
                        AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    GROUP BY
                        A.CusipId,
                        A.TrdExctnDt
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.TrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A 
            INNER JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        ) A
    ) B
    WHERE
        TimeId = 1
        AND	Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-topBonds-daily.csv', index=False)

#### Non Top Bonds

In [3]:
# Daily sample restricted to non-top bonds: same construction as the plain
# daily query (earliest qualifying TrdExctnTm per CUSIP and date, signed
# Qt/QtVt, one-calendar-day differences via TimeId = 1), but a row is kept
# only when BondReturns_topBonds has NO match on CusipId and
# EOMONTH(TrdExctnDt) = TrdExctnDtEOM (LEFT JOIN ... WHERE B.CusipId IS NULL).
# The LAG columns are computed after that restriction, so "previous row"
# means the previous row that is itself unmatched in BondReturns_topBonds.
# NOTE(review): the join back to Trace_filteredWithRatings matches every row
# at that CUSIP/date/time, not only rows passing the CntraMpId/EntrdVolQt
# filter -- confirm this is intended.
query = '''
    SELECT
        CusipId,
        TrdExctnDt,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(DAY, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        A.CusipId,
                        A.TrdExctnDt,
                        MIN(A.TrdExctnTm) AS MinTrdExctnTm
                    FROM
                        Trace_filteredWithRatings A
                    INNER JOIN (
						SELECT
							CusipId
						FROM
							Trace_filteredWithRatings
						GROUP BY
							CusipId
						HAVING
							COUNT(*) >= 50
					) B ON A.CusipId = B.CusipId
                    WHERE
                        CntraMpId = 'C'
                        AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    GROUP BY
                        A.CusipId,
                        A.TrdExctnDt
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.TrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A 
            LEFT JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
            WHERE
                B.CusipId IS NULL
        ) A
    ) B
    WHERE
        TimeId = 1
        AND	Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-nonTopBonds-daily.csv', index=False)

### Weekly

In [4]:
# Weekly sample: the first qualifying trade of each CUSIP-week, differenced
# against the previous row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a week is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-week is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding week
#     (DATEDIFF(WEEK, ...)).
#   - TrdExctnDtSOW is derived from DATEPART(WEEKDAY, ...), whose value depends
#     on the session's DATEFIRST setting.
query = '''
    SELECT
        CusipId,
        DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(WEEK, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                A.RptdPr,
                A.EntrdVolQt,
                A.RptSideCd,
                CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
            FROM
                Trace_filteredWithRatings A
            INNER JOIN (
                SELECT
                    CusipId,
                    TrdExctnDtSOW,
                    TrdExctnDt AS MinTrdExctnDt,
                    TrdExctnTm AS MinTrdExctnTm
                FROM (
                    SELECT
                        CusipId,
                        TrdExctnDtSOW,
                        TrdExctnDt,
                        TrdExctnTm,
                        ROW_NUMBER() OVER (
                            PARTITION BY CusipId, TrdExctnDtSOW
                            ORDER BY TrdExctnDt, TrdExctnTm
                        ) AS TrdRank
                    FROM (
                        SELECT
                            A.*,
                            DATEADD(DAY, 1 - DATEPART(WEEKDAY, A.TrdExctnDt) + 1, A.TrdExctnDt) AS TrdExctnDtSOW
                        FROM
                            Trace_filteredWithRatings A
                        INNER JOIN (
                            SELECT
                                CusipId
                            FROM
                                Trace_filteredWithRatings
                            GROUP BY
                                CusipId
                            HAVING
                                COUNT(*) >= 50
                        ) B ON A.CusipId = B.CusipId
                    ) A
                    WHERE
                        CntraMpId = 'C'
                        AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                ) A
                WHERE
                    TrdRank = 1
            ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-weekly.csv', index=False)

#### Top Bonds

In [5]:
# Weekly sample restricted to top bonds: the first qualifying trade of each
# CUSIP-week, kept only when BondReturns_topBonds has a match on CusipId and
# EOMONTH(TrdExctnDt) = TrdExctnDtEOM, then differenced against the previous
# kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a week is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-week is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding week
#     (DATEDIFF(WEEK, ...)).
#   - TrdExctnDtSOW is derived from DATEPART(WEEKDAY, ...), whose value depends
#     on the session's DATEFIRST setting.
query = '''
    SELECT
        CusipId,
        DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(WEEK, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDtSOW,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            CusipId,
                            TrdExctnDtSOW,
                            TrdExctnDt,
                            TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY CusipId, TrdExctnDtSOW
                                ORDER BY TrdExctnDt, TrdExctnTm
                            ) AS TrdRank
                        FROM (
                            SELECT
                                A.*,
                                DATEADD(DAY, 1 - DATEPART(WEEKDAY, A.TrdExctnDt) + 1, A.TrdExctnDt) AS TrdExctnDtSOW
                            FROM
                                Trace_filteredWithRatings A
                            INNER JOIN (
                                SELECT
                                    CusipId
                                FROM
                                    Trace_filteredWithRatings
                                GROUP BY
                                    CusipId
                                HAVING
                                    COUNT(*) >= 50
                            ) B ON A.CusipId = B.CusipId
                        ) A
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            INNER JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-topBonds-weekly.csv', index=False)

#### Non Top Bonds

In [6]:
# Weekly sample restricted to non-top bonds: the first qualifying trade of
# each CUSIP-week, kept only when BondReturns_topBonds has NO match on CusipId
# and EOMONTH(TrdExctnDt) = TrdExctnDtEOM (LEFT JOIN ... IS NULL), then
# differenced against the previous kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a week is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-week is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding week
#     (DATEDIFF(WEEK, ...)).
#   - TrdExctnDtSOW is derived from DATEPART(WEEKDAY, ...), whose value depends
#     on the session's DATEFIRST setting.
query = '''
    SELECT
        CusipId,
        DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(WEEK, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDtSOW,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            CusipId,
                            TrdExctnDtSOW,
                            TrdExctnDt,
                            TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY CusipId, TrdExctnDtSOW
                                ORDER BY TrdExctnDt, TrdExctnTm
                            ) AS TrdRank
                        FROM (
                            SELECT
                                A.*,
                                DATEADD(DAY, 1 - DATEPART(WEEKDAY, A.TrdExctnDt) + 1, A.TrdExctnDt) AS TrdExctnDtSOW
                            FROM
                                Trace_filteredWithRatings A
                            INNER JOIN (
                                SELECT
                                    CusipId
                                FROM
                                    Trace_filteredWithRatings
                                GROUP BY
                                    CusipId
                                HAVING
                                    COUNT(*) >= 50
                            ) B ON A.CusipId = B.CusipId
                        ) A
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            LEFT JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
            WHERE
                B.CusipId IS NULL
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-nonTopBonds-weekly.csv', index=False)

### Monthly

In [7]:
# Monthly sample: the first qualifying trade of each CUSIP-month, differenced
# against the previous row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a month is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-month is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding month
#     (DATEDIFF(MONTH, ...)).
query = '''
    SELECT
        CusipId,
        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(MONTH, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                A.RptdPr,
                A.EntrdVolQt,
                A.RptSideCd,
                CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
            FROM
                Trace_filteredWithRatings A
            INNER JOIN (
                SELECT
                    CusipId,
                    TrdExctnDtEOM,
                    TrdExctnDt AS MinTrdExctnDt,
                    TrdExctnTm AS MinTrdExctnTm
                FROM (
                    SELECT
                        CusipId,
                        TrdExctnDtEOM,
                        TrdExctnDt,
                        TrdExctnTm,
                        ROW_NUMBER() OVER (
                            PARTITION BY CusipId, TrdExctnDtEOM
                            ORDER BY TrdExctnDt, TrdExctnTm
                        ) AS TrdRank
                    FROM (
                        SELECT
                            A.*,
                            EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM
                        FROM
                            Trace_filteredWithRatings A
                        INNER JOIN (
                            SELECT
                                CusipId
                            FROM
                                Trace_filteredWithRatings
                            GROUP BY
                                CusipId
                            HAVING
                                COUNT(*) >= 50
                        ) B ON A.CusipId = B.CusipId
                    ) A
                    WHERE
                        CntraMpId = 'C'
                        AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                ) A
                WHERE
                    TrdRank = 1
            ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-monthly.csv', index=False)

#### Top Bonds

In [8]:
# Monthly sample restricted to top bonds: the first qualifying trade of each
# CUSIP-month, kept only when BondReturns_topBonds has a match on CusipId and
# EOMONTH(TrdExctnDt) = TrdExctnDtEOM, then differenced against the previous
# kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a month is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-month is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding month
#     (DATEDIFF(MONTH, ...)).
query = '''
    SELECT
        CusipId,
        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(MONTH, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDtEOM,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            CusipId,
                            TrdExctnDtEOM,
                            TrdExctnDt,
                            TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY CusipId, TrdExctnDtEOM
                                ORDER BY TrdExctnDt, TrdExctnTm
                            ) AS TrdRank
                        FROM (
                            SELECT
                                A.*,
                                EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM
                            FROM
                                Trace_filteredWithRatings A
                            INNER JOIN (
                                SELECT
                                    CusipId
                                FROM
                                    Trace_filteredWithRatings
                                GROUP BY
                                    CusipId
                                HAVING
                                    COUNT(*) >= 50
                            ) B ON A.CusipId = B.CusipId
                        ) A
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            INNER JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-topBonds-monthly.csv', index=False)

#### Non Top Bonds

In [9]:
# Monthly sample restricted to non-top bonds: the first qualifying trade of
# each CUSIP-month, kept only when BondReturns_topBonds has NO match on
# CusipId and EOMONTH(TrdExctnDt) = TrdExctnDtEOM (LEFT JOIN ... IS NULL),
# then differenced against the previous kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a month is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-month is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding month
#     (DATEDIFF(MONTH, ...)).
query = '''
    SELECT
        CusipId,
        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(MONTH, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDtEOM,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            CusipId,
                            TrdExctnDtEOM,
                            TrdExctnDt,
                            TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY CusipId, TrdExctnDtEOM
                                ORDER BY TrdExctnDt, TrdExctnTm
                            ) AS TrdRank
                        FROM (
                            SELECT
                                A.*,
                                EOMONTH(A.TrdExctnDt) AS TrdExctnDtEOM
                            FROM
                                Trace_filteredWithRatings A
                            INNER JOIN (
                                SELECT
                                    CusipId
                                FROM
                                    Trace_filteredWithRatings
                                GROUP BY
                                    CusipId
                                HAVING
                                    COUNT(*) >= 50
                            ) B ON A.CusipId = B.CusipId
                        ) A
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            LEFT JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
            WHERE
                B.CusipId IS NULL
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-nonTopBonds-monthly.csv', index=False)

### Yearly

In [10]:
# Yearly sample: the first qualifying trade of each CUSIP-year, differenced
# against the previous row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a year is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-year is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding calendar
#     year (DATEDIFF(YEAR, ...)).
query = '''
    SELECT
        CusipId,
        YEAR(TrdExctnDt) AS TrdExctnDtYr,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(YEAR, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                A.RptdPr,
                A.EntrdVolQt,
                A.RptSideCd,
                CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
            FROM
                Trace_filteredWithRatings A
            INNER JOIN (
                SELECT
                    CusipId,
                    TrdExctnDt AS MinTrdExctnDt,
                    TrdExctnTm AS MinTrdExctnTm
                FROM (
                    SELECT
                        A.CusipId,
                        A.TrdExctnDt,
                        A.TrdExctnTm,
                        ROW_NUMBER() OVER (
                            PARTITION BY A.CusipId, YEAR(A.TrdExctnDt)
                            ORDER BY A.TrdExctnDt, A.TrdExctnTm
                        ) AS TrdRank
                    FROM
                        Trace_filteredWithRatings A
                    INNER JOIN (
                        SELECT
                            CusipId
                        FROM
                            Trace_filteredWithRatings
                        GROUP BY
                            CusipId
                        HAVING
                            COUNT(*) >= 50
                    ) B ON A.CusipId = B.CusipId
                    WHERE
                        CntraMpId = 'C'
                        AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                ) A
                WHERE
                    TrdRank = 1
            ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-yearly.csv', index=False)

#### Top Bonds

In [11]:
# Yearly sample restricted to top bonds: the first qualifying trade of each
# CUSIP-year, kept only when BondReturns_topBonds has a match on CusipId and
# EOMONTH(TrdExctnDt) = TrdExctnDtEOM, then differenced against the previous
# kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a year is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-year is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding calendar
#     year (DATEDIFF(YEAR, ...)).
query = '''
    SELECT
        CusipId,
        YEAR(TrdExctnDt) AS TrdExctnDtYr,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(YEAR, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            A.CusipId,
                            A.TrdExctnDt,
                            A.TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY A.CusipId, YEAR(A.TrdExctnDt)
                                ORDER BY A.TrdExctnDt, A.TrdExctnTm
                            ) AS TrdRank
                        FROM
                            Trace_filteredWithRatings A
                        INNER JOIN (
                            SELECT
                                CusipId
                            FROM
                                Trace_filteredWithRatings
                            GROUP BY
                                CusipId
                            HAVING
                                COUNT(*) >= 50
                        ) B ON A.CusipId = B.CusipId
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            INNER JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-topBonds-yearly.csv', index=False)

#### Non Top Bonds

In [12]:
# Yearly sample restricted to non-top bonds: the first qualifying trade of
# each CUSIP-year, kept only when BondReturns_topBonds has NO match on CusipId
# and EOMONTH(TrdExctnDt) = TrdExctnDtEOM (LEFT JOIN ... IS NULL), then
# differenced against the previous kept row of the same CUSIP.
#   - Qt is +1 for RptSideCd 'S' and -1 for 'B' (NULL for any other code);
#     QtVt is the same sign applied to EntrdVolQt.
#   - Qualifying trades have CntraMpId = 'C' and an EntrdVolQt different from
#     5,000,000 (RatingNum <= 10) or 1,000,000 (RatingNum >= 11); only CUSIPs
#     with at least 50 rows in Trace_filteredWithRatings are considered.
#   - The first trade of a year is picked with ROW_NUMBER() ordered by
#     (TrdExctnDt, TrdExctnTm). Taking MIN(TrdExctnDt) and MIN(TrdExctnTm)
#     independently can pair the earliest date with a time from another day;
#     the join back then finds no row and the whole CUSIP-year is lost.
#     NOTE(review): that failure needs TrdExctnTm to be a time of day; if it
#     is a full timestamp both approaches select the same trade.
#   - TimeId = 1 keeps rows whose previous row falls in the preceding calendar
#     year (DATEDIFF(YEAR, ...)).
query = '''
    SELECT
        CusipId,
        YEAR(TrdExctnDt) AS TrdExctnDtYr,
        RptdPr,
        EntrdVolQt,
        LagRptdPr,
        RptdPr - LagRptdPr AS DPt,
        Qt,
        LagQt,
        Qt - LagQt AS DQt,
        QtVt,
        LagQtVt,
        QtVt - LagQtVt AS DQtVt
    FROM (
        SELECT
            *,
            LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagRptdPr,
            LAG(Qt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQt,
            LAG(QtVt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt) AS LagQtVt,
            DATEDIFF(YEAR, LAG(TrdExctnDt) OVER (PARTITION BY CusipId ORDER BY TrdExctnDt), TrdExctnDt) AS TimeId
        FROM (
            SELECT
                A.*
            FROM (
                SELECT
                    A.CusipId,
                    A.TrdExctnDt,
                    A.RptdPr,
                    A.EntrdVolQt,
                    A.RptSideCd,
                    CASE WHEN RptSideCd = 'S' THEN 1 WHEN RptSideCd = 'B' THEN -1 END AS Qt,
                    CASE WHEN RptSideCd = 'S' THEN 1 * EntrdVolQt WHEN RptSideCd = 'B' THEN -1 * EntrdVolQt END AS QtVt
                FROM
                    Trace_filteredWithRatings A
                INNER JOIN (
                    SELECT
                        CusipId,
                        TrdExctnDt AS MinTrdExctnDt,
                        TrdExctnTm AS MinTrdExctnTm
                    FROM (
                        SELECT
                            A.CusipId,
                            A.TrdExctnDt,
                            A.TrdExctnTm,
                            ROW_NUMBER() OVER (
                                PARTITION BY A.CusipId, YEAR(A.TrdExctnDt)
                                ORDER BY A.TrdExctnDt, A.TrdExctnTm
                            ) AS TrdRank
                        FROM
                            Trace_filteredWithRatings A
                        INNER JOIN (
                            SELECT
                                CusipId
                            FROM
                                Trace_filteredWithRatings
                            GROUP BY
                                CusipId
                            HAVING
                                COUNT(*) >= 50
                        ) B ON A.CusipId = B.CusipId
                        WHERE
                            CntraMpId = 'C'
                            AND EntrdVolQt <> CASE WHEN RatingNum <= 10 THEN 5000000 WHEN RatingNum >= 11 THEN 1000000 END
                    ) A
                    WHERE
                        TrdRank = 1
                ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.MinTrdExctnDt AND A.TrdExctnTm = B.MinTrdExctnTm
            ) A
            LEFT JOIN
                BondReturns_topBonds B ON A.CusipId = B.CusipId AND EOMONTH(A.TrdExctnDt) = B.TrdExctnDtEOM
            WHERE
                B.CusipId IS NULL
        ) A
    ) B
    WHERE
        TimeId = 1
        AND Qt - LagQt IS NOT NULL
        AND QtVt - LagQtVt IS NOT NULL
        AND RptdPr - LagRptdPr IS NOT NULL
    ORDER BY
        TrdExctnDt,
        CusipId
'''

# Run the query on the shared connection and export the result (no index column).
df = pd.read_sql_query(query, connection)
df.to_csv('source/informationAsymmetry-nonTopBonds-yearly.csv', index=False)