In [1]:
import os
from urllib.parse import quote

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials
# NOTE(review): load_dotenv() leaves variables that already exist in the
# environment untouched by default, and Windows predefines USERNAME - confirm
# that 'username' resolves to the SQL login and not the OS account.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Stop with a readable message when a setting is absent; otherwise the value
# would be formatted into the URL as the literal text 'None'.
missing_settings = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        'Missing connection settings in environment/.env: ' + ', '.join(missing_settings)
    )

# connection
# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# login or password cannot be mistaken for URL delimiters.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote(username, safe=''), quote(password, safe=''), server, database
    )
)

# establish connection
connection = engine.connect()

### Daily

In [3]:
# Daily median Amihud illiquidity, one output row per trade date.
#
# Query structure, innermost first:
#   B     - per (CusipId, TrdExctnDt): latest TrdExctnTm (CloseTime) and the
#           summed EntrdVolQt (Volume).
#   C     - trades whose time equals CloseTime on days with non-zero Volume,
#           plus the preceding closing price of the same CusipId (LagRptdPr).
#   D     - Amihud = |RptdPr - LagRptdPr| / (RptdPr * Volume); rows with
#           RptdPr = 0 are dropped so the division cannot fail.
#   outer - discrete median (PERCENTILE_DISC 0.5) of Amihud per date, scaled
#           by 1,000,000; DISTINCT collapses the windowed result to one row
#           per date.
#
# NOTE(review): if several trades share the closing timestamp, the join keeps
# all of them for that CusipId and date - confirm this is intended.
query = '''
    SELECT DISTINCT
        TrdExctnDt,
        PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY Amihud) OVER (PARTITION BY TrdExctnDt) * 1000000 AS MedianAmihud
    FROM (
        SELECT
            CusipId,
            TrdExctnDt,
            ABS(RptdPr - LagRptdPr) / (RptdPr * Volume) AS Amihud
        FROM (    
            SELECT
                A.CusipId,
                A.TrdExctnDt,
                A.RptdPr,
                LAG(A.RptdPr) OVER (PARTITION BY A.CusipId ORDER BY A.TrdExctnDt) AS LagRptdPr,
                B.Volume
            FROM
                TraceFilteredWithRatings A
            INNER JOIN (    
                SELECT
                    CusipId,
                    TrdExctnDt,
                    MAX(TrdExctnTm) AS CloseTime,
                    SUM(EntrdVolQt) AS Volume
                FROM
                    TraceFilteredWithRatings
                GROUP BY
                    CusipId,
                    TrdExctnDt
            ) B ON A.CusipId = B.CusipId AND A.TrdExctnDt = B.TrdExctnDt AND A.TrdExctnTm = B.CloseTime
            WHERE
                B.Volume <> 0
        ) C
        WHERE
           RptdPr <> 0 
    ) D
    ORDER BY
        TrdExctnDt
'''

# Create the output folder before running the query so a missing directory
# cannot discard the result after the database work has been done.
os.makedirs('source', exist_ok=True)

# read sql
df = pd.read_sql(query, connection)
df.to_csv('source/robustness-amihud-daily.csv', index=False)

### Weekly

In [4]:
# Weekly median Amihud illiquidity, one output row per week key
# (TrdExctnDtSOW).
#
# Query structure, innermost first:
#   T     - every trade tagged with its week key TrdExctnDtSOW.
#   A     - per (CusipId, week): summed EntrdVolQt (Volume) and a rank of the
#           trades by (TrdExctnDt, TrdExctnTm) descending, so CloseRank = 1
#           marks the chronologically last trade of the week. RANK keeps all
#           trades tied on that last date and time.
#   C     - closing rows of weeks with non-zero Volume, plus the preceding
#           closing price of the same CusipId (LagRptdPr).
#   D     - Amihud = |RptdPr - LagRptdPr| / (RptdPr * Volume), the same
#           price-normalised form the Daily query in this notebook uses; rows
#           with RptdPr = 0 are dropped so the division cannot fail.
#   outer - discrete median (PERCENTILE_DISC 0.5) of Amihud per week, scaled
#           by 1,000,000.
#
# The close is ranked on date and time together because MAX(TrdExctnTm) over
# a multi-day bucket would select the latest clock time rather than the last
# trade - assumes TrdExctnTm is a time of day; TODO confirm. If it is a full
# timestamp the selected rows are the same either way.
# Assumes CusipId, TrdExctnDt and TrdExctnTm are never NULL - TODO confirm.
# NOTE(review): DATEPART(WEEKDAY, ...) depends on the session's SET DATEFIRST
# setting - confirm the server default yields the intended week start.
query = '''
    SELECT DISTINCT
        TrdExctnDtSOW,
        PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY Amihud) OVER (PARTITION BY TrdExctnDtSOW) * 1000000 AS MedianAmihud
    FROM (
        SELECT
            CusipId,
            TrdExctnDtSOW,
            ABS(RptdPr - LagRptdPr) / (RptdPr * Volume) AS Amihud
        FROM (
            SELECT
                CusipId,
                TrdExctnDtSOW,
                RptdPr,
                LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDtSOW) AS LagRptdPr,
                Volume
            FROM (
                SELECT
                    CusipId,
                    TrdExctnDtSOW,
                    RptdPr,
                    SUM(EntrdVolQt) OVER (PARTITION BY CusipId, TrdExctnDtSOW) AS Volume,
                    RANK() OVER (
                        PARTITION BY CusipId, TrdExctnDtSOW
                        ORDER BY TrdExctnDt DESC, TrdExctnTm DESC
                    ) AS CloseRank
                FROM (
                    SELECT
                        CusipId,
                        TrdExctnDt,
                        TrdExctnTm,
                        RptdPr,
                        EntrdVolQt,
                        DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW
                    FROM
                        TraceFilteredWithRatings
                ) T
            ) A
            WHERE
                CloseRank = 1
                AND Volume <> 0
        ) C
        WHERE
            RptdPr <> 0
    ) D
    ORDER BY
        TrdExctnDtSOW
'''

# Create the output folder before running the query so a missing directory
# cannot discard the result after the database work has been done.
os.makedirs('source', exist_ok=True)

# read sql
df = pd.read_sql(query, connection)
df.to_csv('source/robustness-amihud-weekly.csv', index=False)

### Monthly

In [5]:
# Monthly median Amihud illiquidity, one output row per month-end date
# (TrdExctnDtEOM).
#
# Query structure, innermost first:
#   T     - every trade tagged with EOMONTH(TrdExctnDt) as TrdExctnDtEOM.
#   A     - per (CusipId, month): summed EntrdVolQt (Volume) and a rank of the
#           trades by (TrdExctnDt, TrdExctnTm) descending, so CloseRank = 1
#           marks the chronologically last trade of the month. RANK keeps all
#           trades tied on that last date and time.
#   C     - closing rows of months with non-zero Volume, plus the preceding
#           closing price of the same CusipId (LagRptdPr).
#   D     - Amihud = |RptdPr - LagRptdPr| / (RptdPr * Volume), the same
#           price-normalised form the Daily query in this notebook uses; rows
#           with RptdPr = 0 are dropped so the division cannot fail.
#   outer - discrete median (PERCENTILE_DISC 0.5) of Amihud per month, scaled
#           by 1,000,000.
#
# The close is ranked on date and time together because MAX(TrdExctnTm) over
# a multi-day bucket would select the latest clock time rather than the last
# trade - assumes TrdExctnTm is a time of day; TODO confirm. If it is a full
# timestamp the selected rows are the same either way.
# Assumes CusipId, TrdExctnDt and TrdExctnTm are never NULL - TODO confirm.
query = '''
    SELECT DISTINCT
        TrdExctnDtEOM,
        PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY Amihud) OVER (PARTITION BY TrdExctnDtEOM) * 1000000 AS MedianAmihud
    FROM (
        SELECT
            CusipId,
            TrdExctnDtEOM,
            ABS(RptdPr - LagRptdPr) / (RptdPr * Volume) AS Amihud
        FROM (
            SELECT
                CusipId,
                TrdExctnDtEOM,
                RptdPr,
                LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDtEOM) AS LagRptdPr,
                Volume
            FROM (
                SELECT
                    CusipId,
                    TrdExctnDtEOM,
                    RptdPr,
                    SUM(EntrdVolQt) OVER (PARTITION BY CusipId, TrdExctnDtEOM) AS Volume,
                    RANK() OVER (
                        PARTITION BY CusipId, TrdExctnDtEOM
                        ORDER BY TrdExctnDt DESC, TrdExctnTm DESC
                    ) AS CloseRank
                FROM (
                    SELECT
                        CusipId,
                        TrdExctnDt,
                        TrdExctnTm,
                        RptdPr,
                        EntrdVolQt,
                        EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                    FROM
                        TraceFilteredWithRatings
                ) T
            ) A
            WHERE
                CloseRank = 1
                AND Volume <> 0
        ) C
        WHERE
            RptdPr <> 0
    ) D
    ORDER BY
        TrdExctnDtEOM
'''

# Create the output folder before running the query so a missing directory
# cannot discard the result after the database work has been done.
os.makedirs('source', exist_ok=True)

# read sql
df = pd.read_sql(query, connection)
df.to_csv('source/robustness-amihud-monthly.csv', index=False)

### Yearly

In [6]:
# Yearly median Amihud illiquidity, one output row per calendar year
# (TrdExctnDtYr).
#
# Query structure, innermost first:
#   T     - every trade tagged with YEAR(TrdExctnDt) as TrdExctnDtYr.
#   A     - per (CusipId, year): summed EntrdVolQt (Volume) and a rank of the
#           trades by (TrdExctnDt, TrdExctnTm) descending, so CloseRank = 1
#           marks the chronologically last trade of the year. RANK keeps all
#           trades tied on that last date and time.
#   C     - closing rows of years with non-zero Volume, plus the preceding
#           closing price of the same CusipId (LagRptdPr).
#   D     - Amihud = |RptdPr - LagRptdPr| / (RptdPr * Volume), the same
#           price-normalised form the Daily query in this notebook uses; rows
#           with RptdPr = 0 are dropped so the division cannot fail.
#   outer - discrete median (PERCENTILE_DISC 0.5) of Amihud per year, scaled
#           by 1,000,000.
#
# The close is ranked on date and time together because MAX(TrdExctnTm) over
# a multi-day bucket would select the latest clock time rather than the last
# trade - assumes TrdExctnTm is a time of day; TODO confirm. If it is a full
# timestamp the selected rows are the same either way.
# Assumes CusipId, TrdExctnDt and TrdExctnTm are never NULL - TODO confirm.
query = '''
    SELECT DISTINCT
        TrdExctnDtYr,
        PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY Amihud) OVER (PARTITION BY TrdExctnDtYr) * 1000000 AS MedianAmihud
    FROM (
        SELECT
            CusipId,
            TrdExctnDtYr,
            ABS(RptdPr - LagRptdPr) / (RptdPr * Volume) AS Amihud
        FROM (
            SELECT
                CusipId,
                TrdExctnDtYr,
                RptdPr,
                LAG(RptdPr) OVER (PARTITION BY CusipId ORDER BY TrdExctnDtYr) AS LagRptdPr,
                Volume
            FROM (
                SELECT
                    CusipId,
                    TrdExctnDtYr,
                    RptdPr,
                    SUM(EntrdVolQt) OVER (PARTITION BY CusipId, TrdExctnDtYr) AS Volume,
                    RANK() OVER (
                        PARTITION BY CusipId, TrdExctnDtYr
                        ORDER BY TrdExctnDt DESC, TrdExctnTm DESC
                    ) AS CloseRank
                FROM (
                    SELECT
                        CusipId,
                        TrdExctnDt,
                        TrdExctnTm,
                        RptdPr,
                        EntrdVolQt,
                        YEAR(TrdExctnDt) AS TrdExctnDtYr
                    FROM
                        TraceFilteredWithRatings
                ) T
            ) A
            WHERE
                CloseRank = 1
                AND Volume <> 0
        ) C
        WHERE
            RptdPr <> 0
    ) D
    ORDER BY
        TrdExctnDtYr
'''

# Create the output folder before running the query so a missing directory
# cannot discard the result after the database work has been done.
os.makedirs('source', exist_ok=True)

# read sql
df = pd.read_sql(query, connection)
df.to_csv('source/robustness-amihud-yearly.csv', index=False)