In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# config and credentials
# NOTE(review): on Windows the OS already defines USERNAME and environment names
# are case-insensitive; load_dotenv() does not override existing variables by
# default, so 'username' may resolve to the OS login instead of the .env value.
# Confirm, or consider a prefixed name such as DB_USERNAME.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# Fail early with a clear message instead of building a URL that contains the
# literal text "None" and failing later inside the driver.
missing_settings = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        'Missing environment variable(s): ' + ', '.join(missing_settings)
    )

# connection
# Username and password are URL-escaped so that characters such as '@', ':',
# '/' or '#' in the credentials cannot corrupt the connection URL. The values
# in .env are therefore expected to be the raw (not pre-encoded) credentials.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote_plus(username), quote_plus(password), server, database
    )
)

# establish connection (shared by every query cell below)
connection = engine.connect()

### Whole - Daily

In [3]:
# Herding terms per (TrdExctnDt, CusipId) for all rows with CntraMpId = 'C',
# written to source/lsv-daily.csv.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond on that date
#   Pt         = CustomerBuys / N for the bond on that date
#   P          = the same ratio summed over all bonds on that date
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDt,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDt,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
        ) A
        GROUP BY
            TrdExctnDt,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDt,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
            GROUP BY
                TrdExctnDt,
                CusipId
        ) A
        GROUP BY
            TrdExctnDt
    ) B ON A.TrdExctnDt = B.TrdExctnDt
    ORDER BY
        A.TrdExctnDt,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-daily.csv', index=False)

### Whole - Weekly

In [4]:
# Herding terms per (TrdExctnDtSOW, CusipId) for all rows with CntraMpId = 'C',
# written to source/lsv-weekly.csv.
#   TrdExctnDtSOW = DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt)
#                   DATEPART(WEEKDAY, ...) depends on the session's DATEFIRST
#                   setting. NOTE(review): presumably meant to be the Monday of
#                   the trade's week (true for DATEFIRST = 7); confirm.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that week
#   Pt         = CustomerBuys / N for the bond in that week
#   P          = the same ratio summed over all bonds in that week
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtSOW,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtSOW,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtSOW,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtSOW, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtSOW, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
        ) A
        GROUP BY
            TrdExctnDtSOW,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtSOW,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtSOW,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
            GROUP BY
                TrdExctnDtSOW,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtSOW
    ) B ON A.TrdExctnDtSOW = B.TrdExctnDtSOW
    ORDER BY
        A.TrdExctnDtSOW,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-weekly.csv', index=False)

### Whole - Monthly

In [3]:
# Herding terms per (TrdExctnDtEOM, CusipId) for all rows with CntraMpId = 'C',
# written to source/lsv-monthly.csv.
#   TrdExctnDtEOM = EOMONTH(TrdExctnDt), the last day of the trade's month
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that month
#   Pt         = CustomerBuys / N for the bond in that month
#   P          = the same ratio summed over all bonds in that month
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtEOM,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtEOM,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
        ) A
        GROUP BY
            TrdExctnDtEOM,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtEOM,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
            GROUP BY
                TrdExctnDtEOM,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtEOM
    ) B ON A.TrdExctnDtEOM = B.TrdExctnDtEOM
    ORDER BY
        A.TrdExctnDtEOM,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-monthly.csv', index=False)

### Whole - Yearly

In [5]:
# Herding terms per (TrdExctnDtYr, CusipId) for all rows with CntraMpId = 'C',
# written to source/lsv-yearly.csv.
#   TrdExctnDtYr = YEAR(TrdExctnDt)
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that year
#   Pt         = CustomerBuys / N for the bond in that year
#   P          = the same ratio summed over all bonds in that year
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtYr,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtYr,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
        ) A
        GROUP BY
            TrdExctnDtYr,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtYr,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
            ) A
            GROUP BY
                TrdExctnDtYr,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtYr
    ) B ON A.TrdExctnDtYr = B.TrdExctnDtYr
    ORDER BY
        A.TrdExctnDtYr,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-yearly.csv', index=False)

### Institutional - Daily

In [6]:
# Herding terms per (TrdExctnDt, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt >= 500000 (the "institutional" subset),
# written to source/lsv-daily-institutional.csv.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond on that date
#   Pt         = CustomerBuys / N for the bond on that date
#   P          = the same ratio summed over all bonds on that date
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDt,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDt,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt >= 500000
        ) A
        GROUP BY
            TrdExctnDt,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDt,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt >= 500000
            GROUP BY
                TrdExctnDt,
                CusipId
        ) A
        GROUP BY
            TrdExctnDt
    ) B ON A.TrdExctnDt = B.TrdExctnDt
    ORDER BY
        A.TrdExctnDt,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-daily-institutional.csv', index=False)

### Institutional - Weekly

In [7]:
# Herding terms per (TrdExctnDtSOW, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt >= 500000 (the "institutional" subset),
# written to source/lsv-weekly-institutional.csv.
#   TrdExctnDtSOW = DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt)
#                   DATEPART(WEEKDAY, ...) depends on the session's DATEFIRST
#                   setting. NOTE(review): presumably meant to be the Monday of
#                   the trade's week (true for DATEFIRST = 7); confirm.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that week
#   Pt         = CustomerBuys / N for the bond in that week
#   P          = the same ratio summed over all bonds in that week
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtSOW,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtSOW,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtSOW,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtSOW, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtSOW, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
        ) A
        GROUP BY
            TrdExctnDtSOW,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtSOW,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtSOW,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
            GROUP BY
                TrdExctnDtSOW,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtSOW
    ) B ON A.TrdExctnDtSOW = B.TrdExctnDtSOW
    ORDER BY
        A.TrdExctnDtSOW,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-weekly-institutional.csv', index=False)

### Institutional - Monthly

In [8]:
# Herding terms per (TrdExctnDtEOM, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt >= 500000 (the "institutional" subset),
# written to source/lsv-monthly-institutional.csv.
#   TrdExctnDtEOM = EOMONTH(TrdExctnDt), the last day of the trade's month
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that month
#   Pt         = CustomerBuys / N for the bond in that month
#   P          = the same ratio summed over all bonds in that month
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtEOM,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtEOM,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
        ) A
        GROUP BY
            TrdExctnDtEOM,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtEOM,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
            GROUP BY
                TrdExctnDtEOM,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtEOM
    ) B ON A.TrdExctnDtEOM = B.TrdExctnDtEOM
    ORDER BY
        A.TrdExctnDtEOM,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-monthly-institutional.csv', index=False)

### Institutional - Yearly

In [9]:
# Herding terms per (TrdExctnDtYr, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt >= 500000 (the "institutional" subset),
# written to source/lsv-yearly-institutional.csv.
#   TrdExctnDtYr = YEAR(TrdExctnDt)
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that year
#   Pt         = CustomerBuys / N for the bond in that year
#   P          = the same ratio summed over all bonds in that year
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtYr,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtYr,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
        ) A
        GROUP BY
            TrdExctnDtYr,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtYr,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt >= 500000
            ) A
            GROUP BY
                TrdExctnDtYr,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtYr
    ) B ON A.TrdExctnDtYr = B.TrdExctnDtYr
    ORDER BY
        A.TrdExctnDtYr,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-yearly-institutional.csv', index=False)

### Retail - Daily

In [10]:
# Herding terms per (TrdExctnDt, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt < 250000 (the "retail" subset),
# written to source/lsv-daily-retail.csv.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond on that date
#   Pt         = CustomerBuys / N for the bond on that date
#   P          = the same ratio summed over all bonds on that date
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDt,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDt,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDt, CusipId) AS CustomerSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt < 250000
        ) A
        GROUP BY
            TrdExctnDt,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDt,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDt,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt < 250000
            GROUP BY
                TrdExctnDt,
                CusipId
        ) A
        GROUP BY
            TrdExctnDt
    ) B ON A.TrdExctnDt = B.TrdExctnDt
    ORDER BY
        A.TrdExctnDt,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-daily-retail.csv', index=False)

### Retail - Weekly

In [11]:
# Herding terms per (TrdExctnDtSOW, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt < 250000 (the "retail" subset),
# written to source/lsv-weekly-retail.csv.
#   TrdExctnDtSOW = DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt)
#                   DATEPART(WEEKDAY, ...) depends on the session's DATEFIRST
#                   setting. NOTE(review): presumably meant to be the Monday of
#                   the trade's week (true for DATEFIRST = 7); confirm.
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that week
#   Pt         = CustomerBuys / N for the bond in that week
#   P          = the same ratio summed over all bonds in that week
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# The window counts are partitioned by the week-start expression so that they
# use the same grain as the outer GROUP BY and as P. Partitioning them by
# EOMONTH(TrdExctnDt) would make N and Pt monthly counts inside a weekly result.
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtSOW,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtSOW,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt), CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt), CusipId) AS CustomerSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt < 250000
        ) A
        GROUP BY
            TrdExctnDtSOW,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtSOW,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt) AS TrdExctnDtSOW,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM
                Trace_filteredWithRatings
            WHERE
                CntraMpId = 'C'
                AND EntrdVolQt < 250000
            GROUP BY
                DATEADD(DAY, 1 - DATEPART(WEEKDAY, TrdExctnDt) + 1, TrdExctnDt),
                CusipId
        ) A
        GROUP BY
            TrdExctnDtSOW
    ) B ON A.TrdExctnDtSOW = B.TrdExctnDtSOW
    ORDER BY
        A.TrdExctnDtSOW,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-weekly-retail.csv', index=False)

### Retail - Monthly

In [12]:
# Herding terms per (TrdExctnDtEOM, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt < 250000 (the "retail" subset),
# written to source/lsv-monthly-retail.csv.
#   TrdExctnDtEOM = EOMONTH(TrdExctnDt), the last day of the trade's month
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that month
#   Pt         = CustomerBuys / N for the bond in that month
#   P          = the same ratio summed over all bonds in that month
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtEOM,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtEOM,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtEOM, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt < 250000
            ) A
        ) A
        GROUP BY
            TrdExctnDtEOM,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtEOM,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtEOM,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    EOMONTH(TrdExctnDt) AS TrdExctnDtEOM
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt < 250000
            ) A
            GROUP BY
                TrdExctnDtEOM,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtEOM
    ) B ON A.TrdExctnDtEOM = B.TrdExctnDtEOM
    ORDER BY
        A.TrdExctnDtEOM,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-monthly-retail.csv', index=False)

### Retail - Yearly

In [14]:
# Herding terms per (TrdExctnDtYr, CusipId), restricted to rows with
# CntraMpId = 'C' AND EntrdVolQt < 250000 (the "retail" subset),
# written to source/lsv-yearly-retail.csv.
#   TrdExctnDtYr = YEAR(TrdExctnDt)
#   CustomerBuys / CustomerSells = number of rows with RptSideCd = 'S' / 'B'
#   N          = CustomerBuys + CustomerSells for the bond in that year
#   Pt         = CustomerBuys / N for the bond in that year
#   P          = the same ratio summed over all bonds in that year
#   FirstTerm  = |Pt - P|
#   SecondTerm = |P * BionomialDraw / N - P|, where BionomialDraw counts the
#                group's rows whose RandomDraw exceeds 1 - Pt
#   Hm         = FirstTerm - SecondTerm
#
# RandomDraw uses RAND(CHECKSUM(NEWID())) instead of a bare RAND(): SQL Server
# evaluates an unseeded RAND() once per query, so every row would share a single
# value and BionomialDraw could only be 0 or the full row count. NEWID() is
# evaluated per row, so each trade gets its own draw. The draws are not
# reproducible between runs.
# NOTE(review): SecondTerm draws with probability Pt and then scales by P;
# confirm this is the intended adjustment factor.
query = '''
    SELECT
        A.TrdExctnDtYr,
        A.CusipId,
        ABS(Pt - P) AS FirstTerm,
        ABS(1.0 * P * BionomialDraw / N - P) AS SecondTerm,
        1.0 * ABS(Pt - P) - 1.0 * ABS(1.0 * P * BionomialDraw / N - P) AS Hm
    FROM (
        SELECT
            TrdExctnDtYr,
            CusipId,
            MAX(CustomerBuys) + MAX(CustomerSells) AS N,
            1.0 * MAX(CustomerBuys) / (MAX(CustomerBuys) + MAX(CustomerSells)) AS Pt,
            SUM(CASE WHEN RandomDraw <= 1 - 1.0 * CustomerBuys / (CustomerBuys + CustomerSells) THEN 0 ELSE 1 END) AS BionomialDraw
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                RAND(CHECKSUM(NEWID())) AS RandomDraw,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) OVER (PARTITION BY TrdExctnDtYr, CusipId) AS CustomerSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt < 250000
            ) A
        ) A
        GROUP BY
            TrdExctnDtYr,
            CusipId
    ) A
    INNER JOIN (
        SELECT
            TrdExctnDtYr,
            1.0 * SUM(CustomerBuys) / (SUM(CustomerBuys) + SUM(CustomersSells)) AS P
        FROM (
            SELECT
                TrdExctnDtYr,
                CusipId,
                SUM(CASE WHEN RptSideCd = 'S' THEN 1 ELSE 0 END) AS CustomerBuys,
                SUM(CASE WHEN RptSideCd = 'B' THEN 1 ELSE 0 END) AS CustomersSells
            FROM (
                SELECT
                    *,
                    YEAR(TrdExctnDt) AS TrdExctnDtYr
                FROM
                    Trace_filteredWithRatings
                WHERE
                    CntraMpId = 'C'
                    AND EntrdVolQt < 250000
            ) A
            GROUP BY
                TrdExctnDtYr,
                CusipId
        ) A
        GROUP BY
            TrdExctnDtYr
    ) B ON A.TrdExctnDtYr = B.TrdExctnDtYr
    ORDER BY
        A.TrdExctnDtYr,
        A.CusipId
'''

# read sql: run the query on the shared connection and persist the result
df = pd.read_sql_query(query, connection)
df.to_csv('source/lsv-yearly-retail.csv', index=False)