In [1]:
import os
from urllib.parse import quote

import pandas as pd
import sqlalchemy as db
from dotenv import load_dotenv

In [2]:
load_dotenv()

# config and credentials, read from the process environment / .env file
# NOTE(review): on Windows `USERNAME` is a built-in, case-insensitive
# environment variable and load_dotenv() does not override existing variables
# by default, so `username` may resolve to the OS login instead of the .env
# value. Confirm, or rename the keys.
server = os.getenv('server')
database = os.getenv('database')
username = os.getenv('username')
password = os.getenv('password')

# fail early with a readable message instead of building a URL that contains
# the literal text "None"
missing = [
    name
    for name, value in (
        ('server', server),
        ('database', database),
        ('username', username),
        ('password', password),
    )
    if value is None
]
if missing:
    raise RuntimeError(
        'Missing environment variable(s): {}'.format(', '.join(missing))
    )

# connection
# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# username or password cannot be mis-parsed as URL delimiters.
engine = db.create_engine(
    'mssql://{}:{}@{}/{}?driver=ODBC+Driver+18+for+SQL+Server'.format(
        quote(username, safe=''), quote(password, safe=''), server, database
    )
)

# establish connection (the query cells in this notebook pass it to pd.read_sql)
connection = engine.connect()

### CSAD Stocks (monthly cross-sectional absolute deviation of stock returns)

In [7]:
# Monthly CSAD (cross-sectional absolute deviation) of stock returns.
#
# Innermost query: keeps each stock's last available trading day per calendar
#   month (join B), computes the month-over-month simple return from PrcCd with
#   LAG, and attaches the monthly averages of the MarketFactors columns (join C).
# Middle query: per month, sums the per-stock absolute deviation |R_i - Rm| and
#   counts the stocks that actually have a return that month.
# Outer query: Csad = sum of absolute deviations / number of stocks, plus
#   ABS(Rm) and POWER(Rm, 2).
#
# Fixes relative to the previous version:
#   * ABS is applied per stock inside SUM; it used to wrap the aggregate,
#     giving |SUM(R_i) - Rm| rather than SUM(|R_i - Rm|).
#   * The divisor counts only stocks with a non-NULL return; a stock's first
#     month has no LAG value and used to inflate the count.
#   * NULLIF guards the return against a zero previous price, which would
#     otherwise raise a divide-by-zero error and abort the query.
#
# NOTE(review): Rm is the monthly AVG of MarketFactors.Rm while MonthlyReturns
# is a simple month-over-month price return. Confirm both are on the same
# scale and horizon before interpreting Csad.
query = '''
    SELECT
        Datadate,
        MktRf, Smb, Hml, Rmw, Cma, Rf, Mom, Rm,
        ABS(Rm) AS AbsoluteRm,
        POWER(Rm, 2) AS SquaredRm,
        AbsDevSum / NULLIF(StockCount, 0) AS Csad
    FROM (
        SELECT
            DataDate,
            MktRf, Smb, Hml, Rmw, Cma, Rf, Mom, Rm,
            SUM(ABS(MonthlyReturns - Rm)) AS AbsDevSum,
            COUNT(DISTINCT CASE WHEN MonthlyReturns IS NOT NULL THEN LPermNo END) AS StockCount
        FROM (
            SELECT
                A.LPermNo,
                EOMONTH(A.DataDate) AS DataDate,
                (A.PrcCd / NULLIF(LAG(A.PrcCd) OVER (PARTITION BY A.LPermNo ORDER BY EOMONTH(A.DataDate)), 0)) - 1 AS MonthlyReturns,
                C.*
            FROM
                CrspcSecuritiesDaily A
            INNER JOIN (
                SELECT
                    LPermNo,
                    MAX(DataDate) AS MaxDate
                FROM
                    CrspcSecuritiesDaily A
                GROUP BY
                    LPermNo,
                    EOMONTH(DataDate)
            ) B ON A.LPermNo = B.LPermNo AND A.DataDate = B.MaxDate
            INNER JOIN (
                SELECT
                    EOMONTH(Date) AS Date,
                    AVG(MktRf) AS MktRf,
                    AVG(Smb) AS Smb,
                    AVG(Hml) AS Hml,
                    AVG(Rmw) AS Rmw,
                    AVG(Cma) AS Cma,
                    AVG(Rf) AS Rf,
                    AVG(Mom) AS Mom,
                    AVG(Rm) AS Rm
                FROM
                    MarketFactors
                GROUP BY
                    EOMONTH(Date)
            ) C ON EOMONTH(A.DataDate) = C.Date
        ) A
        GROUP BY
            DataDate,
            MktRf, Smb, Hml, Rmw, Cma, Rf, Mom, Rm
    ) A
    ORDER BY
        DataDate
'''

# read sql
df = pd.read_sql(query, connection)

# make sure the output folder exists so to_csv does not fail on a fresh checkout
os.makedirs('source', exist_ok=True)
df.to_csv('source/csad-stocks.csv', index=False)

### Distinct Stocks

In [4]:
# Number of distinct securities (LPermNo) in CrspcSecuritiesDaily.
# The aggregate is aliased so the resulting DataFrame column has a name and
# can be referenced as df['StockCount'].
query = '''
    SELECT
        COUNT(DISTINCT LPermNo) AS StockCount
    FROM
        CrspcSecuritiesDaily A
'''

df = pd.read_sql(query, connection)
df.head()

Unnamed: 0,Unnamed: 1
0,5646


### Distinct Count per Exchange

In [5]:
# Number of distinct securities (LPermNo) per Exchange value.
# The aggregate is aliased so the resulting DataFrame column has a name and
# can be referenced as df['StockCount'].
query = '''
    SELECT
        Exchange,
        COUNT(DISTINCT LPermNo) AS StockCount
    FROM
        CrspcSecuritiesDaily A
    GROUP BY
        Exchange
'''

df = pd.read_sql(query, connection)
df.head()

Unnamed: 0,Exchange,Unnamed: 2
0,AMEX,236
1,NYSE,1987
2,NASDAQ,3448


### Distinct Count per Industry

In [6]:
# Number of distinct securities (LPermNo) per Industry value.
# The aggregate is aliased so the resulting DataFrame column has a name and
# can be referenced as df['StockCount'].
query = '''
    SELECT
        Industry,
        COUNT(DISTINCT LPermNo) AS StockCount
    FROM
        CrspcSecuritiesDaily A
    GROUP BY
        Industry
'''

df = pd.read_sql(query, connection)
# show up to 12 rows so every industry group is visible, not just the first 5
df.head(12)

Unnamed: 0,Industry,Unnamed: 2
0,Public,515
1,Wholesale,102
2,Transportation,232
3,Construction,48
4,Utilities,166
5,Mining,193
6,Services,1025
7,Retail,261
8,Missing,15
9,Manufacturing,2096
