**Importing all the necessary libraries**

In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine, MetaData, update, select, Column, Integer, String, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

**Connecting notebook to the postgres database via postgres connection string**

In [2]:
# Connection settings.
# The connection string is read from the PROJECT_HAMBURG_DATABASE_URL environment
# variable when it is set, so credentials do not have to live in the notebook.
# The literal below is only the fallback used when the variable is unset.
# NOTE(review): the fallback still embeds a username and password -- rotate these
# credentials and drop the literal once the environment variable is configured.
database_connection_string = os.environ.get(
    "PROJECT_HAMBURG_DATABASE_URL",
    'postgresql://postgres:postgres@ec2-18-141-177-116.ap-southeast-1.compute.amazonaws.com:5432/Project-Hamburg',
)
engine = create_engine(database_connection_string)

# Two handles on the same engine: `session.bind` is what the pandas cells pass to
# pd.read_sql, `conn` is used for the direct conn.execute(...) lookups.
session = Session(engine)
conn = engine.connect()
base = declarative_base()

  base = declarative_base()


**List of Tables within Database.**

In [3]:
# Reflect the existing database schema so every table is available as a Table object.
# The engine is handed to reflect() instead of MetaData(): the `bind` argument of
# MetaData() was removed in SQLAlchemy 2.0, whereas reflect(bind=...) is accepted
# by both the 1.4 and 2.0 series.
meta = MetaData()
meta.reflect(bind=engine)
print("List of Tables in Postgres database.")
print(list(meta.tables.keys()))

# Handles to the reflected tables used by the query cells below.
ExchangeTable = meta.tables['Exchange']
TickerTable = meta.tables['Ticker']
TDAmeritradeDailyTable = meta.tables['TDAmeritradeDailyPrice']
TDAmeritradeMinuteTable = meta.tables['TDAmeritradeMinutePrice']
TickerFundamentalTable = meta.tables['TickerFundamental']
CNNFearAndGreed = meta.tables['CNNFearAndGreed']
CboeOptions = meta.tables['CboeOptions']
# ShortSqueeze is not taken from the reflection here; the "Short Squeeze" section
# declares it with Table(..., extend_existing=True) and a keyed 'Record Date' column.
#ShortSqueeze = meta.tables['ShortSqueeze']
StocktwitsPost = meta.tables['StocktwitsPost']
StocktwitsSentiment = meta.tables['StocktwitsSentiment']
TradingEconomics = meta.tables['TradingEconomics']

List of Tables in Postgres database.
['StocktwitsSentiment', 'Exchange', 'Ticker', 'TDAmeritradeDailyPrice', 'TDAmeritradeMinutePrice', 'TickerFundamental', 'StocktwitsPost', 'CboeOptions', 'ShortSqueeze', 'TradingEconomics', 'CNNFearAndGreed']


**Querying database using SQLAlchemy Core.**

In [None]:
# Select every column of the Exchange table and print the rows one per line.
statement = select(ExchangeTable)
exchange_rows = conn.execute(statement)

for row in exchange_rows:
    print(row)

(1, 'S&P500', "Standard and Poor's 500", '', 'United States of America')


**Querying database using pandas.**

**1. Exchange Table**

In [None]:
# Load the whole Exchange table into a DataFrame and preview the first rows.
statement = select(ExchangeTable)
df_exchange = pd.read_sql(sql=statement, con=session.bind)
df_exchange.head(n=5)

Unnamed: 0,Id,Abbreviation,Name,City,Country
0,1,S&P500,Standard and Poor's 500,,United States of America


**2. Ticker Table**

In [None]:
# Load the whole Ticker table, report how many tickers it holds, then preview it.
statement = select(TickerTable)
df_ticker = pd.read_sql(sql=statement, con=session.bind)
print("Total Tickers: ", df_ticker.shape[0])
df_ticker.head(n=5)

Total Tickers:  791


Unnamed: 0,Id,ExchangeId,Symbol,Name
0,2,1,ABT,
1,3,1,ABBV,
2,4,1,ABMD,
3,5,1,ACN,
4,6,1,ATVI,


**3. Ticker Fundamental Table**

In [None]:
# Load the whole TickerFundamental table into a DataFrame and preview the first rows.
statement = select(TickerFundamentalTable)
df_ticker_fundamental = pd.read_sql(sql=statement, con=session.bind)
df_ticker_fundamental.head(n=5)

Unnamed: 0,Id,TickerId,Date,MarketCapital,PERatio,EPS


**4. TDAmeritrade Daily Table**

In [None]:
# Parameters
ticker_name = "$SPX.X"      # ticker symbol to filter on; "" disables the ticker filter
start_date = "2022-05-10"   # inclusive lower bound on Date; "" disables it
end_date = "2022-07-10"     # inclusive upper bound on Date; "" disables it
order_by_date_desc = False  # True returns the newest rows first

# Query
statement = select(TDAmeritradeDailyTable)
if ticker_name != "":
    # Resolve the symbol to its Ticker.Id. Fail loudly when the symbol is unknown:
    # otherwise ticker_id would be undefined on a fresh kernel, or would silently
    # keep whatever value a previously executed cell left behind.
    ticker_lookup = select(TickerTable.c.Id).where(TickerTable.c.Symbol == ticker_name)
    matching_ids = [row[0] for row in conn.execute(ticker_lookup)]
    if not matching_ids:
        raise ValueError(f"Ticker symbol {ticker_name!r} was not found in the Ticker table")
    ticker_id = matching_ids[-1]  # if several rows match, the last one wins
    statement = statement.where(TDAmeritradeDailyTable.c.TickerId == ticker_id)
if start_date != "":
    statement = statement.where(TDAmeritradeDailyTable.c.Date >= start_date)
if end_date != "":
    statement = statement.where(TDAmeritradeDailyTable.c.Date <= end_date)
if order_by_date_desc:
    statement = statement.order_by(TDAmeritradeDailyTable.c.Date.desc())

df_daily = pd.read_sql(statement, session.bind)

# Result
df_daily.head(5)

Unnamed: 0,Id,TickerId,Date,Open,High,Low,Close,Volume
0,8803517,766,2022-05-10,4035.18,4068.82,3958.17,4001.05,0.0
1,8803518,766,2022-05-11,3990.08,4049.09,3928.82,3935.18,0.0
2,8803519,766,2022-05-12,3903.95,3964.8,3858.87,3930.08,0.0
3,8803520,766,2022-05-13,3963.9,4038.88,3963.9,4023.89,0.0
4,8803521,766,2022-05-16,4013.02,4046.46,3983.99,4008.01,0.0


**5. TDAmeritrade Minute Data**

In [None]:
# Parameters
ticker_name = "AAPL"            # ticker symbol to filter on; "" disables the ticker filter
start_date = "2023-01-23"       # inclusive lower bound on Date; "" disables it
end_date = "2023-01-23"         # inclusive upper bound on Date; "" disables it
order_by_date_desc = False      # True returns the newest dates first
include_extended_hours = False  # False keeps only rows with 09:30:00 <= Time < 16:00:00

# Query
statement = select(TDAmeritradeMinuteTable)
if ticker_name != "":
    # Resolve the symbol to its Ticker.Id. Fail loudly when the symbol is unknown:
    # otherwise ticker_id would be undefined on a fresh kernel, or would silently
    # keep whatever value a previously executed cell left behind.
    ticker_lookup = select(TickerTable.c.Id).where(TickerTable.c.Symbol == ticker_name)
    matching_ids = [row[0] for row in conn.execute(ticker_lookup)]
    if not matching_ids:
        raise ValueError(f"Ticker symbol {ticker_name!r} was not found in the Ticker table")
    ticker_id = matching_ids[-1]  # if several rows match, the last one wins
    statement = statement.where(TDAmeritradeMinuteTable.c.TickerId == ticker_id)
if start_date != "":
    statement = statement.where(TDAmeritradeMinuteTable.c.Date >= start_date)
if end_date != "":
    statement = statement.where(TDAmeritradeMinuteTable.c.Date <= end_date)
if not include_extended_hours:
    statement = (
        statement
        .where(TDAmeritradeMinuteTable.c.Time >= '09:30:00')
        .where(TDAmeritradeMinuteTable.c.Time < '16:00:00')
    )
if order_by_date_desc:
    statement = statement.order_by(TDAmeritradeMinuteTable.c.Date.desc())
df_minute = pd.read_sql(statement, session.bind)

# Result
df_minute.head()

Unnamed: 0,Id,TickerId,Date,Time,Open,High,Low,Close,Volume
0,68281619,45,2023-01-23,09:30:00,138.12,138.28,137.91,138.1299,842538
1,68281620,45,2023-01-23,09:31:00,138.12,138.27,137.9,138.19,329740
2,68281621,45,2023-01-23,09:32:00,138.195,138.96,138.18,138.892,523487
3,68281622,45,2023-01-23,09:33:00,138.875,139.22,138.78,139.2099,476255
4,68281623,45,2023-01-23,09:34:00,139.209,139.3,139.02,139.0413,334055


**Aggregated Data**

In [7]:
# Choose any time_frame value from the below. You can put any number in place of x
# (The block that follows is a bare string used purely as an in-cell note; it is
# evaluated and discarded.)
"""
x minutes - Minute 1, 5, 10, 15, 30, 60, 90, 120, 180, 240, 409,
x days - Daily 1 Day, 2 Day, 3 Day
microseconds
milliseconds
second
minute
hour
day
week
month
quarter
year
decade
century
millennium
"""

# Parameters consumed by the aggregation cell that follows.
ticker_name = "AAPL"
time_frame = "5 days"
# Use "TDAmeritradeDailyPrice" for the Daily table or "TDAmeritradeMinutePrice"
# for the Minute table.
table_name = "TDAmeritradeDailyPrice"

In [10]:
# table_name is interpolated into the SQL text below, so accept only the two
# price tables this cell is meant to aggregate.
if table_name not in ("TDAmeritradeDailyPrice", "TDAmeritradeMinutePrice"):
    raise ValueError(
        f"table_name must be 'TDAmeritradeDailyPrice' or 'TDAmeritradeMinutePrice', got {table_name!r}"
    )

# Build the SQL expression that assigns every row to its aggregation bucket.
if len(time_frame.split()) > 1:
    # "<n> <unit>" time frames are bucketed with date_bin(stride, source, origin).
    if "days" in time_frame:
        # The third argument is the origin; it can be set to the first entry of
        # the respective stock to start the buckets from that date.
        datetime_truncate = f'date_bin(\'{time_frame}\', "Date", TIMESTAMP \'1985-01-02\')'
    else:
        # Sub-day strides bucket on "Date" + "Time". A custom origin can be
        # written in the origin field here as well.
        datetime_truncate = f'date_bin(\'{time_frame}\', "Date" + "Time", TIMESTAMP \'2001-01-01\')'
else:
    # Single-word time frames (week, month, ...) are handled by date_trunc.
    datetime_truncate = f'date_trunc(\'{time_frame}\', "Date")'

# Resolve the symbol to its Ticker.Id. Fail loudly when the symbol is unknown:
# otherwise ticker_id would be undefined on a fresh kernel, or would silently
# keep whatever value a previously executed cell left behind.
ticker_lookup = select(TickerTable.c.Id).where(TickerTable.c.Symbol == ticker_name)
matching_ids = [row[0] for row in conn.execute(ticker_lookup)]
if not matching_ids:
    raise ValueError(f"Ticker symbol {ticker_name!r} was not found in the Ticker table")
ticker_id = int(matching_ids[-1])  # if several rows match, the last one wins

# Chronological order of the raw rows inside one bucket. Going by the columns the
# two tables display above, only the minute table carries a "Time" column.
row_order = '"Date", "Time"' if table_name == "TDAmeritradeMinutePrice" else '"Date"'

# The window orders rows chronologically inside each bucket and spans the whole
# bucket, so first_value/last_value return the bucket's first Open and last Close.
# Ordering by the bucket key itself (as the partition key) would leave every row
# a peer and make the chosen Open/Close arbitrary.
statement = f'''
SELECT DISTINCT week as "DateTime",
first_value("Open") OVER w as "Open",
max("High") OVER w as "High",
min("Low") OVER w as "Low",
last_value("Close") OVER w as "Close",
sum("Volume") OVER w as "Volume"
FROM (SELECT *, {datetime_truncate} as week
from public."{table_name}" where "TickerId" = {ticker_id}) as foo
WINDOW w AS (PARTITION BY week ORDER BY {row_order}
             ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
order by week
'''

df_aggregated = pd.read_sql(statement, session.bind)

# Split the bucket timestamp into leading Date and Time columns, then drop it.
bucket_start = pd.to_datetime(df_aggregated['DateTime'])
df_aggregated.insert(loc=0, column="Date", value=bucket_start.dt.date)
df_aggregated.insert(loc=1, column="Time", value=bucket_start.dt.time)
df_aggregated = df_aggregated.drop(columns=['DateTime'])

In [11]:
# Preview the first 25 aggregated buckets.
df_aggregated.head(n=25)

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume
0,1985-01-02,00:00:00,0.130023,0.130023,0.124442,0.126674,479180800.0
1,1985-01-07,00:00:00,0.126674,0.135045,0.125,0.132812,960870400.0
2,1985-01-12,00:00:00,0.136719,0.138951,0.133929,0.135045,725289600.0
3,1985-01-17,00:00:00,0.135045,0.137277,0.125,0.13058,1225616000.0
4,1985-01-22,00:00:00,0.134487,0.135045,0.126674,0.132255,1570867000.0
5,1985-01-27,00:00:00,0.135045,0.136719,0.129464,0.129464,1404592000.0
6,1985-02-01,00:00:00,0.129464,0.133929,0.126674,0.131696,545798400.0
7,1985-02-06,00:00:00,0.133929,0.135603,0.131696,0.133371,571939200.0
8,1985-02-11,00:00:00,0.136161,0.137277,0.12221,0.125,1700944000.0
9,1985-02-16,00:00:00,0.124442,0.124442,0.117746,0.117746,369801600.0


CNN Fear And Greed Data

In [None]:
# Parameters ("" disables the corresponding date bound)
start_date = "2023-01-10"
end_date = "2023-02-10"
order_by_date_desc = False

# Query: collect the active date bounds first, then apply them in order.
cnn_date = CNNFearAndGreed.c.date
cnn_filters = []
if start_date != "":
    cnn_filters.append(cnn_date >= start_date)
if end_date != "":
    cnn_filters.append(cnn_date <= end_date)

statement = select(CNNFearAndGreed)
for condition in cnn_filters:
    statement = statement.where(condition)
if order_by_date_desc:
    statement = statement.order_by(cnn_date.desc())

df_cnn = pd.read_sql(statement, session.bind)

# Result
df_cnn.head(5)

Unnamed: 0,Id,date,now,now_sentiment,previous_close,previous_close_sentiment,week_ago,week_ago_sentiment,month_ago,month_ago_sentiment,year_ago,year_ago_sentiment,timestamp
0,15457,2023-01-10,neutral,48,neutral,46,fear,38,neutral,53,neutral,49,00:04:36
1,15458,2023-01-10,neutral,48,neutral,47,fear,38,greed,59,neutral,51,00:09:36
2,15459,2023-01-10,neutral,48,neutral,47,fear,38,greed,59,neutral,51,00:14:36
3,15460,2023-01-10,neutral,48,neutral,47,fear,38,greed,59,neutral,51,00:19:36
4,15461,2023-01-10,neutral,48,neutral,47,fear,38,greed,59,neutral,51,00:24:36


Stocktwits Posts

In [None]:
# Parameters ("" disables the corresponding filter)
ticker_name = "AAPL"
start_date = "2023-01-10"
end_date = "2023-02-10"
order_by_date_desc = False

# Query: collect the active filters first, then apply them in order.
post_date = StocktwitsPost.c.date
post_filters = []
if ticker_name != "":
    post_filters.append(StocktwitsPost.c.Ticker == ticker_name)
if start_date != "":
    post_filters.append(post_date >= start_date)
if end_date != "":
    post_filters.append(post_date <= end_date)

statement = select(StocktwitsPost)
for condition in post_filters:
    statement = statement.where(condition)
if order_by_date_desc:
    statement = statement.order_by(post_date.desc())

df_stocktwits_posts = pd.read_sql(statement, session.bind)

# Result
df_stocktwits_posts.head(5)

Unnamed: 0,Id,comments,date,Timestamp,Ticker,Watchlistcount,userfollowing,userfollowers,userid,userideas,userjoindate,userlikecount,userwatchliststockscount,linkssourcename,linkstitle,linksurl,linksvideourl,sentimentposition,likestotal,postid
0,90579,$AAPL \n\nOh well,2023-01-10,18:55:54,AAPL,869102,1,0,6693010,229,2022-06-13,8,12,,,,,,,505622525
1,78266,"$AAPL all bulls this morning, now it’s all bea...",2023-01-10,00:16:27,AAPL,869023,3,8,6300501,1713,2022-02-02,182,6,,,,,Bearish,,505469475
2,78267,$OSH $AAPL $GOOG $MSFT $AMZN Can we get this m...,2023-01-10,00:15:10,AAPL,869023,73,8,921827,103,2017-01-10,3087,140,,,,,Bullish,,505469310
3,78268,$JPM $AAPL $TSLA $SPY $BRK.B Every time someon...,2023-01-10,00:15:09,AAPL,869023,10,7808,6018168,30171,2021-10-24,1582,53,,,,,,,505469308
4,78269,$QCOM $110 tomorrow be careful longs\n\nApple ...,2023-01-10,00:13:36,AAPL,869023,11,149,6306426,4724,2022-02-03,1418,41,,,,,Bearish,1.0,505469106


Stocktwits Sentiments

In [None]:
# Parameters ("" disables the corresponding filter)
ticker_name = "AAPL"
start_date = "2023-01-10"
end_date = "2023-02-10"
order_by_date_desc = False

# Query: the date bounds and the ordering all act on the Date_scraped column.
scraped_on = StocktwitsSentiment.c.Date_scraped
sentiment_filters = []
if ticker_name != "":
    sentiment_filters.append(StocktwitsSentiment.c.Ticker == ticker_name)
if start_date != "":
    sentiment_filters.append(scraped_on >= start_date)
if end_date != "":
    sentiment_filters.append(scraped_on <= end_date)

statement = select(StocktwitsSentiment)
for condition in sentiment_filters:
    statement = statement.where(condition)
if order_by_date_desc:
    statement = statement.order_by(scraped_on.desc())

df_stocktwits_sentiment = pd.read_sql(statement, session.bind)

# Result
df_stocktwits_sentiment.head(5)

Unnamed: 0,Id,Ticker,Date_updated,timestamp_updated,Date_scraped,timestamp_scraped,sentiment,price,message_volume
0,113400,AAPL,2023-01-09,14:59:56-05:00,2023-01-10,00:05:03,-0.07,0.409,8.75
1,114191,AAPL,2023-01-09,14:59:56-05:00,2023-01-10,00:35:03,-0.04,0.409,8.85
2,114982,AAPL,2023-01-09,14:59:56-05:00,2023-01-10,01:05:03,-0.04,0.409,8.85
3,115773,AAPL,2023-01-09,14:59:56-05:00,2023-01-10,01:35:03,-0.01,0.409,8.85
4,116564,AAPL,2023-01-09,14:59:56-05:00,2023-01-10,02:05:03,-0.01,0.409,8.85


Trading Economics


In [None]:
# Parameters: the bounds are compared against the Year column; "" disables a bound.
start_date = "2023"
end_date = "2023"
order_by_date_desc = False

# Query: collect the active year bounds first, then apply them in order.
report_year = TradingEconomics.c.Year
year_filters = []
if start_date != "":
    year_filters.append(report_year >= start_date)
if end_date != "":
    year_filters.append(report_year <= end_date)

statement = select(TradingEconomics)
for condition in year_filters:
    statement = statement.where(condition)
if order_by_date_desc:
    statement = statement.order_by(report_year.desc())

df_trading_economics = pd.read_sql(statement, session.bind)

# Result
df_trading_economics.head(5)

Unnamed: 0,Id,Day,Month,Year,Frequency,Date,Time,Country,Report,Priority,Actual,Previous,Consensus,Forecast
0,13691,Monday,January,2023,,2,08:45 PM,CN,Caixin Manufacturing PMI DEC,3,49,49.4,48.8,48
1,13692,Tuesday,January,2023,,3,08:00 AM,DE,Inflation Rate YoY Prel DEC,3,8.6%,10%,9.1%,9.1%
2,13693,Wednesday,January,2023,,4,02:45 AM,FR,Inflation Rate YoY Prel DEC,3,5.9%,6.2%,6.4%,6.3%
3,13694,Wednesday,January,2023,,4,10:00 AM,US,ISM Manufacturing PMI DEC,3,48.4,49,48.5,49
4,13695,Wednesday,January,2023,,4,10:00 AM,US,JOLTs Job Openings NOV,3,10.458M,10.512M ®,10M,10.1M


CBOE Options

In [None]:
# Parameters ("" disables the corresponding filter)
start_date = "2023-02-21"
end_date = "2023-02-23"
order_by_date_desc = True
root_name = "SPXW"  # SPX, SPXW, VIX, VIXW

# Query: collect the active filters first, then apply them in order.
quote_date = CboeOptions.c.date
option_filters = []
if root_name != "":
    option_filters.append(CboeOptions.c.root == root_name)
if start_date != "":
    option_filters.append(quote_date >= start_date)
if end_date != "":
    option_filters.append(quote_date <= end_date)

statement = select(CboeOptions)
for condition in option_filters:
    statement = statement.where(condition)
if order_by_date_desc:
    statement = statement.order_by(quote_date.desc())

df_cboe_options = pd.read_sql(statement, session.bind)

# Result
df_cboe_options.head(5)

Unnamed: 0,Ticker,date,root,expiration,strike,option_type,OpenPrice,HighPrice,LowPrice,ClosePrice,...,bid_size_eod,bid_eod,ask_size_eod,ask_eod,underlying_bid_eod,underlying_ask_eod,vwap,open_interest,delivery_code,Id
0,.SPXW20231229C3950,2023-02-23,SPXW,2023-12-29,3950.0,C,0.0,0.0,0.0,0.0,...,3.0,392.5,3.0,397.9,0.0,0.0,0.0,93.0,,8882833
1,.SPXW20231229P3950,2023-02-23,SPXW,2023-12-29,3950.0,P,0.0,0.0,0.0,0.0,...,14.0,210.1,14.0,212.1,0.0,0.0,0.0,107.0,,8882834
2,.SPXW20231229C3975,2023-02-23,SPXW,2023-12-29,3975.0,C,0.0,0.0,0.0,0.0,...,3.0,376.3,3.0,381.6,0.0,0.0,0.0,647.0,,8882835
3,.SPXW20231229P3975,2023-02-23,SPXW,2023-12-29,3975.0,P,0.0,0.0,0.0,0.0,...,14.0,217.7,14.0,219.8,0.0,0.0,0.0,45.0,,8882836
4,.SPXW20231229C4000,2023-02-23,SPXW,2023-12-29,4000.0,C,0.0,0.0,0.0,0.0,...,3.0,360.3,3.0,365.5,0.0,0.0,0.0,501.0,,8882837


In [None]:
# List every column of the CBOE options frame; the head() preview above elides
# the middle columns.
df_cboe_options.columns

Index(['Ticker', 'date', 'root', 'expiration', 'strike', 'option_type',
       'OpenPrice', 'HighPrice', 'LowPrice', 'ClosePrice', 'TotalVolume',
       'bid_size', 'bid', 'ask_size', 'ask', 'underlying_bid',
       'underlying_ask', 'implied_underlying_price', 'active_underlying_price',
       'implied_volatility', 'delta', 'gamma', 'theta', 'vega', 'rho',
       'bid_size_eod', 'bid_eod', 'ask_size_eod', 'ask_eod',
       'underlying_bid_eod', 'underlying_ask_eod', 'vwap', 'open_interest',
       'delivery_code', 'Id'],
      dtype='object')

In [None]:
# Show the contract identifier, quote date and the greek columns only.
greek_columns = ['Ticker', 'date', 'delta', 'gamma', 'theta', 'vega', 'rho']
df_cboe_options[greek_columns]

Unnamed: 0,Ticker,date,delta,gamma,theta,vega,rho
0,.SPXW20231229C3950,2023-02-23,0.6406,0.0005,-0.4117,13.6920,18.2351
1,.SPXW20231229P3950,2023-02-23,-0.3614,0.0005,-0.4354,13.6917,-13.9342
2,.SPXW20231229C3975,2023-02-23,0.6288,0.0005,-0.4138,13.8346,17.9790
3,.SPXW20231229P3975,2023-02-23,-0.3732,0.0005,-0.4345,13.8343,-14.3940
4,.SPXW20231229C4000,2023-02-23,0.6166,0.0005,-0.4155,13.9688,17.7021
...,...,...,...,...,...,...,...
36923,.SPXW20230221P3350,2023-02-21,0.0000,0.0000,0.0000,0.0000,0.0000
36924,.SPXW20230221C3400,2023-02-21,0.0000,0.0000,0.0000,0.0000,0.0000
36925,.SPXW20230221P3400,2023-02-21,0.0000,0.0000,0.0000,0.0000,0.0000
36926,.SPXW20230221C3450,2023-02-21,0.0000,0.0000,0.0000,0.0000,0.0000


Short Squeeze

In [None]:
# Declare the 'Record Date' column under the key RecordDate so that it can be
# reached as ShortSqueeze.c.RecordDate, then attach it to the ShortSqueeze table
# held in `meta` (extend_existing=True applies the column to that existing table).
record_date_column = Column('Record Date', String, key='RecordDate')
ShortSqueeze = Table('ShortSqueeze', meta, record_date_column, extend_existing=True)

In [None]:
# Parameters
Year = "2021"  # 2021, 2022, etc
Month = "Feb"  # Jan, Feb, Mar, etc
Type = "A"     # A or B

# Record dates are matched as "<Year>-<Month><Type>", e.g. "2021-FebA".
start_date = f"{Year}-{Month}{Type}"
order_by_date_desc = False

# Query: exact match on the record date, optionally newest first.
record_date = ShortSqueeze.c.RecordDate
statement = select(ShortSqueeze)
if start_date != "":
    statement = statement.where(record_date == start_date)
if order_by_date_desc:
    statement = statement.order_by(record_date.desc())

df_short_squeeze = pd.read_sql(statement, session.bind)

# Result
df_short_squeeze.head(5)

Unnamed: 0,Id,Short Interest Data,Symbol,Total Short Interest,Days to Cover,Short % of Float,Performance (52-wk),% Insider Ownership,% Institutional Ownership,% from 52-wk High,...,Shares: Float,Avg. Daily Vol.,Shares: Outstanding,Price,Market Cap,Exchange,Sector,Industry,Short Squeeze Ranking,Record Date
0,124747,Agilent Technologies Inc,A,2270000.0,1.3,0.75,55.3,0.32,90.07,-8.73,...,304090000.0,1800000.0,315990000.0,125.02,39505069800,NY,Healthcare,Diagnostics & Research,0.54,2021-FebA
1,124748,Alcoa Corporation,AA,11150000.0,1.9,6.05,77.08,0.55,76.92,-0.33,...,184400000.0,5930000.0,185550000.0,27.2,5046960000,NY,Basic Materials,Aluminum,8.86,2021-FebA
2,124749,Listed Funds Trust AAF First Priority CLO Bond...,AAA,5400.0,2.7,,0.2,,,-1.26,...,,2000.0,,25.07,$,NY,,,,2021-FebA
3,124750,AAREAL BANK AG AKT,AAALF,173700.0,,,-0.06,,,,...,,,,$,$,NBB,,,,2021-FebA
4,124751,Asia Broadband Inc Common Stoc,AABB,523300.0,,,,,,,...,,83419000.0,,$,$,NBB,Basic Materials,Industrial Metals & Minerals,,2021-FebA
