In [1]:
import nasdaqdatalink
import os
import json
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas_market_calendars as mcal
from datetime import datetime
import requests
from refresh_functions import *
#pd.options.display.float_format = '{:.3f}'.format

# Read the Nasdaq Data Link API key from the local JSON credentials file and
# register it with the quandl client used for downloads.
with open('C:/Users/meich/.nasdaq/data_link_apikey.json') as key_file:
    data = json.load(key_file)
# The file is expected to hold a JSON object with an 'api_key' entry.
key = data['api_key']
quandl.ApiConfig.api_key = key

# Data Processing
-----

### Sharadar (SEP ~ Equity Prices)

For now, run this cell daily (skip Sundays, Mondays, and the day after a market holiday).

TODO: update the code to pass the list of dates between max(CSV date) and the current daily date, so that a missed run gets backfilled.

In [4]:
# Run the SEP (equity prices) refresh helper, then load the SEP CSV into `sep`.
# NOTE(review): sharadarSEP is not defined in this notebook (presumably it comes
# from the refresh_functions star import) and presumably updates this same CSV
# on disk before it is read — confirm.
sharadarSEP()
sep = pd.read_csv('C:/Users/meich/CareerDocs/projects/stock_prediction/Data/SHARADAR_SEP.csv')

New Data---
2022-12-28 00:00:00


### Tickers (filters)

In [7]:
# Build the ticker table; it is later left-joined onto the daily metrics on 'ticker'.
# NOTE(review): sharadarTICKERS is defined outside this notebook; its return value
# is assumed to be a DataFrame with a 'ticker' column — confirm.
tickers = sharadarTICKERS()

### Daily Metrics (filters)

In [8]:
# Run the daily-metrics refresh helper, then load the daily-metrics CSV.
# NOTE(review): sharadarDAILY is defined outside this notebook and presumably
# updates this same CSV on disk before it is read — confirm.
sharadarDAILY()
daily = pd.read_csv('C:/Users/meich/CareerDocs/projects/stock_prediction/Data/SHARADAR_DAILY.csv')

# FILTER OUT STOCKS THAT WERE NEVER 500M MARKETCAP MINIMUM
# transform('max') broadcasts each ticker's peak marketcap back onto every one of
# its rows. Only the 'marketcap' column is aggregated (instead of taking the max
# of every column and discarding all but one), and no set_index/reset_index
# round-trip is needed to align the result.
daily['marketcap_max'] = daily.groupby('ticker')['marketcap'].transform('max')

# Keep only tickers whose peak marketcap reached the threshold. The value 500 is
# in the units of the 'marketcap' column (presumably USD millions — confirm).
# Rows with a missing peak compare False and are dropped.
daily = daily[daily['marketcap_max'] >= 500].reset_index(drop=True)

New Data---
2022-12-28 00:00:00


### Short Interest Activity (Finra)

In [9]:
# Run the FINRA short-interest refresh helper, then load the FINRA CSV into `si`.
# NOTE(review): finraSHORTS is defined outside this notebook and presumably
# updates this same CSV on disk before it is read — confirm.
finraSHORTS()
si = pd.read_csv('C:/Users/meich/CareerDocs/projects/stock_prediction/Data/FINRA_SI.csv')

Data up to date:
2022-12-28


### Retail Trader Activity

In [10]:
# Get historic values and process the data: run the retail-trader-activity (RTAT)
# refresh helper, then load the RTAT CSV into `rtat`.
# NOTE(review): nasdaqRTAT is defined outside this notebook and presumably
# updates this same CSV on disk before it is read — confirm.
nasdaqRTAT()
rtat = pd.read_csv('C:/Users/meich/CareerDocs/projects/stock_prediction/Data/NDAQ_RTAT.csv')

New Data---
2022-12-28 00:00:00


### Create foundational dataset (shar)

In [11]:
# Assemble the foundational dataset. Start from the filtered daily metrics and
# left-join each other source so every daily row is kept even when a source has
# no matching record. Ticker metadata joins on 'ticker' alone; prices, retail
# activity and short interest join on the ('date', 'ticker') pair.
combined = (
    daily
    .merge(tickers, on='ticker', how='left')
    .merge(sep, on=['date', 'ticker'], how='left')
    .merge(rtat, on=['date', 'ticker'], how='left')
    .merge(si, on=['date', 'ticker'], how='left')
)

# Exploration/Analysis
-----

In [12]:
# SI METRICS
# ShortRatio is ShortVolume expressed as a fraction of TotalVolume, row by row.
combined['ShortRatio'] = combined['ShortVolume'].div(combined['TotalVolume'])

# TODO: rolling windows, change-over-time, etc.

# TODO: some of these metric-creation steps could become functions that iterate
# over all fields:
# def simple_features()
# def si_features()
# def rtat_features()
# ...etc

In [13]:
#  ---- RTAT NOTES / TODO -------#
# - Check for autocorrelation in both metrics (presumably activity and sentiment).
# - Test an RMSE-based similarity index over activity/sentiment that produces a
#   single aggregate score.
# - Set up an activity trigger that catches daily values and sends a notification
#   plus a visual.

# Pass `combined` through fts_rtat and rebind the name to the result.
# NOTE(review): fts_rtat is defined outside this notebook; presumably it adds the
# RTAT-derived feature columns. Because the cell overwrites its own input,
# re-running it applies fts_rtat to already-processed data — confirm that is safe.
combined = fts_rtat(combined)

In [19]:
# Plot one ticker's adjusted close against its retail activity and sentiment.
# The ticker is named once so the filter and the figure title cannot drift apart.
plot_ticker = 'MSFT'
stock = combined[combined['ticker'] == plot_ticker].copy()
stock = stock.sort_values(by='date')

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Price goes on the primary (left) axis.
fig.add_trace(
    go.Scatter(x=stock['date'], y=stock['closeadj'], name="price"),
    secondary_y=False,
)

# Activity is multiplied by 100 before plotting; presumably this puts it on a
# scale comparable to sentiment, which shares the secondary (right) axis.
fig.add_trace(
    go.Scatter(x=stock['date'], y=stock['activity']*100, name="activity"),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x=stock['date'], y=stock['sentiment'], name="sentiment"),
    secondary_y=True,
)

# Label the figure so it can be read on its own: title plus a label per axis,
# including the x100 scaling that is otherwise invisible to the reader.
fig.update_layout(title_text=f"{plot_ticker}: adjusted close vs. retail activity and sentiment")
fig.update_xaxes(title_text="date")
fig.update_yaxes(title_text="closeadj", secondary_y=False)
fig.update_yaxes(title_text="activity (x100) / sentiment", secondary_y=True)
fig.show()

### Start with a simple model -- retail activity predictors first, then forward-select additional features into it

In [None]:
# BUILD A SIMPLE BASELINE MODEL TO PREDICT PRICE AT DIFFERENT HORIZONS (E.G. 5 DAY, 10 DAY, 20 DAY, 1 MONTH, 3 MONTH, 6 MONTH)
#       OR CONSTRUCT IT AS A CLASSIFICATION MODEL INSTEAD - PREDICT THE PROBABILITY THAT PRICE WILL FALL BY X%

### Notification System

In [21]:
# NOTIFY (BASED ON MODELLING RESULTS ABOVE) + APPEND TO CSV
# Display the 20 rows with the highest strictly positive 'activity', largest first.
combined.loc[combined['activity'].gt(0)].sort_values(by='activity', ascending=False).head(20)

Unnamed: 0,ticker,date,lastupdated,ev,evebit,evebitda,marketcap,pb,pe,ps,...,activity_15,sentiment_15,activity_30,sentiment_30,activity_recent_ratio,sentiment_recent_ratio,prod_sent_act,prod_sent_act_5,prod_sent_act_15,prod_sent_act_30
264746,AMC,2021-06-02,2021-06-02,38372.6,-14.8,-18.2,28165.0,-12.2,-9.5,62.7,...,,,,,,,78.453,,,
260877,AMC,2021-06-03,2021-06-03,33325.0,-12.8,-15.8,23117.4,-10.0,-7.8,51.5,...,,,,,,,44.042,,,
1549011,TSLA,2020-02-04,2022-09-06,167659.1,-3446.3,82.8,159887.1,26.5,-193.1,6.5,...,,,,,,,0.0,,,
5824333,TSLA,2022-12-28,2022-12-28 00:00:00,339931.3,27.0,21.0,355910.3,8.9,31.8,4.8,...,0.0121,-5.866667,,,,,-71.564,-16.6382,-7.104533,
272485,AMC,2021-05-28,2021-05-28,21968.9,-8.5,-10.4,11761.3,-5.1,-3.9,26.2,...,,,,,,,71.324,,,
891136,AAPL,2020-10-13,2021-12-19,2150457.1,30.1,25.9,2071117.1,28.7,35.4,7.6,...,0.0115,0.066667,0.005797,0.3,5.948246,-4.0,68.804,-4.1388,0.076733,0.1742
1545254,TSLA,2020-02-05,2022-09-06,140197.9,-2881.8,69.2,132425.9,21.9,-159.9,5.4,...,,,,,,,-16.601,,,
1008921,TSLA,2020-08-31,2022-09-06,469863.8,407.5,139.0,464339.8,47.1,1261.8,18.1,...,,,,,,,33.142,,,
4399816,TSLA,2022-12-23,2022-12-23,372898.2,29.7,23.1,388877.2,9.8,34.8,5.2,...,0.010473,-7.4,0.00525,-4.733333,5.969524,0.887324,-46.953,-13.167,-7.757667,-2.489733
806667,NIO,2020-11-13,2020-11-13,49900.9,-44.2,-37.7,49571.1,-59.3,-41.0,35.1,...,,,,,,,30.422,24.969,,
