# A view of trends and correlations in movement for stocks and cryptocurrencies

## GOALS
#### =====================================================
#### =====================================================
### 1) Correlations of movements across stocks and cryptos
#### A) 1-wk
#### B) 1-mo
#### C) 1-yr
#### =====================================================
### 2) Clustering of stocks & trends of clusters
#### A) Sector
#### B) Subsectors
#### =====================================================
### 3) Leverage some tools from microprediction
#### https://www.microprediction.com/make-predictions
#### =====================================================
### 4) Web scraping for sentiment and articles
#### =====================================================
### 5) Correlations across exchanges, foreign and domestic
#### =====================================================
###
### ====================================================================================================================================================================================================================================================

### Installation of libraries (consider conda install for working env first)


In [1]:
# pip install altair
# pip install pandas
# pip install requests_html
# pip install scikit-plot
# pip install tensorflow
# pip install --upgrade pip
# pip install yahoo_fin
# pip install yfinance --upgrade --user --no-cache-dir
# pip install --user pycaret
# pip install scikit-learn==0.23.2

### Importing relevant libraries

In [3]:
import ftplib
import io
import pandas as pd
import requests
import requests_html
import numpy as np

from pycaret.classification import *
from pycaret.regression import *

import yfinance as yf
from yahoo_fin.stock_info import get_data, get_top_crypto, get_analysts_info
import yahoo_fin.stock_info as si
import yahoo_fin.options as ops
from yahoo_fin.stock_info import *

# Dow: tickers_dow()
# Nasdaq: tickers_nasdaq()
# S&P500: tickers_sp500()
# Others: tickers_other()

import tensorflow as tf
import altair as alt



### Yahoo_fin has two modules - stock_info and options.
---------
#### Stock_info has the following methods:

#### get_analysts_info(), get_balance_sheet(), get_cash_flow(), get_data(), get_day_gainers(), get_day_losers(), get_day_most_active(), get_holders(), get_income_statement(), get_live_price(), get_quote_table(), get_top_crypto(), get_stats(), get_stats_valuation(), tickers_dow(), tickers_nasdaq(), tickers_other(), tickers_sp500()
---------
#### And options has:

#### get_calls(), get_expiration_dates(), get_options_chain(), get_puts()
---------

#### The methods you can't use without requests_html are:

#### stock_info module: get_day_gainers(), get_day_most_active(), get_day_losers(), get_top_crypto(); options module: get_expiration_dates()
---------

### Pulling data

In [4]:
# ticker: case insensitive ticker of the desired stock/bond
# start_date: date you want the data to start from (mm/dd/yyyy)
# end_date: date you want the data to end (mm/dd/yyyy)
# index_as_date: {True, False}. Default is True. If True, the dates of the records are set as the index; otherwise they are returned as a separate column.
# interval: {"1d", "1wk", "1mo"}. The interval at which to sample the data: "1d" = daily, "1wk" = weekly, "1mo" = monthly.
# get_data(ticker, start_date = None, end_date = None, index_as_date = True, interval = "1d")

# Amazon-only case (note: despite its name, amazon_weekly is requested with interval="1mo", i.e. monthly bars)
# amazon_weekly= get_data("amzn", start_date="12/04/2009", end_date="12/04/2021", index_as_date = True, interval="1mo")
# amazon_weekly = amazon_weekly.reset_index()
# amazon_weekly = amazon_weekly.rename(columns={'index':'date'})
# amazon_weekly.head()

####################################################################################################################################################################

# Listing metadata (Symbol, Security Name, Market Category, ...) for the
# Nasdaq-listed securities; merged onto the price history in a later cell.
metaNasdaqDf = tickers_nasdaq(include_company_data = True)

# Monthly price history for every Nasdaq ticker between 01/16/2016 and
# 01/15/2021. Frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration, which gets very slow over thousands of tickers.
nasdaqFrames = []
failedNasdaqTickers = []  # tickers whose download raised; kept for inspection
for ticker in tickers_nasdaq():
    try:
        nasdaqFrames.append(
            get_data(ticker, start_date="01/16/2016", end_date="01/15/2021", index_as_date=True, interval="1mo")
        )
    except Exception:
        # Best effort: a ticker that cannot be downloaded is skipped, but it is
        # remembered instead of being dropped silently. Catching Exception
        # (not a bare except) lets Ctrl-C / kernel interrupt stop the loop.
        failedNasdaqTickers.append(ticker)

if failedNasdaqTickers:
    print("Skipped {} ticker(s) that could not be downloaded".format(len(failedNasdaqTickers)))

# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing could be downloaded (this matches the original start value).
timeseriesNasdaqDf = pd.concat(nasdaqFrames) if nasdaqFrames else pd.DataFrame()

# Move the date index into a regular column named 'date'.
timeseriesNasdaqDf = timeseriesNasdaqDf.reset_index()
timeseriesNasdaqDf = timeseriesNasdaqDf.rename(columns={'index':'date'})
timeseriesNasdaqDf.head()

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2016-02-01,5.23,5.6,4.21,5.06,0.485294,198600.0,AACG
2016-03-01,5.06,5.5,4.52,5.19,0.497762,1375100.0,AACG
2016-04-01,5.31,6.91,5.15,5.69,0.545716,340200.0,AACG
2016-05-01,5.58,5.73,4.41,4.95,0.474744,188700.0,AACG
2016-06-01,4.93,5.7,4.57,4.63,0.444054,177800.0,AACG


In [41]:
# Preview the first rows of the Nasdaq listing metadata.
metaNasdaqDf.head()
# Example lookup of a single symbol:
# metaNasdaqDf[metaNasdaqDf['Symbol']=='AAPL']


Unnamed: 0,Symbol,Security Name,Market Category,Test Issue,Financial Status,Round Lot Size,ETF,NextShares
0,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N
1,AACI,Armada Acquisition Corp. I - Common Stock,G,N,N,100.0,N,N
2,AACIU,Armada Acquisition Corp. I - Unit,G,N,N,100.0,N,N
3,AACIW,Armada Acquisition Corp. I - Warrant,G,N,N,100.0,N,N
4,AADI,"Aadi Bioscience, Inc. - Common Stock",S,N,N,100.0,N,N


In [12]:
# Attach the listing metadata to every price row. A left join keeps all price
# rows even when a ticker has no matching 'Symbol' in the metadata.
fullNasdaqDf = pd.merge(
    timeseriesNasdaqDf,
    metaNasdaqDf,
    how='left',
    left_on='ticker',
    right_on='Symbol',
)
fullNasdaqDf.head()

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker,Symbol,Security Name,Market Category,Test Issue,Financial Status,Round Lot Size,ETF,NextShares
0,2016-02-01,5.23,5.6,4.21,5.06,0.485294,198600.0,AACG,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N
1,2016-03-01,5.06,5.5,4.52,5.19,0.497762,1375100.0,AACG,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N
2,2016-04-01,5.31,6.91,5.15,5.69,0.545716,340200.0,AACG,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N
3,2016-05-01,5.58,5.73,4.41,4.95,0.474744,188700.0,AACG,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N
4,2016-06-01,4.93,5.7,4.57,4.63,0.444054,177800.0,AACG,AACG,ATA Creativity Global - American Depositary Sh...,G,N,N,100.0,N,N


In [13]:
# cryptos = get_top_crypto()
# cryptos.head()

### Testing out some visuals / limitations of altair

In [55]:
# Screen the merged Nasdaq frame for rows that satisfy all of:
#   low above 100 and high below 120, dated after 2019-01-01,
#   Market Category equal to 'Q', and volume above 5,000,000.
priceInBand = (fullNasdaqDf['low'] > 100) & (fullNasdaqDf['high'] < 120)
# pd.Timestamp is used instead of datetime.datetime: `datetime` is not imported
# by name in the notebook's import cell, so the original expression could only
# work if a wildcard import happened to leak it into scope.
afterCutoff = fullNasdaqDf['date'] > pd.Timestamp(2019, 1, 1)
inCategoryQ = fullNasdaqDf['Market Category'] == 'Q'
highVolume = fullNasdaqDf['volume'] > 5000000
criteriaDf = fullNasdaqDf[priceInBand & afterCutoff & inCategoryQ & highVolume]

# Tickers with at least one qualifying row ...
nasdaqCuttickers = list(criteriaDf['ticker'].unique())
# ... and the complete history (all rows) of those tickers.
nasdaqCut = fullNasdaqDf[fullNasdaqDf['ticker'].isin(nasdaqCuttickers)]

In [82]:
# Line chart of close price per ticker for the screened Nasdaq subset.
# Interactive legend selection is currently disabled; to re-enable it, restore:
#   selection = alt.selection_multi(fields=["Stock"], bind="legend")
#   opacity=alt.condition(selection, alt.value(1), alt.value(0.1))  (inside encode)
#   .add_selection(selection)                                        (after properties)
dateAxis = alt.X("date",axis=alt.Axis(grid=False))
tickerColor = alt.Color('ticker', scale=alt.Scale(scheme='redblue'))
lineMarks = alt.Chart(nasdaqCut).mark_line()
base = lineMarks.encode(
    x=dateAxis,
    y="close",
    color=tickerColor,
).properties(height=500, width=1800)

# For each date, compute the min / quartiles / max of close across tickers
# (probs 0, 0.25, 0.5, 0.75, 1) and draw them as unstacked, overlapping areas
# coloured by quantile level.
closeQuantiles = base.transform_quantile(
    "close", probs=[1,0.75,0.5,.25,0], groupby=["date"]
)
quantileAreas = closeQuantiles.mark_area(opacity=1)
quantiles = quantileAreas.encode(
    alt.Y("value:Q", stack=None),
    alt.Color("prob:N", scale=alt.Scale(scheme='greys')),
)

quantiles
# base
# quantiles + base

In [66]:
# `cryptos` is only assigned in a commented-out cell earlier in the notebook,
# so on a clean "Restart & Run All" this cell failed with NameError. Fetch it
# here so the cell is self-contained.
cryptos = get_top_crypto()

# Reshape to one column per coin and one row per metric. Indexing by 'Name'
# before transposing keeps the metric values numeric; the previous approach
# (transpose, then promote the first row to the header) turned every value
# into object dtype because the names were mixed in with the numbers.
cryptoCorr = (
    cryptos.set_index('Name')[['Volume in Currency (24Hr)', '% Change']]
    .T
    .rename_axis(columns=None)  # drop the leftover 'Name' label on the columns
)
cryptoCorr = cryptoCorr[['Bitcoin USD','Ethereum USD','Binance Coin USD']]
cryptoCorr#.corr()

Unnamed: 0,Bitcoin USD,Ethereum USD,Binance Coin USD
Volume in Currency (24Hr),26532000000.0,12400000000.0,1807000000.0
% Change,-0.45,-0.9,-0.37


### Modeling

In [28]:
# Columns to be treated as categorical; passed to setup() as
# categorical_features in the next cell.
cat_f = ['ticker']

In [38]:
# Initialise the PyCaret experiment: target column 'high', time-series
# cross-validation with 3 folds, and a fixed session_id of 123.
# NOTE(review): as far as this notebook shows, amazon_weekly is only assigned
# in commented-out lines of the "Pulling data" cell - confirm it is defined
# before this cell runs.
# NOTE(review): pycaret.classification and pycaret.regression are both
# star-imported; the regression import comes last, so setup() presumably
# resolves to the regression version - confirm.
s = setup(
    data=amazon_weekly,
    target='high',
    categorical_features=cat_f,
    fold_strategy='timeseries',
    fold=3,
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,high
2,Original Data,"(144, 7)"
3,Missing Values,False
4,Numeric Features,5
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(100, 3)"


In [42]:
# Rank the candidate models by mean absolute error and keep the best one.
# The recorded output below shows regression metrics; for PyCaret regression
# the default sort metric is 'R2' ('Accuracy' is the classification default).
best = compare_models(sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
huber,Huber Regressor,41.7164,6804.2064,82.4613,0.9933,0.053,0.0376,0.02
llar,Lasso Least Angle Regression,45.7731,6024.2133,77.4229,0.9939,0.0727,0.0537,0.0133
omp,Orthogonal Matching Pursuit,47.4494,6476.0086,80.1239,0.9934,0.0687,0.052,0.0133
br,Bayesian Ridge,49.5847,6222.6293,78.493,0.9937,0.0901,0.0652,0.0133
lasso,Lasso Regression,49.6412,6235.8807,78.5693,0.9937,0.0901,0.0652,0.8167
en,Elastic Net,49.6412,6235.8787,78.5693,0.9937,0.0901,0.0652,0.0133
lar,Least Angle Regression,49.6415,6235.9377,78.5696,0.9937,0.0901,0.0652,0.0133
lr,Linear Regression,49.6415,6235.9408,78.5696,0.9937,0.0901,0.0652,0.75
ridge,Ridge Regression,49.6415,6235.9331,78.5696,0.9937,0.0901,0.0652,0.0133
et,Extra Trees Regressor,52.8604,8292.9131,88.1246,0.9924,0.0912,0.0632,0.25


In [43]:
# Fit the Huber Regressor ('huber'), the model with the lowest MAE in the
# recorded compare_models output above.
huber = create_model('huber')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,49.6974,6402.6967,80.0169,0.993,0.0707,0.0555
1,32.6844,6766.3518,82.2578,0.9923,0.0528,0.0302
2,42.7675,7243.5707,85.1092,0.9945,0.0353,0.0272
Mean,41.7164,6804.2064,82.4613,0.9933,0.053,0.0376
SD,6.9852,344.3274,2.0839,0.0009,0.0145,0.0127
