# Predicting price movements for stocks and cryptocurrencies

### Installation of libraries (consider conda install for working env first)

In [1]:
# pip install pandas

In [2]:
# pip install requests_html

In [3]:
# pip install scikit-plot

In [4]:
# pip install tensorflow

In [5]:
# pip install --upgrade pip

In [6]:
# pip install yahoo_fin

In [7]:
# pip install yfinance --upgrade --user --no-cache-dir

In [8]:
# pip install --user pycaret

In [9]:
# pip install scikit-learn==0.23.2

### Importing relevant libraries

In [37]:
import ftplib
import io
import pandas as pd
import requests
import requests_html
import numpy as np

from pycaret.classification import *
from pycaret.regression import *

import yfinance as yf
from yahoo_fin.stock_info import get_data, get_top_crypto, get_analysts_info
# Dow: tickers_dow()
# Nasdaq: tickers_nasdaq()
# S&P500: tickers_sp500()
# Others: tickers_other()
import yahoo_fin.stock_info as si
import yahoo_fin.options as ops

import tensorflow as tf



### Yahoo_fin has two modules - stock_info and options.
---------
#### Stock_info has the following methods:

#### get_analysts_info(), get_balance_sheet(), get_cash_flow(), get_data(), get_day_gainers(), get_day_losers(), get_day_most_active(), get_holders(), get_income_statement(), get_live_price(), get_quote_table(), get_top_crypto(), get_stats(), get_stats_valuation(), tickers_dow(), tickers_nasdaq(), tickers_other(), tickers_sp500()
---------
#### And options has:

#### get_calls(), get_expiration_dates(), get_options_chain(), get_puts()
---------

#### And there are the methods you can’t use without requests_html are:

#### stock_info module, get_day_gainers(), get_day_most_active(), get_day_losers(), get_top_crypto(), get_expiration_dates()
---------

In [26]:
# ticker: case insensitive ticker of the desired stock/bond
# start_date: date you want the data to start from (mm/dd/yyyy)
# end_date: date you want the data to end (mm/dd/yyyy)
# index_as_date: {True, False}. Default is true. If true then the dates of the records are set as the index, else they are returned as a separate column.
# interval: {“1d”, “1wk”, “1mo”}. Refers to the interval to sample the data: “1d”= daily, “1wk”= weekly, “1mo”=monthly.
# get_data(ticker, start_date = None, end_date = None, index_as_date = True, interval = “1d”)

amazon_weekly= get_data("amzn", start_date="12/04/2009", end_date="12/04/2021", index_as_date = True, interval="1mo")
amazon_weekly.head()

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2010-01-01,136.25,136.610001,118.120003,125.410004,125.410004,230861000,AMZN
2010-02-01,123.18,124.860001,113.82,118.400002,118.400002,210145800,AMZN
2010-03-01,118.699997,138.190002,117.529999,135.770004,135.770004,158042600,AMZN
2010-04-01,135.800003,151.089996,130.779999,137.100006,137.100006,173025100,AMZN
2010-05-01,137.199997,139.440002,117.519997,125.459999,125.459999,140909900,AMZN


In [27]:
cryptos = get_top_crypto()
cryptos.head()

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Market Cap,Volume in Currency (Since 0:00 UTC),Volume in Currency (24Hr),Total Volume All Currencies (24Hr),Circulating Supply
0,BTC-USD,Bitcoin USD,47715.79,57.81,0.12,902548000000.0,28656000000.0,28656000000.0,28656000000.0,18915000.0
1,ETH-USD,Ethereum USD,3753.31,-21.92,-0.58,446473000000.0,15032000000.0,15032000000.0,15032000000.0,118955000.0
2,BNB-USD,Binance Coin USD,523.82,-1.08,-0.21,87374000000.0,2087000000.0,2087000000.0,2087000000.0,166801000.0
3,USDT-USD,Tether USD,1.0001,-0.0001,-0.01,78349000000.0,59111000000.0,59111000000.0,59111000000.0,78345000000.0
4,HEX-USD,HEX USD,0.339665,0.022129,6.97,58902000000.0,74952000.0,74952000.0,74952000.0,173411000000.0


In [28]:
#list of columns that are categorical
cat_f = ['ticker']

In [38]:
s = setup(data = amazon_weekly, target = 'high', fold_strategy = 'timeseries',  fold = 3, categorical_features = cat_f, session_id = 123)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,high
2,Original Data,"(144, 7)"
3,Missing Values,False
4,Numeric Features,5
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(100, 3)"


In [42]:
best = compare_models(sort = 'MAE') #default is 'Accuracy'

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
huber,Huber Regressor,41.7164,6804.2064,82.4613,0.9933,0.053,0.0376,0.02
llar,Lasso Least Angle Regression,45.7731,6024.2133,77.4229,0.9939,0.0727,0.0537,0.0133
omp,Orthogonal Matching Pursuit,47.4494,6476.0086,80.1239,0.9934,0.0687,0.052,0.0133
br,Bayesian Ridge,49.5847,6222.6293,78.493,0.9937,0.0901,0.0652,0.0133
lasso,Lasso Regression,49.6412,6235.8807,78.5693,0.9937,0.0901,0.0652,0.8167
en,Elastic Net,49.6412,6235.8787,78.5693,0.9937,0.0901,0.0652,0.0133
lar,Least Angle Regression,49.6415,6235.9377,78.5696,0.9937,0.0901,0.0652,0.0133
lr,Linear Regression,49.6415,6235.9408,78.5696,0.9937,0.0901,0.0652,0.75
ridge,Ridge Regression,49.6415,6235.9331,78.5696,0.9937,0.0901,0.0652,0.0133
et,Extra Trees Regressor,52.8604,8292.9131,88.1246,0.9924,0.0912,0.0632,0.25


In [43]:
huber = create_model('huber')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,49.6974,6402.6967,80.0169,0.993,0.0707,0.0555
1,32.6844,6766.3518,82.2578,0.9923,0.0528,0.0302
2,42.7675,7243.5707,85.1092,0.9945,0.0353,0.0272
Mean,41.7164,6804.2064,82.4613,0.9933,0.053,0.0376
SD,6.9852,344.3274,2.0839,0.0009,0.0145,0.0127
