#### Data loader to download daily Yahoo price data and persist it in CSV files

In [287]:
import pickle
import random_forest
import pandas_datareader as pdr
import pandas as pd
import numpy as np
import datetime
from datetime import date
import yfinance as yf

pd.options.mode.chained_assignment = None

In [288]:
def get_current_price(symbol):
    """Fetch the 'Close' values for ``symbol`` over the last day.

    The request window runs from yesterday (``date.today()`` minus one day)
    to ``date.today()``.

    Args:
        symbol: Ticker passed straight through to ``pdr.get_data_yahoo``.

    Returns:
        ``.values`` of the 'Close' column of the returned data (one entry per
        row returned; may be empty if no rows come back).
    """
    # NOTE(review): pdr_override() presumably reroutes pandas_datareader's
    # Yahoo reader through yfinance — confirm against the installed version.
    yf.pdr_override()
    today = date.today()
    yesterday = today - datetime.timedelta(1)
    quotes = pdr.get_data_yahoo(symbol, start=yesterday, end=today)
    closes = quotes['Close']
    return closes.values

def get_historical(symbol):
    """Fetch roughly two years of price history for ``symbol``.

    The window ends two days before today and starts 730 days before that.

    Args:
        symbol: Ticker passed straight through to ``pdr.get_data_yahoo``.

    Returns:
        Whatever ``pdr.get_data_yahoo`` returns for that window.
    """
    window_end = date.today() - datetime.timedelta(days=2)
    window_start = window_end - datetime.timedelta(days=730)
    # NOTE(review): pdr_override() presumably reroutes pandas_datareader's
    # Yahoo reader through yfinance — confirm against the installed version.
    yf.pdr_override()
    return pdr.get_data_yahoo(symbol, start=window_start, end=window_end)

# Calculate proportion change from one column to another
def p_change(df, col1, col2):
    """Return the proportional change from ``col1`` to ``col2`` for every row.

    Each result is ``(df[col2] - df[col1]) / df[col1]``.

    The calculation is positional, so it works whatever index ``df`` carries
    (dates, non-contiguous integers, ...). The previous row-by-row version
    used the loop counter as an index *label* and therefore only worked with
    the default 0..n-1 index.

    Args:
        df: DataFrame containing both columns.
        col1: Name of the baseline column (also the denominator).
        col2: Name of the column compared against the baseline.

    Returns:
        A list with one value per row of ``df``, in row order. An empty
        frame yields an empty list.
    """
    baseline = df[col1].to_numpy()
    target = df[col2].to_numpy()
    # Callers assign the result to a DataFrame column and expect a plain
    # list, so convert the array back rather than returning it directly.
    return list((target - baseline) / baseline)

#### Download historical price data from Yahoo

In [290]:
# Download two years of daily prices per symbol and save one CSV per symbol,
# so the data does not have to be re-downloaded for later steps.
import os

import yfinance as yf

# Tickers to download; '-USD' is appended to each one when querying.
symbols = [
    'BTC', 'ETH', 'ADA', 'DOGE', 'XRP', 'HEX', 'BCH', 'LTC', 'LINK', 'MATIC', 'THETA',
    'XLM', 'VET', 'ETC', 'TRX', 'FIL', 'XMR', 'EOS', 'ALGO', 'CRO', 'TFUEL', 'BSV',
    'NEO', 'XTZ', 'MIOTA', 'LUNA1', 'MKR', 'ATOM1', 'KSM', 'BTT1', 'HBAR', 'RUNE', 'CHZ',
    'WAVES', 'DCR', 'CEL', 'ZEC', 'DASH', 'HOT1', 'XEM', 'QNT', 'ZIL',
    'ENJ', 'BAT', 'STX1', 'MANA', 'SNX', 'XWC', 'ZEN', 'BTG', 'NANO', 'BNT', 'DGB', 'ONE2',
    'QTUM', 'ARRR', 'ONT', 'SC', 'ZRX', 'OMG', 'ANKR', 'ICX', 'RVN', 'BCD', 'XVG',
    'CKB', 'IOST', 'RSR', 'MAID', 'KNC', 'HNC', 'LRC',
    'LSK', 'KAVA', 'VTHO', 'RLC', 'GNO', 'BAND', 'STORJ',
    'MCO', 'ABBC', 'FUN', 'OXT', 'WAXP', 'SNT', 'FET', 'IOTX', 'NKN', 'ANT', 'REP', 'BTS', 'CVC', 'DERO',
    'MLN', 'TOMO', 'AVA', 'ARDR', 'XHV', 'ETN', 'BCN',
]

# Window: the 730 days ending yesterday.
end_date = date.today() - datetime.timedelta(1)
start_date = end_date - datetime.timedelta(730)

# Write to the same relative 'price-data/' directory that the augmentation
# step reads from, instead of a machine-specific absolute path, and make sure
# it exists so the first run on a fresh checkout does not fail.
price_data_dir = 'price-data'
os.makedirs(price_data_dir, exist_ok=True)

for symbol in symbols:
    print(symbol,'start',start_date,'end',end_date)
    data = yf.download(symbol + '-USD', start=start_date, end=end_date)
    data.to_csv(os.path.join(price_data_dir, symbol + '.csv'))

BTC start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
ETH start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
ADA start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
DOGE start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
XRP start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
HEX start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
BCH start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
LTC start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
LINK start 2019-08-05 end 2021-08-04
[*********************100%***********************]  1 of 1 completed
MATIC start 2019-08-05 end 2021-08-04
[**************

#### Augment historical prices with differentials for classification

In [291]:
# For every symbol: load its saved prices, add the proportional-change
# features used by the classifier, and write the result back to the same file.
for symbol in symbols:
    try:
        df = pd.read_csv('price-data/' + symbol + '.csv')
        # Drop the last row before computing features.
        # NOTE(review): the augmented frame overwrites the input file below,
        # so re-running this cell without re-downloading trims one more row
        # each time — confirm this is intended.
        df.drop(df.tail(1).index, inplace=True)

        # prepare data for classifier
        # 15- and 30-row rolling means of Close; min_periods=1 so the first
        # rows get a value from whatever history is available.
        sma15c = df.Close.rolling(15, 1).mean()
        sma30c = df.Close.rolling(30, 1).mean()
        # Gap between the two averages, as a fraction of the 30-row average.
        df['pc_sma15c_sma30c'] = (sma30c - sma15c) / sma30c
        df['pc_open_close'] = p_change(df, 'Open', 'Close')
        df['pc_high_low'] = p_change(df, 'High', 'Low')
        df['pc_low_close'] = p_change(df, 'Low', 'Close')
        df['symbol'] = symbol
        column_order = [
            'symbol', 'Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close',
            'pc_sma15c_sma30c', 'pc_open_close', 'pc_high_low', 'pc_low_close',
        ]
        df = df[column_order]

        # save data for classifier
        df.to_csv('price-data/' + symbol + '.csv')
    except Exception as exc:
        # Best effort per symbol: report the cause and continue with the next
        # one. Catching Exception (not a bare except) lets Ctrl-C through.
        print(symbol, "error loading", exc)