In [1]:
"""
Data (Daily & Minute): Binance API. Binance API keys are required to pull the data.
Binance API Documentation: https://binance-docs.github.io/apidocs/spot/en/#introduction

"""

'\nData (Daily & Minute): Binance API. Binance API keys are required to pull the data.\nBinance API Documentation: https://binance-docs.github.io/apidocs/spot/en/#introduction\n\n'

In [2]:
# J.Guanzon Comment-Imports needed to run this file
from binance import Client, ThreadedWebsocketManager, ThreadedDepthCacheManager
import pandas as pd
import mplfinance as mpl
import mplfinance as mpf
import os
import json
import requests
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.model_selection import train_test_split
import numpy as np
from pathlib import Path
import seaborn as sns
from sklearn.metrics import mean_absolute_error
%matplotlib inline

In [3]:
# Read the Binance credentials from the process environment.
# Each value is None when the corresponding variable is not set.
# NOTE(review): nothing in this notebook loads a .env file, so the variables
# must already be exported before the kernel starts — confirm.
api_key = os.getenv("api_key")
api_secret = os.getenv("api_secret")

In [4]:
# Binance REST client used by every API call in the cells below.
client = Client(api_key=api_key, api_secret=api_secret)

In [5]:
# J.Guanzon Comment: Gather the ticker entries for all symbols on the exchange.
# Each entry carries a 'symbol' and a 'price' (see the table rendered below).
tickers = client.get_all_tickers()

In [6]:
# Put the ticker entries into a DataFrame, one row per entry.
ticker_df = pd.DataFrame(tickers)

In [7]:
# Index the tickers by symbol.
# The frame is rebuilt from `tickers` rather than mutated with
# set_index(..., inplace=True): the in-place form raises KeyError when this cell
# is run a second time, because 'symbol' has already left the columns.
ticker_df = pd.DataFrame(tickers).set_index('symbol')
ticker_df

Unnamed: 0_level_0,price
symbol,Unnamed: 1_level_1
ETHBTC,0.06242800
LTCBTC,0.00309400
BNBBTC,0.00821400
NEOBTC,0.00079400
QTUMETH,0.00376000
...,...
SHIBAUD,0.00004065
RAREBTC,0.00004211
RAREBNB,0.00514100
RAREBUSD,2.37500000


In [8]:
""" 
Ability to save csv file of all tickers.
Allows the user to see what types of cryptocurrencies are out there.
For now, we will only focus on Bitcoin.
"""

' \nAbility to save csv file of all tickers.\nAllows the user to see what types of cryptocurrencies are out there.\nFor now, we will only focus on Bitcoin.\n'

In [9]:
# Save the symbol/price table so the available pairs can be browsed offline.
# NOTE(review): assumes a Resources/ directory already exists next to the notebook — confirm.
ticker_df.to_csv("Resources/binance_tickers.csv")

In [10]:
# Look up the BTCUSDT price with a single .loc[row, column] access and show it as a float.
display(float(ticker_df.loc['BTCUSDT', 'price']))

56131.37

In [11]:
# Fetch the current order book for BTCUSDT; its 'asks' entry is tabulated in the next cell.
depth = client.get_order_book(symbol='BTCUSDT')

In [12]:
# Tabulate the ask side of the order book, label its two columns, and preview it.
depth_df = pd.DataFrame(depth['asks']).set_axis(['Price', 'Volume'], axis=1)
depth_df.head()

Unnamed: 0,Price,Volume
0,56131.84,1.82533
1,56131.88,0.00093
2,56133.52,0.10908
3,56136.02,0.2796
4,56136.07,0.25037


In [13]:
"""
Pulling historical daily data
"""

'\nPulling historical daily data\n'

In [14]:
# Daily BTCUSDT candles (klines) starting from 1 Jan 2021; no end date is passed.
btc_daily_data = client.get_historical_klines('BTCUSDT', Client.KLINE_INTERVAL_1DAY, '1 Jan 2021')

In [15]:
# Build the daily kline frame and name its twelve positional fields in one chain.
btc_daily_df = pd.DataFrame(btc_daily_data).set_axis(
    [
        'Open Time', 'Open', 'High', 'Low', 'Close', 'Volume',
        'Close Time', 'Quote Asset Volume', 'Number of Trades',
        'TB Base Volume', 'TB Quote Volume', 'Ignore',
    ],
    axis=1,
)

In [16]:
# The kline timestamps are epoch milliseconds, so parse them with unit='ms' directly.
# The previous form divided by 1000 first, which turned the values into float
# seconds (inexact for the .999 close times) and made the cell fail on a second
# run, because datetimes cannot be divided by 1000.
btc_daily_df['Open Time'] = pd.to_datetime(btc_daily_df['Open Time'], unit='ms')
btc_daily_df['Close Time'] = pd.to_datetime(btc_daily_df['Close Time'], unit='ms')

In [17]:
# Price/volume fields that need converting to numbers.
numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote Asset Volume', 'TB Base Volume', 'TB Quote Volume']
# Convert column by column (apply's default axis). The previous axis=1 call ran
# pd.to_numeric once per row, which is needlessly slow and ties each row's
# dtype to its mix of values.
btc_daily_df[numeric_columns] = btc_daily_df[numeric_columns].apply(pd.to_numeric)

In [18]:
# Keep the first six columns (Open Time plus OHLCV) and index the result by Open Time.
btc_ohlcv_daily = btc_daily_df.iloc[:, :6].set_index('Open Time')
btc_ohlcv_daily

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,28923.63,29600.00,28624.57,29331.69,54182.925011
2021-01-02,29331.70,33300.00,28946.53,32178.33,129993.873362
2021-01-03,32176.45,34778.11,31962.99,33000.05,120957.566750
2021-01-04,33000.05,33600.00,28130.00,31988.71,140899.885690
2021-01-05,31989.75,34360.00,29900.00,33949.53,116049.997038
...,...,...,...,...,...
2021-10-09,53955.67,55489.00,53661.67,54949.72,55177.080130
2021-10-10,54949.72,56561.31,54080.00,54659.00,89237.836128
2021-10-11,54659.01,57839.04,54415.06,57471.35,52933.165751
2021-10-12,57471.35,57680.00,53879.00,55996.93,53471.285500


In [19]:
# Persist the daily OHLCV frame; this exact file name is read back later in the notebook.
# NOTE(review): assumes a Resources/ directory already exists next to the notebook — confirm.
btc_ohlcv_daily.to_csv("Resources/daily_btc_ohclv_2021.csv")

In [20]:
"""
Pulling historical minute data 
"""

'\nPulling historical minute data \n'

In [21]:
# One-minute candles (klines) starting five days before now.
# NOTE(review): this pulls 'BTCUSDC', while the ticker lookup, order book and daily
# history in this notebook all use 'BTCUSDT' — confirm the different pair is intended.
historical_minute = client.get_historical_klines('BTCUSDC', Client.KLINE_INTERVAL_1MINUTE, '5 day ago UTC')

In [22]:
# Load the raw minute klines into a DataFrame; column names are assigned in the next cell.
hist_min = pd.DataFrame(historical_minute)

In [23]:
# Name the twelve positional kline fields (same layout as the daily frame).
hist_min = hist_min.set_axis(
    [
        'Open Time', 'Open', 'High', 'Low', 'Close', 'Volume',
        'Close Time', 'Quote Asset Volume', 'Number of Trades',
        'TB Base Volume', 'TB Quote Volume', 'Ignore',
    ],
    axis=1,
)

In [24]:
# The kline timestamps are epoch milliseconds, so parse them with unit='ms' directly.
# The previous form divided by 1000 first, which turned the values into float
# seconds (inexact for the .999 close times) and made the cell fail on a second
# run, because datetimes cannot be divided by 1000.
hist_min['Open Time'] = pd.to_datetime(hist_min['Open Time'], unit='ms')
hist_min['Close Time'] = pd.to_datetime(hist_min['Close Time'], unit='ms')


In [25]:
# Price/volume fields that need converting to numbers.
numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote Asset Volume', 'TB Base Volume', 'TB Quote Volume']
# Convert column by column (apply's default axis). The previous axis=1 call ran
# pd.to_numeric once per row, which is needlessly slow for several thousand
# minute rows.
hist_min[numeric_columns] = hist_min[numeric_columns].apply(pd.to_numeric)

In [26]:
# Keep the first six columns (Open Time plus OHLCV) and index the result by Open Time.
btc_ohlcv_minute = hist_min.iloc[:, :6].set_index('Open Time')
btc_ohlcv_minute

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-08 04:47:00,53930.92,53940.04,53910.38,53911.27,0.22636
2021-10-08 04:48:00,53927.65,53946.47,53920.29,53946.47,0.09104
2021-10-08 04:49:00,53952.22,53982.87,53942.61,53968.60,0.04603
2021-10-08 04:50:00,53968.90,53981.20,53930.10,53930.10,1.12852
2021-10-08 04:51:00,53933.94,53944.39,53925.67,53925.67,0.14553
...,...,...,...,...,...
2021-10-13 04:42:00,56208.80,56223.74,56206.19,56207.08,0.43183
2021-10-13 04:43:00,56217.48,56217.48,56193.90,56193.90,0.20990
2021-10-13 04:44:00,56186.35,56195.75,56161.82,56164.60,0.12363
2021-10-13 04:45:00,56163.56,56163.56,56141.39,56141.39,0.88368


In [27]:
# Persist the minute OHLCV frame.
# NOTE(review): assumes a Resources/ directory already exists next to the notebook — confirm.
btc_ohlcv_minute.to_csv("Resources/minute_btc_ohclv_2021.csv")

In [28]:
"""
Next, we use the daily data to train a Recurrent Neural Network (RNN). We chose an RNN because it is designed for time-series and other sequential data.
An RNN carries information from prior inputs forward so that it influences the current input and output, and this cycle repeats at every step.
"""

'\nNext, we use the daily data to train a Recurrent Neural Network (RNN). We chose an RNN because it is designed for time-series and other sequential data.\nAn RNN carries information from prior inputs forward so that it influences the current input and output, and this cycle repeats at every step.\n'

In [None]:
# Reload the daily candles saved earlier. parse_dates=True turns the "Open Time"
# index back into timestamps; without it the index comes back as plain strings.
btc_df = pd.read_csv(Path("Resources/daily_btc_ohclv_2021.csv"),
                     index_col="Open Time",
                     parse_dates=True)
# NOTE(review): target_col is not referenced by any code visible in this notebook.
target_col = 'Close'

In [70]:
def normalize_timestep(timestep, reference_list, volume_col=3):
    """Normalize one window of rows relative to its first row.

    Every column is divided by the reference price ``timestep[0][0]`` and
    shifted by -1, so it becomes a fractional change from that reference.
    The volume column is instead scaled against its own first-row value.

    Args:
        timestep: 2-D NumPy array (rows x columns) holding one window.
        reference_list: list that receives the reference price (appended),
            so the caller can de-normalize later.
        volume_col: position of the volume column. Defaults to 3, the
            position that used to be hard-coded.
            NOTE(review): the OHLCV frame saved by this notebook has Close at
            position 3 and Volume at position 4 once "Open Time" is the index,
            so pass ``volume_col=4`` for that layout — confirm.

    Returns:
        A new array of the same shape; ``timestep`` itself is not modified.
        A zero reference price or volume yields inf/nan (NumPy division).
    """
    reference_price = timestep[0][0]
    reference_list.append(reference_price)

    # Capture the raw volumes first: the price scaling below would otherwise
    # divide them by the reference price.
    raw_volume = np.copy(timestep[:, volume_col])
    reference_volume = raw_volume[0]

    normalized = (timestep / reference_price) - 1
    normalized[:, volume_col] = (raw_volume / reference_volume) - 1
    return normalized
normalize_timestep

<function __main__.normalize_timestep(timestep, reference_list)>

In [71]:

#take data and split into timeseries so that we can train the model
def load_data(btc_df, num_timesteps, target_len, train_percent=.75):
    """Cut a table into overlapping, normalized windows and split them into train/test.

    Each window holds ``num_timesteps`` input rows followed by ``target_len``
    target rows, and is normalized with ``normalize_timestep``. The target is
    the last column of the target rows.
    NOTE(review): for the OHLCV frame saved by this notebook the last column is
    Volume, not Close — confirm which column is meant to be predicted.

    Args:
        btc_df: 2-D numeric table (DataFrame or array-like), one row per time step.
        num_timesteps: number of input rows per sample.
        target_len: number of target rows per sample.
        train_percent: fraction of the windows (earliest first) used for training.

    Returns:
        ``[x_train, y_train, x_test, y_test, reference_points]`` where
        ``reference_points`` has one reference price per window, in window
        order, for de-normalizing outside this function.

    Raises:
        ValueError: if the table has fewer rows than one full window.
    """
    combined_length = num_timesteps + target_len

    # Force float: with an integer table the normalized fractions used to be
    # truncated when they were written back into the integer window array.
    values = np.asarray(btc_df, dtype=float)

    # "+ 1" keeps the final window, which ends exactly on the last row;
    # the previous range() stopped one window short.
    num_windows = len(values) - combined_length + 1
    if num_windows < 1:
        raise ValueError(
            "need at least %d rows to build one window, got %d"
            % (combined_length, len(values))
        )

    reference_points = []  # for de-normalizing outside of the function
    windows = np.stack([
        normalize_timestep(values[start:start + combined_length], reference_points)
        for start in range(num_windows)
    ])

    # Chronological split: earliest windows train, latest windows test.
    split_row = int(round(train_percent * len(windows)))
    train = windows[:split_row]
    test = windows[split_row:]

    # Inputs are the first num_timesteps rows; targets are the remaining rows'
    # last column. Using num_timesteps directly also works when train is empty.
    x_train = train[:, :num_timesteps]
    y_train = train[:, num_timesteps:, -1]

    x_test = test[:, :num_timesteps]
    y_test = test[:, num_timesteps:, -1]

    return [x_train, y_train, x_test, y_test, reference_points]
load_data

<function __main__.load_data(btc_df, num_timesteps, target_len, train_percent=0.75)>

In [72]:
def generate_graph(stock_name, days_back, num_timesteps, target_len, minimum_days=500):
    """Train a model on one ticker's history, log the samples, and plot predictions.

    The function loads the data with ``get_stock_data`` and windows it with
    ``load_data`` (90% train). It fits the model from ``build_model`` for one
    epoch, prints train/test scores, and writes every sample to ``log.txt``
    (overwritten on each call). Finally it plots de-normalized test
    predictions, the actual values, the most recent window and the forecast
    that follows it.

    Args:
        stock_name: ticker passed to ``get_stock_data`` and shown in the plot title.
        days_back: passed through to ``get_stock_data``.
        num_timesteps: number of input rows per sample.
        target_len: number of predicted steps per sample.
        minimum_days: passed through to ``get_stock_data``.

    Returns:
        True once the plot has been shown.

    NOTE(review): ``get_stock_data`` and ``build_model`` are not defined in the
    visible part of this notebook — confirm where they come from.
    """
    # Local import: math is used for the RMSE below but is not imported by the
    # notebook's import cell.
    import math

    train_percent = .9
    stock_data = get_stock_data(stock_name, days_back, minimum_days)

    X_train, y_train, X_test, y_test, ref = load_data(stock_data, num_timesteps, target_len=target_len, train_percent=train_percent)

    # store recent data so that we can get a live prediction
    recent_reference = []
    recent_data = stock_data[-num_timesteps:]
    recent_data = normalize_timestep(recent_data, recent_reference)

    print("    X_train", X_train.shape)
    print("    y_train", y_train.shape)
    print("    X_test", X_test.shape)
    print("    y_test", y_test.shape)

    # setup and fit the model
    print("TRAINING")
    # NOTE(review): the leading 5 is presumably the number of feature columns — confirm against build_model.
    model = build_model([5, num_timesteps, target_len])
    model.fit(
        X_train,
        y_train,
        batch_size=512,
        epochs=1,
        validation_split=0.1,
        verbose=2)

    # evaluate the fitted model; assumes evaluate() returns [loss, metric] — TODO confirm
    trainScore = model.evaluate(X_train, y_train, verbose=100)
    print('Train Score: %.2f MSE (%.2f RMSE) (%.2f)' % (trainScore[0], math.sqrt(trainScore[0]), trainScore[1]))

    testScore = model.evaluate(X_test, y_test, verbose=100)
    print('Test Score: %.2f MSE (%.2f RMSE) (%.2f)' % (testScore[0], math.sqrt(testScore[0]), testScore[1]))

    #make predictions
    print("PREDICTING")
    p = model.predict(X_test)
    recent_data = [recent_data] # One-sample predictions need list wrapper. Argument must be 3d.
    recent_data = np.asarray(recent_data)
    future = model.predict([recent_data])

    # document results in file; the with-block closes the log even if a write fails
    print("WRITING TO LOG")
    with open("log.txt", "w") as file:
        for i in range(0, len(X_train)):
            for s in range(0, num_timesteps):
                file.write(str(X_train[i][s]) + "\n")
            file.write("Target: " + str(y_train[i]) + "\n")
            file.write("\n")

        for i in range(0, len(X_test)):
            for s in range(0, num_timesteps):
                file.write(str(X_test[i][s]) + "\n")
            file.write("Target: " + str(y_test[i]) + "\n")
            file.write("Prediction: " + str(p[i]) + "\n")
            file.write("\n")

    # de-normalize
    # The test windows are the last len(p) entries of ref. Deriving the start
    # from the lengths avoids round(.9 * len(ref) + i), which can land on a
    # different window than load_data's split because round() rounds halves to even.
    test_start = len(ref) - len(p)
    for i in range(0, len(p)):
        p[i] = (p[i] + 1) * ref[test_start + i]
        y_test[i] = (y_test[i] + 1) * ref[test_start + i]

    future[0] = (future[0] + 1) * recent_reference[0]
    recent_data[0] = (recent_data[0] + 1) * recent_reference[0]

    # plot historical predictions (every second block of target_len steps, to limit overlap)
    for i in range(0, len(p)):
        if i % (target_len*2) == 0:
            plot_index = i #for filling plot indexes
            plot_indexes = []
            plot_values = p[i]
            for j in range(0, target_len):
                plot_indexes.append(plot_index)
                plot_index += 1
            plt.plot(plot_indexes, plot_values, color="red")

    # plot historical actual
    plt.plot(y_test[:, 0], color='blue', label='Actual') # actual stock price history

    # plot recent prices
    plot_indexes = [len(y_test) - 1]
    plot_values = [y_test[-1, 0]]
    plot_index = None
    for i in range(0, len(recent_data[0])):
        plot_values.append(recent_data[0][i][0])
        plot_index = len(y_test) + i
        plot_indexes.append(len(y_test)+i)
    plt.plot(plot_indexes, plot_values, color='blue')

    # plot future predictions
    plot_indexes = [plot_index]
    plot_values = [recent_data[0][-1][0]]
    for i in range(0, len(future[0])):
        plot_index += 1
        plot_values.append(future[0][i])
        plot_indexes.append(plot_index)
    plt.plot(plot_indexes, plot_values, color="red", label="Prediction")

    #show plot
    plt.legend(loc="upper left")
    plt.title(stock_name + " Price Predictions")
    plt.xlabel("Days")
    plt.ylabel("Price ($)")
    plt.show()

    return True

generate_graph

In [72]:
# NOTE(review): neither `fire` nor `run` is imported or defined in the visible part
# of this notebook. The function reprs above show the kernel module is __main__,
# so this guard is true and the call would raise NameError. Presumably left over
# from a script version — confirm and remove.
if __name__ == "__main__":
    fire.Fire(run)

<function __main__.generate_graph(stock_name, days_back, num_timesteps, target_len, minimum_days=500)>

In [None]:
# Run generate_graph for every ticker in the NASDAQ symbol directory.
# NOTE(review): `read_stocks` and `quandl` are not defined or imported in the visible
# part of this notebook, and this rebinds `tickers`, which earlier held the Binance
# ticker list — confirm this cell still belongs in the BTC workflow.
tickers = read_stocks("ftp://ftp.nasdaqtrader.com/symboldirectory/nasdaqlisted.txt")
num_days_back = 3700

for ticker in tickers:
    print("Ticker:" + str(ticker))

    # Skip tickers the data source does not know; any other error still propagates.
    try:
        isDone = generate_graph(ticker, num_days_back, 100, 30)
    except quandl.errors.quandl_error.NotFoundError:
        continue

    # generate_graph(ticker, 300, 20, 10) #FOR TESTING

SyntaxError: unexpected EOF while parsing (<ipython-input-39-8cc0034dc7be>, line 11)