In [30]:
import time
import dateparser
import pytz
import json
import talib
import pandas as pd 
from datetime import datetime
from binance.client import Client
from keras.models import Sequential
from keras.layers import Dense

In [31]:
def date_to_milliseconds(date_str): # date to milliseconds
    """Convert a UTC date string to milliseconds since the Unix epoch.

    If using offset strings add "UTC" to the date string, e.g. "now UTC",
    "11 hours ago UTC".
    See dateparser docs for formats http://dateparser.readthedocs.io/en/latest/

    :param date_str: date in readable format, i.e. "January 01, 2018", "11 hours ago UTC", "now UTC"
    :type date_str: str
    :return: milliseconds between 1970-01-01 00:00:00 UTC and the parsed date
    :rtype: int
    :raises ValueError: if ``date_str`` cannot be parsed into a date
    """
    # Local import: the notebook's import cell only brings in ``datetime``.
    from datetime import timezone

    # Timezone-aware epoch. datetime.utcfromtimestamp() is deprecated since
    # Python 3.12, so the epoch is built explicitly instead.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)

    # dateparser.parse() returns None (it does not raise) when it cannot
    # understand the text; fail here with a clear message instead of an
    # AttributeError on the next line.
    d = dateparser.parse(date_str)
    if d is None:
        raise ValueError("Unable to parse date string: %r" % (date_str,))

    # if the date is not timezone aware apply UTC timezone
    if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
        d = d.replace(tzinfo=timezone.utc)

    # return the difference in time, expressed in whole milliseconds
    return int((d - epoch).total_seconds() * 1000.0)

In [32]:
def interval_to_milliseconds(interval): # interval to milliseconds
    """Convert a Binance interval string to milliseconds

    :param interval: Binance interval string 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w
    :type interval: str
    :return:
         None if interval is empty or None
         None if unit not one of m, h, d or w
         None if string not in correct format
         int value of interval in milliseconds
    """
    seconds_per_unit = {
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60
    }

    # An empty string (or None) has no unit character; indexing interval[-1]
    # would raise, whereas the documented contract is to return None.
    if not interval:
        return None

    # The last character is the unit, everything before it is the count.
    unit_seconds = seconds_per_unit.get(interval[-1])
    if unit_seconds is None:
        return None

    try:
        return int(interval[:-1]) * unit_seconds * 1000
    except ValueError:
        # the count part is missing or not an integer, e.g. "m" or "xm"
        return None

In [33]:
def get_historical_klines(symbol, interval, start_str, end_str=None):
    """Get Historical Klines from Binance

    Pages through ``Client.get_klines`` 500 rows at a time, starting at
    ``start_str`` and moving the start timestamp forward after every page,
    until a page comes back short (or empty) or the next page would start
    after ``end_str``.

    See dateparser docs for valid start and end string formats http://dateparser.readthedocs.io/en/latest/
    If using offset strings for dates add "UTC" to date string e.g. "now UTC", "11 hours ago UTC"

    :param symbol: Name of symbol pair e.g BNBBTC
    :type symbol: str
    :param interval: Binance Kline interval
    :type interval: str
    :param start_str: Start date string in UTC format
    :type start_str: str
    :param end_str: optional - end date string in UTC format
    :type end_str: str
    :return: list of OHLCV values
    :raises ValueError: if ``interval`` cannot be converted to milliseconds
    """
    # Validate the interval before any client is created: a None timeframe
    # would otherwise only fail later, when added to a timestamp.
    timeframe = interval_to_milliseconds(interval)
    if timeframe is None:
        raise ValueError("Unsupported kline interval: %r" % (interval,))

    # convert our date strings to milliseconds
    start_ts = date_to_milliseconds(start_str)
    end_ts = date_to_milliseconds(end_str) if end_str else None

    # create the Binance client, no need for api key
    client = Client("", "")

    output_data = []

    # maximum number of klines requested per call
    limit = 500

    idx = 0
    while True:
        # fetch the klines from start_ts up to max 500 entries or the end_ts if set
        temp_data = client.get_klines(
            symbol=symbol,
            interval=interval,
            limit=limit,
            startTime=start_ts,
            endTime=end_ts
        )

        # An empty page means there is nothing (more) to download. The
        # original indexed the last row unconditionally, which raised
        # IndexError when the page after a full 500-row page was empty.
        # As before, an empty *first* page also ends the download: the
        # function does not probe forward looking for the listing date.
        if not temp_data:
            break

        # append this loop's data to our output data
        output_data += temp_data

        # next page starts one interval after the last kline's open time
        start_ts = temp_data[-1][0] + timeframe

        idx += 1
        # a short page is the last page
        if len(temp_data) < limit:
            break

        # the requested window is exhausted; skip the request that would
        # start after end_ts
        if end_ts is not None and start_ts > end_ts:
            break

        # sleep after every 3rd call to be kind to the API
        if idx % 3 == 0:
            time.sleep(1)

    return output_data


In [34]:
# Dataset parameters: trading pair, date window and candle size.
symbol = "ETHUSDT"
start = "1 Nov, 2019"
end = "1 Nov, 2021"
# Interval constants are listed in
# https://github.com/sammchardy/python-binance/blob/master/binance/client.py
interval = Client.KLINE_INTERVAL_30MINUTE

# Labels for the 12 fields of each kline row.
columns = [
    'Open time',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Close time',
    'Quote asset volume',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
    'Ignore',
]

# Download the klines and load them into a pandas DataFrame.
dataset = pd.DataFrame(
    data=get_historical_klines(symbol, interval, start, end),
    columns=columns,
)

  date_obj = stz.localize(date_obj)


In [36]:
# shape of data
dataset.shape

(35010, 12)

In [37]:
# look at head of data
dataset.head()

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,Ignore
0,1572566400000,182.19,182.22,181.2,181.39,5265.95771,1572568199999,956104.2352134,2909,2566.65305,465982.1802531,0
1,1572568200000,181.39,181.96,181.29,181.91,4120.24802,1572569999999,748907.0951106,2249,1742.04669,316606.129592,0
2,1572570000000,181.84,182.64,181.82,182.09,3980.20413,1572571799999,725620.9065028,2404,1770.83312,322812.4094534,0
3,1572571800000,182.1,182.5,181.55,181.55,2778.56939,1572573599999,505752.6693028,2318,1384.92003,252128.4281608,0
4,1572573600000,181.55,182.0,181.28,181.64,3789.0253,1572575399999,688209.1012924,2592,1714.17791,311379.4279115,0


In [11]:
# Observation Data Types
dataset.dtypes

Open time                        int64
Open                            object
High                            object
Low                             object
Close                           object
Volume                          object
Close time                       int64
Quote asset volume              object
Number of trades                 int64
Taker buy base asset volume     object
Taker buy quote asset volume    object
Ignore                          object
dtype: object

In [47]:
# The price/volume columns arrive as `object` dtype; cast the whole frame to
# float so arithmetic works (this also turns the integer columns into floats).
dataset = dataset.astype(float)

# Percentage change of the closing price relative to the previous row.
df1 = dataset["Close"].pct_change().mul(100)

In [50]:
# Name the percentage-change Series "Today" (it was still named "Close").
df1 = df1.rename("Today")

In [51]:
# Turn the Series into a DataFrame: the old index becomes an `index` column
# next to `Today`. Not idempotent - re-running this cell changes df1 again.
df1 = df1.reset_index()

In [53]:
# Add the five previous percentage changes as features `Lag 1` .. `Lag 5`.
# The first i rows of each `Lag i` column are NaN because of the shift.
# Side effect: the notebook-level name `i` is left bound to 5 after the loop.
for i in range (1,6): 
    df1['Lag ' + str(i)] = df1["Today"].shift(i)

In [55]:
# Replace the positional `index` column with each row's `Open time` value
# (float after the earlier astype(float) cast).
df1["index"] = dataset["Open time"]

In [56]:
df1

Unnamed: 0,index,Today,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5
0,1.572566e+12,,,,,,
1,1.572568e+12,0.286675,,,,,
2,1.572570e+12,0.098950,0.286675,,,,
3,1.572572e+12,-0.296557,0.098950,0.286675,,,
4,1.572574e+12,0.049573,-0.296557,0.098950,0.286675,,
...,...,...,...,...,...,...,...
35005,1.635718e+12,-0.004644,0.726483,0.241025,-0.026018,0.433163,-0.215410
35006,1.635719e+12,-0.307692,-0.004644,0.726483,0.241025,-0.026018,0.433163
35007,1.635721e+12,-0.098532,-0.307692,-0.004644,0.726483,0.241025,-0.026018
35008,1.635723e+12,-0.036374,-0.098532,-0.307692,-0.004644,0.726483,0.241025


In [57]:
# Lagged traded volume, divided by 1e9 to keep the feature values small.
# The lag is spelled out explicitly: the original expression read the leftover
# loop variable `i` from the lag-building cell, so the result silently
# depended on which cells had been run before this one.
VOLUME_LAG = 5  # the value `i` holds after `for i in range(1, 6)`
df1['Volume'] = dataset["Volume"].shift(VOLUME_LAG).values / 1_000_000_000

In [58]:
df1

Unnamed: 0,index,Today,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5,Volume
0,1.572566e+12,,,,,,,
1,1.572568e+12,0.286675,,,,,,
2,1.572570e+12,0.098950,0.286675,,,,,
3,1.572572e+12,-0.296557,0.098950,0.286675,,,,
4,1.572574e+12,0.049573,-0.296557,0.098950,0.286675,,,
...,...,...,...,...,...,...,...,...
35005,1.635718e+12,-0.004644,0.726483,0.241025,-0.026018,0.433163,-0.215410,0.000004
35006,1.635719e+12,-0.307692,-0.004644,0.726483,0.241025,-0.026018,0.433163,0.000006
35007,1.635721e+12,-0.098532,-0.307692,-0.004644,0.726483,0.241025,-0.026018,0.000004
35008,1.635723e+12,-0.036374,-0.098532,-0.307692,-0.004644,0.726483,0.241025,0.000005


In [63]:
# Drop every row that still contains a NaN (the leading rows, where the
# shifted lag/volume features have no value yet).
df1 = df1.dropna()

In [61]:
# Binary target: 1 when `Today` (the percentage change) is positive, else 0.
# `const` is a column of ones that the train/test feature lists select; no
# visible cell created it, so it is added here to keep the notebook runnable
# from a fresh kernel.
# `assign` returns a new frame, which avoids writing into the result of
# `dropna()` (SettingWithCopyWarning).
df1 = df1.assign(
    Direction=(df1["Today"] > 0).astype(int),
    const=1,
)

In [66]:
df1

Unnamed: 0,index,Today,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5,Volume,Direction,const
6,1.572577e+12,-0.895506,0.209205,0.049573,-0.296557,0.098950,0.286675,0.000004,0,1
7,1.572579e+12,-0.160763,-0.895506,0.209205,0.049573,-0.296557,0.098950,0.000004,0,1
8,1.572581e+12,0.294281,-0.160763,-0.895506,0.209205,0.049573,-0.296557,0.000003,1,1
9,1.572583e+12,0.033217,0.294281,-0.160763,-0.895506,0.209205,0.049573,0.000004,1,1
10,1.572584e+12,0.188168,0.033217,0.294281,-0.160763,-0.895506,0.209205,0.000005,1,1
...,...,...,...,...,...,...,...,...,...,...
35005,1.635718e+12,-0.004644,0.726483,0.241025,-0.026018,0.433163,-0.215410,0.000004,0,1
35006,1.635719e+12,-0.307692,-0.004644,0.726483,0.241025,-0.026018,0.433163,0.000006,0,1
35007,1.635721e+12,-0.098532,-0.307692,-0.004644,0.726483,0.241025,-0.026018,0.000004,0,1
35008,1.635723e+12,-0.036374,-0.098532,-0.307692,-0.004644,0.726483,0.241025,0.000005,0,1


In [84]:
len(df1)

35004

In [151]:
# Chronological three-way split: the first third of the rows trains the model,
# the second and third thirds are two separate hold-out sets.
FEATURE_COLUMNS = ['const', 'Lag 1', 'Lag 2', 'Lag 3', 'Lag 4', 'Lag 5', 'Volume']
TARGET_COLUMNS = ['Direction']

# One third of the rows (11668 for the 35004-row frame), derived from the data
# instead of being hard-coded in six places.
split_size = len(df1) // 3

train_rows = df1.iloc[:split_size]
test1_rows = df1.iloc[split_size:2 * split_size]
test2_rows = df1.iloc[2 * split_size:]

X_train = train_rows[FEATURE_COLUMNS].values
y_train = train_rows[TARGET_COLUMNS].values

X_test1 = test1_rows[FEATURE_COLUMNS].values
y_test1 = test1_rows[TARGET_COLUMNS].values

X_test2 = test2_rows[FEATURE_COLUMNS].values
y_test2 = test2_rows[TARGET_COLUMNS].values

# Modeling  - [WORKING]

In [152]:
from keras.models import Sequential
from keras.layers import Dense

In [160]:
# input variables (X)
X = X_train
# output variables (y)
y = y_train

In [None]:
# define the keras model
model = Sequential()
model.add(Dense(24, input_dim=7, activation='relu6')) # input_dim=12 : number of obervations in x
model.add(Dense(8, activation='sigmoid'))
model.add(Dense(16, activation='relu6'))
model.add(Dense(1, activation='sigmoid'))
model

# Compile Keras Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# fit the keras model on the dataset
# model.fit(X, y, epochs=150, batch_size=2, verbose=0)
model.fit(X, y, epochs=150, batch_size=2)   # ! OVER-FIT

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [157]:
# evaluate the keras model
_, accuracy = model.evaluate(X_test1, y_test1) # _, accuracy = model.evaluate(X, y, verbose=0)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 54.97


In [158]:
# make class predictions with the model - round predictions
predictionsTest = (model.predict(X_test1) > 0.5).astype(int)

# summarize the first 5 cases
for i in range(10):
	print('%s => %d (expected %d)' % (X_test1[i].tolist(), predictions[i], y_test1[i]))

[1.0, 0.2251959637954215, 0.0997052193514758, -0.19469562583827038, 0.09527521545191764, -0.17724364516686908, 5.82936273e-06] => 0 (expected 0)
[1.0, -0.19012228319578162, 0.2251959637954215, 0.0997052193514758, -0.19469562583827038, 0.09527521545191764, 4.8123114400000005e-06] => 1 (expected 0)
[1.0, 0.0, -0.19012228319578162, 0.2251959637954215, 0.0997052193514758, -0.19469562583827038, 5.81197865e-06] => 1 (expected 0)
[1.0, -0.1645092861162767, 0.0, -0.19012228319578162, 0.2251959637954215, 0.0997052193514758, 6.569e-06] => 1 (expected 1)
[1.0, 0.10840813494643609, -0.1645092861162767, 0.0, -0.19012228319578162, 0.2251959637954215, 1.338038631e-05] => 1 (expected 1)
[1.0, 0.2772242917785661, 0.10840813494643609, -0.1645092861162767, 0.0, -0.19012228319578162, 6.505049020000001e-06] => 0 (expected 1)
[1.0, 0.043196544276447035, 0.2772242917785661, 0.10840813494643609, -0.1645092861162767, 0.0, 9.035715749999999e-06] => 0 (expected 0)
[1.0, -0.22884283246977777, 0.043196544276447035

In [159]:
# evaluate the keras model
_, accuracy = model.evaluate(X_test2, y_test2) # _, accuracy = model.evaluate(X, y, verbose=0)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 51.93


In [139]:
# make class predictions with the model - round predictions
predictionsTest = (model.predict(X_test2) > 0.5).astype(int)

# summarize the first 5 cases
for i in range(10):
	print('%s => %d (expected %d)' % (X_test2[i].tolist(), predictions[i], y_test2[i]))

[-0.20231494990760046, -0.0330597798607557, -0.42600983695441075] => 0 (expected 1)
[1.447015327836354, -0.20231494990760046, -0.0330597798607557] => 1 (expected 0)
[-0.19599052071991307, 1.447015327836354, -0.20231494990760046] => 1 (expected 1)
[0.450508265735694, -0.19599052071991307, 1.447015327836354] => 1 (expected 0)
[-0.0689981217177893, 0.450508265735694, -0.19599052071991307] => 1 (expected 0)
[-0.0038358756664913507, -0.0689981217177893, 0.450508265735694] => 0 (expected 0)
[-0.5236171137764201, -0.0038358756664913507, -0.0689981217177893] => 0 (expected 1)
[0.23779990102383852, -0.5236171137764201, -0.0038358756664913507] => 1 (expected 1)
[0.5103774613209433, 0.23779990102383852, -0.5236171137764201] => 1 (expected 0)
[-0.578595168379481, 0.5103774613209433, 0.23779990102383852] => 1 (expected 1)
