# Binance API

In [1]:
# Setup
from types import MethodDescriptorType
import requests
import hashlib
import threading
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt  

## Collect Data

In [2]:
## Set parameters
root_url = 'https://api.binance.com/api/v1/klines'
symbol = 'NMRUSDT' # 'ETHUSDT'
interval = '1d'
url = root_url + '?symbol=' + symbol + '&interval=' + interval
print(url)

https://api.binance.com/api/v1/klines?symbol=NMRUSDT&interval=1d


In [3]:
def get_bars(symbol, interval):
    """
    Collect data from Binance API
    """
    url = root_url + '?symbol=' + symbol + '&interval=' + interval
    data = dict()
    data['limit'] = 1_000 # 290
    data = json.loads(requests.get(url, params=data).text)
    df = pd.DataFrame(data)
    df.columns = ['open_time',
                  'open', 'high', 'low', 'close', 'volume',
                  'close_time', 'qav', 'num_trades',
                  'taker_base_vol', 'taker_quote_vol', 'ignore']
    df.index = [dt.datetime.fromtimestamp(x/1000.0) for x in df.close_time]
    return df

In [4]:
## // Collect Data //
df = get_bars(symbol,interval)

In [5]:
df.shape

(402, 12)

In [6]:
# Check dataset
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
2020-08-20 02:59:59.999,1597795200000,50.42,50.42,37.77,42.079,11838.172,1597881599999,505615.156852,4880,4431.593,189946.026105,0
2020-08-21 02:59:59.999,1597881600000,42.281,53.88,39.542,46.93,62972.514,1597967999999,2925713.232345,18560,26088.379,1226343.495844,0
2020-08-22 02:59:59.999,1597968000000,46.952,52.1,41.352,41.896,47528.204,1598054399999,2196731.570203,19241,19556.204,906611.279734,0
2020-08-23 02:59:59.999,1598054400000,42.181,51.0,41.072,47.978,19932.147,1598140799999,875397.52433,8238,9077.201,399321.416085,0
2020-08-24 02:59:59.999,1598140800000,48.048,48.057,42.828,43.591,17284.855,1598227199999,772475.352702,6039,7490.484,334547.197708,0


In [7]:
# Fix Column types
df[['open', 'high', 'low', 'close', 'volume']] = df[['open', 'high', 'low', 'close', 'volume']].apply(pd.to_numeric)

df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')

In [8]:
df.dtypes

open_time          datetime64[ns]
open                      float64
high                      float64
low                       float64
close                     float64
volume                    float64
close_time         datetime64[ns]
qav                        object
num_trades                  int64
taker_base_vol             object
taker_quote_vol            object
ignore                     object
dtype: object

In [9]:
print("First date:", df["open_time"].min())
print("Last date:", df["open_time"].max())

First date: 2020-08-19 00:00:00
Last date: 2021-09-24 00:00:00


## Create Features

In [10]:
features = ['open_time', 'close_time',
            'open','high','low','close','volume','num_trades']

df = df[features].copy(deep=True)

### Rolling Mean-Std 

In [11]:
# Calculate rolling mean/std
window = 10

df[f'MA_{window}'] = df['close'].rolling(window).mean()
df[f'STD_{window}'] = df['close'].rolling(window).std()

In [12]:
# Check dataset
df.head()

Unnamed: 0,open_time,close_time,open,high,low,close,volume,num_trades,MA_10,STD_10
2020-08-20 02:59:59.999,2020-08-19,2020-08-19 23:59:59.999,50.42,50.42,37.77,42.079,11838.172,4880,,
2020-08-21 02:59:59.999,2020-08-20,2020-08-20 23:59:59.999,42.281,53.88,39.542,46.93,62972.514,18560,,
2020-08-22 02:59:59.999,2020-08-21,2020-08-21 23:59:59.999,46.952,52.1,41.352,41.896,47528.204,19241,,
2020-08-23 02:59:59.999,2020-08-22,2020-08-22 23:59:59.999,42.181,51.0,41.072,47.978,19932.147,8238,,
2020-08-24 02:59:59.999,2020-08-23,2020-08-23 23:59:59.999,48.048,48.057,42.828,43.591,17284.855,6039,,


### Price changes

In [13]:
df['Price_Change'] = np.round(df['close'] - df['close'].shift(1),2)

In [14]:
# Seeing how much price changed in consecutive days (in percentage)
df['Price_Change%'] = np.round( ((df['close'] - df['close'].shift(1)) / df['close'].shift(1) ) * 100, 2)

In [15]:
# Sanity check
# (752.17-732.00)/732.00

In [16]:
df['Change'] = pd.cut(df["Price_Change%"], bins = [-np.inf, -5, 5, np.inf], labels=["Decrease", "Neutral", "Increase"])

In [17]:
# Check dataset
df.head()

Unnamed: 0,open_time,close_time,open,high,low,close,volume,num_trades,MA_10,STD_10,Price_Change,Price_Change%,Change
2020-08-20 02:59:59.999,2020-08-19,2020-08-19 23:59:59.999,50.42,50.42,37.77,42.079,11838.172,4880,,,,,
2020-08-21 02:59:59.999,2020-08-20,2020-08-20 23:59:59.999,42.281,53.88,39.542,46.93,62972.514,18560,,,4.85,11.53,Increase
2020-08-22 02:59:59.999,2020-08-21,2020-08-21 23:59:59.999,46.952,52.1,41.352,41.896,47528.204,19241,,,-5.03,-10.73,Decrease
2020-08-23 02:59:59.999,2020-08-22,2020-08-22 23:59:59.999,42.181,51.0,41.072,47.978,19932.147,8238,,,6.08,14.52,Increase
2020-08-24 02:59:59.999,2020-08-23,2020-08-23 23:59:59.999,48.048,48.057,42.828,43.591,17284.855,6039,,,-4.39,-9.14,Decrease


## Identify Important Dates: Dates where price exceeded 2 stds 

In [32]:
# Create upper and lower bounds
std_times = 3

df['upper_bound'] = df[f"MA_{window}"] + std_times * df[f"STD_{window}"] 
df['lower_bound'] = df[f"MA_{window}"] - std_times * df[f"STD_{window}"] 

In [33]:
# Find cross-points
df['above_bound'] = (df['upper_bound'] < df['high'])
df['below_bound'] = (df['lower_bound'] > df['low'])

In [34]:
## Conclusions

# - There aren't any day crossing both upper and low bound!
# - There are 38 days crossing above bound
# - There are 31 days crossing below bound

print("Total days:", len(df))
df[["above_bound", "below_bound"]].value_counts()

Total days: 402


above_bound  below_bound
False        False          349
True         False           43
False        True             9
True         True             1
dtype: int64

In [35]:
# Create cross_bounds columns [above, below, neutral]

def create_cross_bounds(row):
    if row['above_bound'] :
        return 'above'
    elif row['below_bound'] :
        return 'below'
    else:
        return 'neutral'
    
df["cross_bounds"] = df.apply(lambda row: create_cross_bounds(row), axis=1)

In [36]:
df.head()

Unnamed: 0,open_time,close_time,open,high,low,close,volume,num_trades,MA_10,STD_10,Price_Change,Price_Change%,Change,upper_bound,lower_bound,above_bound,below_bound,cross_bounds
2020-08-20 02:59:59.999,2020-08-19,2020-08-19 23:59:59.999,50.42,50.42,37.77,42.079,11838.172,4880,,,,,,,,False,False,neutral
2020-08-21 02:59:59.999,2020-08-20,2020-08-20 23:59:59.999,42.281,53.88,39.542,46.93,62972.514,18560,,,4.85,11.53,Increase,,,False,False,neutral
2020-08-22 02:59:59.999,2020-08-21,2020-08-21 23:59:59.999,46.952,52.1,41.352,41.896,47528.204,19241,,,-5.03,-10.73,Decrease,,,False,False,neutral
2020-08-23 02:59:59.999,2020-08-22,2020-08-22 23:59:59.999,42.181,51.0,41.072,47.978,19932.147,8238,,,6.08,14.52,Increase,,,False,False,neutral
2020-08-24 02:59:59.999,2020-08-23,2020-08-23 23:59:59.999,48.048,48.057,42.828,43.591,17284.855,6039,,,-4.39,-9.14,Decrease,,,False,False,neutral


## Plot candlesticks

In [37]:
import plotly.graph_objects as go
from datetime import datetime

In [38]:
## Plotly 

# candlestick plot
fig = go.Figure(data=[go.Candlestick(x=df["open_time"],
                open=df['open'],
                high=df['high'],
                low=df['low'],
                close=df['close'])])

fig.update_layout(
    yaxis_title="USDT",
    title={
        'text': symbol + " Price and cross-points",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

# MA plot
fig.add_trace(
    go.Scatter(
        x = df["open_time"],
        y = df[f"MA_{window}"],
        name = "Moving average",
        mode="lines",
        line=go.scatter.Line(color="black", dash="dash"),
        showlegend=True)
)


# Upper bound
fig.add_trace(
    go.Scatter(
        x= df["open_time"],
        y= df[f"MA_{window}"] + std_times * df[f"STD_{window}"],
        name = "Upper bound",
        mode="lines",
        line=go.scatter.Line(color="lightgreen"),
        showlegend=True)
)

# Lower bound
fig.add_trace(
    go.Scatter(
        x = df["open_time"],
        y = df[f"MA_{window}"] - std_times * df[f"STD_{window}"],
        name = "Lower bound",
        mode="lines",
        line=go.scatter.Line(color="firebrick"),
        showlegend=True)
)


## Plot crossing points

low_value = int(df[['open','high','low','close']].min().min() - 10)

fig.add_trace(go.Scatter(
    x = df.loc[df['above_bound'], "open_time"],
    y = np.repeat(low_value, len(df.loc[df['above_bound'], "open_time"])),
    marker=dict(color="green", size=4),
    mode="markers",
    name = 'above_boundary'
))


fig.add_trace(go.Scatter(
    x = df.loc[df['below_bound'], "open_time"],
    y = np.repeat(low_value, len(df.loc[df['below_bound'], "open_time"])),
    marker=dict(color="crimson", size=4),
    mode="markers",
    name = 'below_boundary'
))


fig.show()

In [39]:
# Save the plot in the path you want! 
fig.write_html(f"../outcome/{symbol}.html")

## Save final dataset

In [40]:
df.head()

Unnamed: 0,open_time,close_time,open,high,low,close,volume,num_trades,MA_10,STD_10,Price_Change,Price_Change%,Change,upper_bound,lower_bound,above_bound,below_bound,cross_bounds
2020-08-20 02:59:59.999,2020-08-19,2020-08-19 23:59:59.999,50.42,50.42,37.77,42.079,11838.172,4880,,,,,,,,False,False,neutral
2020-08-21 02:59:59.999,2020-08-20,2020-08-20 23:59:59.999,42.281,53.88,39.542,46.93,62972.514,18560,,,4.85,11.53,Increase,,,False,False,neutral
2020-08-22 02:59:59.999,2020-08-21,2020-08-21 23:59:59.999,46.952,52.1,41.352,41.896,47528.204,19241,,,-5.03,-10.73,Decrease,,,False,False,neutral
2020-08-23 02:59:59.999,2020-08-22,2020-08-22 23:59:59.999,42.181,51.0,41.072,47.978,19932.147,8238,,,6.08,14.52,Increase,,,False,False,neutral
2020-08-24 02:59:59.999,2020-08-23,2020-08-23 23:59:59.999,48.048,48.057,42.828,43.591,17284.855,6039,,,-4.39,-9.14,Decrease,,,False,False,neutral


In [41]:
df.tail()

Unnamed: 0,open_time,close_time,open,high,low,close,volume,num_trades,MA_10,STD_10,Price_Change,Price_Change%,Change,upper_bound,lower_bound,above_bound,below_bound,cross_bounds
2021-09-21 02:59:59.999,2021-09-20,2021-09-20 23:59:59.999,44.79,45.58,38.54,38.97,85004.68,12936,44.413,2.26365,-5.81,-12.97,Decrease,51.203951,37.622049,False,False,neutral
2021-09-22 02:59:59.999,2021-09-21,2021-09-21 23:59:59.999,38.98,42.46,36.02,37.41,96945.014,10833,43.411,2.906252,-1.56,-4.0,Neutral,52.129755,34.692245,False,False,neutral
2021-09-23 02:59:59.999,2021-09-22,2021-09-22 23:59:59.999,37.57,58.0,36.49,41.58,619411.78,104706,43.007,2.845218,4.17,11.15,Increase,51.542655,34.471345,True,False,above
2021-09-24 02:59:59.999,2021-09-23,2021-09-23 23:59:59.999,41.44,45.25,40.96,42.75,96899.76,17908,42.953,2.844375,1.17,2.81,Neutral,51.486124,34.419876,False,False,neutral
2021-09-25 02:59:59.999,2021-09-24,2021-09-24 23:59:59.999,42.7,43.3,38.55,41.36,56563.56,6753,42.719,2.87221,-1.39,-3.25,Neutral,51.335629,34.102371,False,False,neutral


In [43]:
# Store dataset
path = f"../data/{symbol}_{interval}_price_data.csv"
print("Path to save:", path)
df.to_csv(path, index=True)

Path to save: ../data/NMRUSDT-1d-data.csv
