# Get Data using python-binance

## Import relevant libraries

In [22]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [23]:
# Load variables from a local .env file into the process environment.
# The client cell below reads BINANCE_API_KEY and BINANCE_SECRET_KEY from
# os.environ, so this must run first.
load_dotenv()

True

## Get the data

### Create the client

In [24]:
# Credentials come from the environment (never hard-coded in the notebook);
# a missing variable raises KeyError here rather than failing later.
client = Client(
    api_key=os.environ["BINANCE_API_KEY"],
    api_secret=os.environ["BINANCE_SECRET_KEY"],
)

### Parameters

In [43]:
# Trading pair to download (first argument of get_historical_klines below).
asset_ticket = "BTCUSDT"
# Candle interval. Swap in one of the commented alternatives to change it.
# timestamp = Client.KLINE_INTERVAL_1MINUTE
# timestamp = Client.KLINE_INTERVAL_5MINUTE
timestamp = Client.KLINE_INTERVAL_15MINUTE
# Date range. Keep the 'D Mon, YYYY' shape: the export cell parses these
# strings with strptime('%d %b, %Y') to build the output file name.
# NOTE(review): the tail() output further down stops at the 2021-12-31 00:00
# candle, so the end date appears to be treated as midnight at the start of
# that day - confirm whether the rest of 31 Dec was meant to be included.
start_date = "1 Jan, 2020"
end_date = "31 Dec, 2021"

### Get the data

In [44]:
# Download every candle for the configured pair, interval and date range.
# The result is a list of lists (one inner list per candle).
klines = client.get_historical_klines(
    symbol=asset_ticket,
    interval=timestamp,
    start_str=start_date,
    end_str=end_date,
)

### Convert list of lists to Pandas DataFrame

In [45]:
# Names for the 12 positional fields of each kline row, in order.
# NOTE(review): 'quote_asset_volumne' is misspelled, but it is kept unchanged
# because it becomes a header of the exported CSV - rename only together with
# whatever consumes that file.
kline_columns = [
    'open_time', 'open', 'high', 'low', 'close', 'volume',
    'close_time', 'quote_asset_volumne', 'number_of_trades',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
]
df_klines = pd.DataFrame(klines, columns=kline_columns)

### Check the data

In [46]:
# Eyeball five random rows. No random_state is passed, so a different set of
# rows is shown on every run.
df_klines.sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
15055,1591426800000,9610.0,9640.0,9610.0,9627.56,285.152013,1591427699999,2745040.18070314,4488,121.520742,1169600.01132368,0
11205,1587961800000,7706.0,7734.97,7705.0,7734.19,374.848973,1587962699999,2894882.25116354,5243,210.764428,1627836.82764014,0
67494,1638714600000,48959.48,49136.14,48869.13,49043.09,377.54722,1638715499999,18500735.9594624,14442,203.28506,9961108.4274389,0
16404,1592640900000,9299.31,9314.32,9293.46,9304.59,388.414024,1592641799999,3613479.24828892,4110,187.281495,1742066.51353627,0
36287,1610569800000,35937.17,36062.26,35704.21,35964.32,1412.410407,1610570699999,50661991.60717947,31324,721.855898,25896865.8550331,0


In [47]:
# Summary statistics. Only open_time, close_time and number_of_trades appear
# in the output: the price/volume columns are still object dtype (strings) at
# this point, so they are left out of the numeric summary.
df_klines.describe()

Unnamed: 0,open_time,close_time,number_of_trades
count,69933.0,69933.0,69933.0
mean,1609381000000.0,1609382000000.0,14073.070854
std,18208050000.0,18208050000.0,12105.703394
min,1577837000000.0,1577838000000.0,0.0
25%,1593625000000.0,1593625000000.0,6511.0
50%,1609381000000.0,1609382000000.0,10878.0
75%,1625151000000.0,1625152000000.0,17840.0
max,1640909000000.0,1640910000000.0,340874.0


In [48]:
# Per-column dtype and non-null count; shows which columns arrived as strings
# (object) and need converting before any arithmetic.
df_klines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69933 entries, 0 to 69932
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   open_time                     69933 non-null  int64 
 1   open                          69933 non-null  object
 2   high                          69933 non-null  object
 3   low                           69933 non-null  object
 4   close                         69933 non-null  object
 5   volume                        69933 non-null  object
 6   close_time                    69933 non-null  int64 
 7   quote_asset_volumne           69933 non-null  object
 8   number_of_trades              69933 non-null  int64 
 9   taker_buy_base_asset_volume   69933 non-null  object
 10  taker_buy_quote_asset_volume  69933 non-null  object
 11  ignore                        69933 non-null  object
dtypes: int64(3), object(9)
memory usage: 6.4+ MB


In [49]:
# Count missing values per column.
null_counts = df_klines.isnull().sum()
print(null_counts)

open_time                       0
open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volumne             0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
ignore                          0
dtype: int64


## Feature Engineering

### Copy original data

In [50]:
# Work on an independent (deep) copy so the raw download in df_klines stays
# untouched and the feature-engineering cells can be re-run from here.
df_klines_copy = df_klines.copy(deep=True)

### Convert 'object' (string) columns to pandas floats

In [51]:
# Parse the string-typed price and volume columns into numbers, one column at
# a time. Only these five are converted; the other object columns
# (quote_asset_volumne, taker_buy_*_asset_volume, ignore) are left as they are.
for column_name in ('open', 'high', 'low', 'close', 'volume'):
    df_klines_copy[column_name] = pd.to_numeric(df_klines_copy[column_name])

### Convert the 'open_time' and 'close_time' to a Pandas DateTime format

In [52]:
# open_time / close_time are integer epoch timestamps in milliseconds, so
# unit="ms" is all pd.to_datetime needs. infer_datetime_format only concerns
# string parsing and is deprecated in pandas 2.x (it triggers a warning), so
# it is intentionally not passed.
df_klines_copy['formatted_open_time'] = pd.to_datetime(df_klines_copy['open_time'], unit="ms")
df_klines_copy['formatted_close_time'] = pd.to_datetime(df_klines_copy['close_time'], unit="ms")

### Check the data - head and tail

In [53]:
# First rows after the numeric and datetime conversions; the formatted_* columns
# should line up with the raw epoch values next to them.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
0,1577836800000,7195.24,7196.25,7178.2,7180.97,202.942868,1577837699999,1458244.89963462,2452,76.962041,553069.53492894,0,2020-01-01 00:00:00,2020-01-01 00:14:59.999
1,1577837700000,7180.97,7186.4,7175.47,7178.45,128.242654,1577838599999,920702.74707886,1948,58.38911,419226.47144695,0,2020-01-01 00:15:00,2020-01-01 00:29:59.999
2,1577838600000,7178.19,7185.44,7176.23,7179.56,83.487458,1577839499999,599479.15280528,1580,43.822374,314667.3210715,0,2020-01-01 00:30:00,2020-01-01 00:44:59.999
3,1577839500000,7179.35,7183.98,7175.46,7177.02,97.141921,1577840399999,697429.77996667,1660,46.979601,337325.86220298,0,2020-01-01 00:45:00,2020-01-01 00:59:59.999
4,1577840400000,7176.47,7194.04,7175.71,7190.86,103.520522,1577841299999,744089.10404112,1588,56.251378,404330.49838606,0,2020-01-01 01:00:00,2020-01-01 01:14:59.999


In [54]:
# Last rows: shows where the downloaded range actually ends.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
69928,1640905200000,47084.88,47240.0,46987.99,47110.36,235.14202,1640906099999,11074079.3199467,9099,128.88618,6070838.5666656,0,2021-12-30 23:00:00,2021-12-30 23:14:59.999
69929,1640906100000,47110.36,47242.6,47040.81,47122.27,179.83497,1640906999999,8478394.9244503,7332,78.48148,3700971.9890607,0,2021-12-30 23:15:00,2021-12-30 23:29:59.999
69930,1640907000000,47122.28,47160.0,46940.96,47092.12,198.46904,1640907899999,9337706.9981783,7905,86.94927,4089632.5629924,0,2021-12-30 23:30:00,2021-12-30 23:44:59.999
69931,1640907900000,47092.12,47177.3,47044.16,47120.87,96.74091,1640908799999,4557887.2447577,5540,50.11043,2360849.7628395,0,2021-12-30 23:45:00,2021-12-30 23:59:59.999
69932,1640908800000,47120.88,47500.0,47080.0,47296.76,379.28228,1640909699999,17939187.9837933,12653,196.76069,9309185.9422689,0,2021-12-31 00:00:00,2021-12-31 00:14:59.999


### Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)

In [55]:
def create_curl_values(df):
    """Add candlestick shape columns to ``df`` and return it.

    Each candle is split into three parts:

    * ``upper_shadow`` - ``high`` minus the top of the body
    * ``lower_shadow`` - bottom of the body minus ``low``
    * ``real_body``    - top of the body minus bottom of the body

    The top of the body is ``open`` when ``open > close`` and ``close``
    otherwise; the bottom of the body is the other of the two. A candle with
    ``open == close`` therefore gets a ``real_body`` of 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``open``, ``high``, ``low`` and ``close`` columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the three columns are written
        onto it in place (existing columns with those names are overwritten).

    Raises
    ------
    KeyError
        If any of the required columns is missing. Errors are raised rather
        than printed, so a failure can never silently hand back ``None``.
    """
    required_columns = ('open', 'high', 'low', 'close')
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(
            "create_curl_values: missing required column(s): {}".format(missing)
        )

    # Row-wise selection instead of max()/min(): this keeps exactly the same
    # branch semantics as a per-row `if open > close` (including how ties and
    # NaN values fall into the "else" branch).
    is_bearish = df['open'] > df['close']
    body_top = df['open'].where(is_bearish, df['close'])
    body_bottom = df['close'].where(is_bearish, df['open'])

    df['upper_shadow'] = df['high'] - body_top
    df['lower_shadow'] = body_bottom - df['low']
    df['real_body'] = body_top - body_bottom

    return df

In [56]:
# Append upper_shadow, lower_shadow and real_body. The function writes the
# columns onto the frame it receives and returns that frame.
df_klines_copy = create_curl_values(df_klines_copy)

### Check the data - head and tail

In [57]:
# First rows again, now with the three candle-shape columns at the end.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
0,1577836800000,7195.24,7196.25,7178.2,7180.97,202.942868,1577837699999,1458244.89963462,2452,76.962041,553069.53492894,0,2020-01-01 00:00:00,2020-01-01 00:14:59.999,1.01,2.77,14.27
1,1577837700000,7180.97,7186.4,7175.47,7178.45,128.242654,1577838599999,920702.74707886,1948,58.38911,419226.47144695,0,2020-01-01 00:15:00,2020-01-01 00:29:59.999,5.43,2.98,2.52
2,1577838600000,7178.19,7185.44,7176.23,7179.56,83.487458,1577839499999,599479.15280528,1580,43.822374,314667.3210715,0,2020-01-01 00:30:00,2020-01-01 00:44:59.999,5.88,1.96,1.37
3,1577839500000,7179.35,7183.98,7175.46,7177.02,97.141921,1577840399999,697429.77996667,1660,46.979601,337325.86220298,0,2020-01-01 00:45:00,2020-01-01 00:59:59.999,4.63,1.56,2.33
4,1577840400000,7176.47,7194.04,7175.71,7190.86,103.520522,1577841299999,744089.10404112,1588,56.251378,404330.49838606,0,2020-01-01 01:00:00,2020-01-01 01:14:59.999,3.18,0.76,14.39


In [58]:
# Last rows, with the three candle-shape columns at the end.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
69928,1640905200000,47084.88,47240.0,46987.99,47110.36,235.14202,1640906099999,11074079.3199467,9099,128.88618,6070838.5666656,0,2021-12-30 23:00:00,2021-12-30 23:14:59.999,129.64,96.89,25.48
69929,1640906100000,47110.36,47242.6,47040.81,47122.27,179.83497,1640906999999,8478394.9244503,7332,78.48148,3700971.9890607,0,2021-12-30 23:15:00,2021-12-30 23:29:59.999,120.33,69.55,11.91
69930,1640907000000,47122.28,47160.0,46940.96,47092.12,198.46904,1640907899999,9337706.9981783,7905,86.94927,4089632.5629924,0,2021-12-30 23:30:00,2021-12-30 23:44:59.999,37.72,151.16,30.16
69931,1640907900000,47092.12,47177.3,47044.16,47120.87,96.74091,1640908799999,4557887.2447577,5540,50.11043,2360849.7628395,0,2021-12-30 23:45:00,2021-12-30 23:59:59.999,56.43,47.96,28.75
69932,1640908800000,47120.88,47500.0,47080.0,47296.76,379.28228,1640909699999,17939187.9837933,12653,196.76069,9309185.9422689,0,2021-12-31 00:00:00,2021-12-31 00:14:59.999,203.24,40.88,175.88


## Export the Data

### Export parameters

In [59]:
# Output location and name. The file name has the shape
#   binance_<pair>_<interval>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>
# with both dates re-formatted from the 'D Mon, YYYY' parameter strings.
export_path = "../data"
export_extension = ".csv"

from_stamp = datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d')
to_stamp = datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d')

export_filename = (
    "binance"
    + "_" + asset_ticket
    + "_" + timestamp
    + "_from_" + from_stamp
    + "_to_" + to_stamp
)
full_export_path = os.path.join(export_path, export_filename + export_extension)

### Export

In [60]:
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists first (a no-op when it is already there). The guard skips
# the call when the path has no directory part.
export_dir = os.path.dirname(full_export_path)
if export_dir:
    os.makedirs(export_dir, exist_ok=True)

# index=False: the RangeIndex carries no information, so it is not written.
df_klines_copy.to_csv(full_export_path, index=False)