# Get Data using python-binance

## Import relevant libraries

In [1]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [2]:
# Load the Binance credentials from the project's .env file into os.environ.
# `dotenv_path` must be passed as a keyword argument; putting it inside the
# string makes python-dotenv look for a file literally named
# 'dotenv_path=config_files/.env'.
load_dotenv(dotenv_path='config_files/.env')

True

## Get the data

### Create the client

In [3]:
# Authenticated Binance client; both credentials are read from the environment
# and a missing variable raises KeyError here rather than later.
client = Client(
    api_key=os.environ["BINANCE_API_KEY"],
    api_secret=os.environ["BINANCE_SECRET_KEY"],
)

### Parameters

In [4]:
# Trading pair to download (keep exactly one line active).
asset_ticket = "BTCUSDT"
#asset_ticket = "ETHUSDT"
#asset_ticket = "BNBUSDT"

# Candle (kline) interval. Despite its name, `timestamp` holds the interval
# constant; it is also embedded in the export filename.
timestamp = Client.KLINE_INTERVAL_1MINUTE
#timestamp = Client.KLINE_INTERVAL_15MINUTE
#timestamp = Client.KLINE_INTERVAL_1HOUR
#timestamp = Client.KLINE_INTERVAL_1DAY

# Date range for the download. The export cell re-parses these strings with
# the '%d %b, %Y' format, so keep that exact format when changing them.
# NOTE(review): the saved outputs in this notebook show daily candles from
# 2017-09-01 to 2022-12-31, which does not match a 1-minute interval over
# 2017-2018 -- confirm these parameters are the ones that were actually run.
start_date = "1 Jan, 2017"
end_date = "1 Jan, 2018"

### Get the data

In [5]:
# Download every candle for the configured pair, interval and date range.
# The result is a list of rows (one list per candle).
klines = client.get_historical_klines(
    symbol=asset_ticket,
    interval=timestamp,
    start_str=start_date,
    end_str=end_date,
)

### Convert the list of lists to a pandas DataFrame

In [26]:
# Names for the 12 fields of each kline row, in positional order.
kline_columns = [
    'open_time',
    'open',
    'high',
    'low',
    'close',
    'volume',
    'close_time',
    'quote_asset_volumne',
    'number_of_trades',
    'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume',
    'ignore',
]

# Raw frame: one row per candle, values kept exactly as returned by the API.
df_klines = pd.DataFrame(klines, columns=kline_columns)

### Check the data

In [27]:
# Show 5 randomly chosen rows as a quick sanity check of the raw values.
df_klines.sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
1176,1605830400000,17802.81,18815.22,17740.04,18655.67,88423.018489,1605916799999,1623135138.5910714,1444692,43549.51207,799574963.8058717,0
1145,1603152000000,11751.46,12038.38,11677.59,11909.99,62134.750663,1603238399999,737425592.0819716,906111,31911.708832,378905888.9600628,0
916,1583366400000,8750.99,9159.42,8746.54,9054.68,58201.866355,1583452799999,524102029.20478296,619552,30482.082171,274387102.9108894,0
297,1529884800000,6137.95,6350.0,6061.97,6252.0,39164.515306,1529971199999,242859915.8036461,218384,21986.057384,136305269.99012783,0
41,1507766400000,4821.43,5439.99,4810.16,5430.0,1276.701482,1507852799999,6659759.29119366,8030,796.594719,4149667.59915114,0


In [28]:
# Summary statistics. Only the integer columns are summarised here because the
# price/volume columns are still 'object' dtype at this point.
df_klines.describe()

Unnamed: 0,open_time,close_time,number_of_trades
count,1948.0,1948.0,1948.0
mean,1588334000000.0,1588421000000.0,1236200.0
std,48598570000.0,48598640000.0,1609978.0
min,1504224000000.0,1504310000000.0,3070.0
25%,1546279000000.0,1546366000000.0,260497.8
50%,1588334000000.0,1588421000000.0,656155.5
75%,1630390000000.0,1630476000000.0,1528346.0
max,1672445000000.0,1672531000000.0,14530600.0


In [29]:
# Column dtypes and non-null counts; used to spot columns that need conversion.
df_klines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1948 entries, 0 to 1947
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   open_time                     1948 non-null   int64 
 1   open                          1948 non-null   object
 2   high                          1948 non-null   object
 3   low                           1948 non-null   object
 4   close                         1948 non-null   object
 5   volume                        1948 non-null   object
 6   close_time                    1948 non-null   int64 
 7   quote_asset_volumne           1948 non-null   object
 8   number_of_trades              1948 non-null   int64 
 9   taker_buy_base_asset_volume   1948 non-null   object
 10  taker_buy_quote_asset_volume  1948 non-null   object
 11  ignore                        1948 non-null   object
dtypes: int64(3), object(9)
memory usage: 182.8+ KB


In [30]:
# Number of missing values per column.
print(df_klines.isnull().sum())

open_time                       0
open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volumne             0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
ignore                          0
dtype: int64


## Feature Engineering

### Copy original data

In [31]:
# Work on a copy so the raw download in df_klines stays untouched by the
# feature-engineering steps below.
df_klines_copy = df_klines.copy()

### Convert the 'object' price and volume columns to numeric (float)

In [32]:
# The OHLC prices and the volume arrive as strings ('object' dtype); convert
# each one to a numeric dtype so arithmetic on them works.
for numeric_column in ('open', 'high', 'low', 'close', 'volume'):
    df_klines_copy[numeric_column] = pd.to_numeric(df_klines_copy[numeric_column])

### Convert 'open_time' and 'close_time' to the pandas datetime format

In [33]:
# open_time / close_time are integer epoch timestamps in milliseconds, so
# unit="ms" is all pandas needs. `infer_datetime_format` only affects string
# parsing and is deprecated in pandas 2.x, so it is intentionally not passed.
df_klines_copy['formatted_open_time'] = pd.to_datetime(df_klines_copy['open_time'], unit="ms")
df_klines_copy['formatted_close_time'] = pd.to_datetime(df_klines_copy['close_time'], unit="ms")

### Check the data - head and tail

In [34]:
# First rows: check the numeric conversion and the new formatted_* columns.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
0,1504224000000,4689.89,4885.55,4654.88,4834.91,560.666366,1504310399999,2665165.55924616,4000,118.660103,566254.43089734,0,2017-09-01,2017-09-01 23:59:59.999
1,1504310400000,4796.16,4939.19,4286.87,4472.14,929.148595,1504396799999,4257723.944817,7475,379.199863,1738334.08541735,0,2017-09-02,2017-09-02 23:59:59.999
2,1504396800000,4508.5,4714.76,4298.33,4509.08,691.216198,1504483199999,3121052.75230181,6022,258.934536,1169644.69159694,0,2017-09-03,2017-09-03 23:59:59.999
3,1504483200000,4505.0,4527.49,3972.51,4100.11,1394.644614,1504569599999,5909947.49008974,9317,578.600851,2460362.61291664,0,2017-09-04,2017-09-04 23:59:59.999
4,1504569600000,4106.97,4484.99,3603.0,4366.47,1228.938157,1504655999999,5083453.18675071,10084,533.476065,2214297.83961246,0,2017-09-05,2017-09-05 23:59:59.999


In [35]:
# Last rows: check that the data reaches the expected end of the range.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
1943,1672099200000,16919.39,16972.83,16592.37,16706.36,173749.58616,1672185599999,2918379509.5314097,5047628,86096.58586,1446098274.5653381,0,2022-12-27,2022-12-27 23:59:59.999
1944,1672185600000,16706.06,16785.19,16465.33,16547.31,193037.56577,1672271999999,3210894236.8123903,5465468,96118.18717,1598767893.1727967,0,2022-12-28,2022-12-28 23:59:59.999
1945,1672272000000,16547.32,16664.41,16488.91,16633.47,160998.47158,1672358399999,2671665748.0816226,4553877,80977.62443,1343799862.9950268,0,2022-12-29,2022-12-29 23:59:59.999
1946,1672358400000,16633.47,16677.35,16333.0,16607.48,164916.31174,1672444799999,2726831056.725026,4310415,82170.4749,1358631100.7592444,0,2022-12-30,2022-12-30 23:59:59.999
1947,1672444800000,16607.48,16644.09,16470.0,16542.4,114490.42864,1672531199999,1897047504.9188972,3344374,57021.06919,944834393.3413354,0,2022-12-31,2022-12-31 23:59:59.999


### Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)

In [36]:
def create_curl_values(df):
    """Add the candlestick shape columns to ``df`` and return it.

    For every row three values are derived from the OHLC prices:

    * ``upper_shadow``: ``high`` minus the top of the body
    * ``lower_shadow``: bottom of the body minus ``low``
    * ``real_body``: absolute size of the body (top minus bottom)

    The body top is ``open`` when ``open > close`` and ``close`` otherwise;
    the body bottom is the other of the two prices.

    Args:
        df: DataFrame with numeric ``open``, ``high``, ``low`` and ``close``
            columns. It is modified in place.

    Returns:
        The same DataFrame object, with the three columns added
        (or overwritten if they already exist).

    Raises:
        KeyError: if any of the required price columns is missing.
    """
    required_columns = ('open', 'high', 'low', 'close')
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        # Fail loudly: silently returning None would make the caller overwrite
        # its DataFrame variable with None.
        raise KeyError(
            f"create_curl_values: missing required column(s): {missing_columns}"
        )

    # Same branch condition as a row-by-row `if open > close`, evaluated for
    # the whole column at once.
    is_bearish = df['open'] > df['close']
    body_top = df['open'].where(is_bearish, df['close'])
    body_bottom = df['close'].where(is_bearish, df['open'])

    # Compute everything before assigning so a failure leaves df unchanged.
    upper_shadow = df['high'] - body_top
    lower_shadow = body_bottom - df['low']
    real_body = body_top - body_bottom

    df['upper_shadow'] = upper_shadow
    df['lower_shadow'] = lower_shadow
    df['real_body'] = real_body

    return df

In [37]:
# Add the upper_shadow / lower_shadow / real_body columns to the working copy.
df_klines_copy = create_curl_values(df_klines_copy)

### Check the data - head and tail

In [38]:
# First rows: check the newly added shadow and body columns.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
0,1504224000000,4689.89,4885.55,4654.88,4834.91,560.666366,1504310399999,2665165.55924616,4000,118.660103,566254.43089734,0,2017-09-01,2017-09-01 23:59:59.999,50.64,35.01,145.02
1,1504310400000,4796.16,4939.19,4286.87,4472.14,929.148595,1504396799999,4257723.944817,7475,379.199863,1738334.08541735,0,2017-09-02,2017-09-02 23:59:59.999,143.03,185.27,324.02
2,1504396800000,4508.5,4714.76,4298.33,4509.08,691.216198,1504483199999,3121052.75230181,6022,258.934536,1169644.69159694,0,2017-09-03,2017-09-03 23:59:59.999,205.68,210.17,0.58
3,1504483200000,4505.0,4527.49,3972.51,4100.11,1394.644614,1504569599999,5909947.49008974,9317,578.600851,2460362.61291664,0,2017-09-04,2017-09-04 23:59:59.999,22.49,127.6,404.89
4,1504569600000,4106.97,4484.99,3603.0,4366.47,1228.938157,1504655999999,5083453.18675071,10084,533.476065,2214297.83961246,0,2017-09-05,2017-09-05 23:59:59.999,118.52,503.97,259.5


In [39]:
# Last rows: check the newly added shadow and body columns.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
1943,1672099200000,16919.39,16972.83,16592.37,16706.36,173749.58616,1672185599999,2918379509.5314097,5047628,86096.58586,1446098274.5653381,0,2022-12-27,2022-12-27 23:59:59.999,53.44,113.99,213.03
1944,1672185600000,16706.06,16785.19,16465.33,16547.31,193037.56577,1672271999999,3210894236.8123903,5465468,96118.18717,1598767893.1727967,0,2022-12-28,2022-12-28 23:59:59.999,79.13,81.98,158.75
1945,1672272000000,16547.32,16664.41,16488.91,16633.47,160998.47158,1672358399999,2671665748.0816226,4553877,80977.62443,1343799862.9950268,0,2022-12-29,2022-12-29 23:59:59.999,30.94,58.41,86.15
1946,1672358400000,16633.47,16677.35,16333.0,16607.48,164916.31174,1672444799999,2726831056.725026,4310415,82170.4749,1358631100.7592444,0,2022-12-30,2022-12-30 23:59:59.999,43.88,274.48,25.99
1947,1672444800000,16607.48,16644.09,16470.0,16542.4,114490.42864,1672531199999,1897047504.9188972,3344374,57021.06919,944834393.3413354,0,2022-12-31,2022-12-31 23:59:59.999,36.61,72.4,65.08


## Export the Data

### Export parameters

In [40]:
# The start/end strings are re-parsed from their human-readable form and
# rendered as YYYY_MM_DD so they can be embedded in the filename.
date_input_format = '%d %b, %Y'
date_label_format = '%Y_%m_%d'
start_label = datetime.datetime.strptime(start_date, date_input_format).strftime(date_label_format)
end_label = datetime.datetime.strptime(end_date, date_input_format).strftime(date_label_format)

# Filename pattern: binance_<pair>_<interval>_from_<start>_to_<end>.csv
export_path = "../data"
export_filename = f"binance_{asset_ticket}_{timestamp}_from_{start_label}_to_{end_label}"
export_extension = ".csv"
full_export_path = os.path.join(export_path, export_filename + export_extension)

### Export

In [41]:
# Make sure the target directory exists first: to_csv does not create missing
# folders and would fail on a fresh checkout.
export_directory = os.path.dirname(full_export_path)
if export_directory:
    os.makedirs(export_directory, exist_ok=True)

# index=False: the RangeIndex carries no information, so it is not written.
df_klines_copy.to_csv(full_export_path, index=False)