# Get Data using python-binance

## Import relevant libraries

In [1]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [2]:
# Load key/value pairs from a local .env file into the process environment
# (python-dotenv). The Binance credentials read from os.environ further down
# are expected to come from this file.
load_dotenv()

True

## Get the data

### Create the client

In [3]:
# Credentials are taken from the environment instead of being hard-coded;
# a missing BINANCE_API_KEY or BINANCE_SECRET_KEY raises KeyError here.
client = Client(os.environ["BINANCE_API_KEY"], os.environ["BINANCE_SECRET_KEY"])

### Parameters

In [4]:
# Symbol of the trading pair to download.
asset_ticket = "BTCUSDT"
# Candle (kline) interval. Despite its name this is an interval, not a timestamp.
timestamp = Client.KLINE_INTERVAL_1MINUTE
# Date range of the download. The same strings are re-parsed with the
# '%d %b, %Y' format when the export filename is built, so keep this format.
start_date = "1 Jan, 2020"
# NOTE(review): in the recorded run the last candle opens at 2021-12-31 00:00,
# so the end date appears to be treated as the very start of that day and
# 31 Dec itself is not covered — confirm this is the intended range.
end_date = "31 Dec, 2021"

### Get the data

In [5]:
# Download every kline for the configured symbol, interval and date range.
# The result is a list of lists (one inner list per candle); the recorded run
# produced 1,048,956 rows, so expect this cell to take a while.
klines = client.get_historical_klines(asset_ticket, timestamp, start_date, end_date)

### Convert the list of lists to a pandas DataFrame

In [6]:
# One column name per field of a kline record, in the order the fields appear.
# The spelling 'quote_asset_volumne' is kept as-is because it ends up as a
# header in the exported CSV.
df_klines = pd.DataFrame(
    data=klines,
    columns=[
        'open_time',
        'open',
        'high',
        'low',
        'close',
        'volume',
        'close_time',
        'quote_asset_volumne',
        'number_of_trades',
        'taker_buy_base_asset_volume',
        'taker_buy_quote_asset_volume',
        'ignore',
    ],
)

### Check the data

In [7]:
# Peek at 5 random rows. No random_state is passed, so a re-run shows
# different rows than the recorded output.
df_klines.sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
636260,1616097660000,57084.88,57280.71,57023.0,57248.72,104.469517,1616097719999,5967781.98704099,2865,50.319025,2874304.69805179,0
143210,1586461920000,7275.32,7275.81,7268.93,7268.93,17.161893,1586461979999,124786.4093826,427,5.548326,40340.65759386,0
599303,1613874840000,56502.14,56502.14,56463.16,56465.28,20.895391,1613874899999,1180293.47817414,638,10.8653,613724.14144305,0
130689,1585710660000,6342.75,6351.6,6342.45,6348.39,88.078858,1585710719999,559070.77634939,704,49.963666,317159.14975683,0
887062,1631188020000,46387.79,46408.05,46350.31,46367.73,63.63752,1631188079999,2952091.5085184,969,33.87232,1571525.9968336,0


In [8]:
# Summary statistics. Only the int64 columns show up at this stage because the
# price and volume columns are still object dtype (see the .info() output).
df_klines.describe()

Unnamed: 0,open_time,close_time,number_of_trades
count,1048956.0,1048956.0,1048956.0
mean,1609381000000.0,1609381000000.0,938.0391
std,18207550000.0,18207550000.0,1016.275
min,1577837000000.0,1577837000000.0,0.0
25%,1593625000000.0,1593625000000.0,398.0
50%,1609381000000.0,1609381000000.0,682.0
75%,1625151000000.0,1625151000000.0,1144.0
max,1640909000000.0,1640909000000.0,55181.0


In [9]:
# Column dtypes, non-null counts and memory footprint of the raw frame.
df_klines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048956 entries, 0 to 1048955
Data columns (total 12 columns):
 #   Column                        Non-Null Count    Dtype 
---  ------                        --------------    ----- 
 0   open_time                     1048956 non-null  int64 
 1   open                          1048956 non-null  object
 2   high                          1048956 non-null  object
 3   low                           1048956 non-null  object
 4   close                         1048956 non-null  object
 5   volume                        1048956 non-null  object
 6   close_time                    1048956 non-null  int64 
 7   quote_asset_volumne           1048956 non-null  object
 8   number_of_trades              1048956 non-null  int64 
 9   taker_buy_base_asset_volume   1048956 non-null  object
 10  taker_buy_quote_asset_volume  1048956 non-null  object
 11  ignore                        1048956 non-null  object
dtypes: int64(3), object(9)
memory usage: 96.0+

In [10]:
# Count missing values per column. Left as the cell's last expression so the
# notebook renders the Series itself instead of printed text.
df_klines.isnull().sum()

open_time                       0
open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volumne             0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
ignore                          0
dtype: int64


## Feature Engineering

### Copy original data

In [11]:
# Work on a copy so the raw download in df_klines stays untouched and the
# feature-engineering cells can be re-run without fetching the data again.
df_klines_copy = df_klines.copy()

### Convert the 'object' price and volume columns to numeric (float)

In [12]:
# The OHLC prices and the volume arrive as object dtype; parse each of them
# into a numeric column so arithmetic on them works.
for column_name in ('open', 'high', 'low', 'close', 'volume'):
    df_klines_copy[column_name] = pd.to_numeric(df_klines_copy[column_name])

### Convert 'open_time' and 'close_time' to pandas datetime format

In [13]:
# open_time / close_time are integer epoch milliseconds, so unit="ms" is all
# pandas needs. infer_datetime_format only concerns string parsing and is
# deprecated in pandas 2.x (it triggers a warning), so it is not passed.
# The results are timezone-naive timestamps on the UTC-based epoch clock.
df_klines_copy['formatted_open_time'] = pd.to_datetime(df_klines_copy['open_time'], unit="ms")
df_klines_copy['formatted_close_time'] = pd.to_datetime(df_klines_copy['close_time'], unit="ms")

### Check the data - head and tail

In [14]:
# First rows: confirm the formatted_* columns exist and that the series
# starts at the requested start date.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
0,1577836800000,7195.24,7196.25,7183.14,7186.68,51.642812,1577836859999,371233.51835535,493,19.59823,140888.41428273,0,2020-01-01 00:00:00,2020-01-01 00:00:59.999
1,1577836860000,7187.67,7188.06,7182.2,7184.03,7.248148,1577836919999,52080.1277878,135,2.031772,14599.21192429,0,2020-01-01 00:01:00,2020-01-01 00:01:59.999
2,1577836920000,7184.41,7184.71,7180.26,7182.43,11.681677,1577836979999,83903.74163545,202,5.479244,39357.08177646,0,2020-01-01 00:02:00,2020-01-01 00:02:59.999
3,1577836980000,7183.83,7188.94,7182.49,7185.94,10.025391,1577837039999,72033.22664851,136,3.294966,23680.57192367,0,2020-01-01 00:03:00,2020-01-01 00:03:59.999
4,1577837040000,7185.54,7185.54,7178.64,7179.78,14.911105,1577837099999,107066.52182466,161,2.369033,17012.01513816,0,2020-01-01 00:04:00,2020-01-01 00:04:59.999


In [15]:
# Last rows: shows where the downloaded range actually ends.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
1048951,1640908560000,47098.19,47147.14,47098.18,47134.56,6.49057,1640908619999,305861.6268939,387,4.02327,189596.506989,0,2021-12-30 23:56:00,2021-12-30 23:56:59.999
1048952,1640908620000,47134.56,47161.69,47129.96,47144.56,4.78281,1640908679999,225491.3727645,324,1.69518,79919.6452588,0,2021-12-30 23:57:00,2021-12-30 23:57:59.999
1048953,1640908680000,47144.56,47147.89,47122.96,47139.98,4.15772,1640908739999,195959.9917835,249,1.43431,67601.8230559,0,2021-12-30 23:58:00,2021-12-30 23:58:59.999
1048954,1640908740000,47139.98,47141.73,47116.71,47120.87,3.97618,1640908799999,187407.572887,336,0.88827,41866.9158904,0,2021-12-30 23:59:00,2021-12-30 23:59:59.999
1048955,1640908800000,47120.88,47121.3,47080.0,47091.29,14.37694,1640908859999,677226.1825368,514,3.85955,181772.3706326,0,2021-12-31 00:00:00,2021-12-31 00:00:59.999


### Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)

In [16]:
def create_curl_values(df):
    """Add candlestick geometry columns to a kline DataFrame.

    For each row the candle body spans ``open``..``close`` and the shadows
    are the parts of the ``low``..``high`` range that lie outside the body:

    * ``upper_shadow`` = high - (open if open > close else close)
    * ``lower_shadow`` = (close if open > close else open) - low
    * ``real_body``    = absolute distance between open and close

    The computation is vectorised (no per-row Python loop), which matters
    for frames with around a million 1-minute candles.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``open``, ``high``, ``low`` and ``close`` columns.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in; the three columns are
        added to it in place.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Notes
    -----
    Errors are no longer printed and swallowed: a failure (for example
    non-numeric columns) propagates to the caller instead of silently
    returning ``None``.
    """
    required_columns = ("open", "high", "low", "close")
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(
            "create_curl_values: missing required column(s): {}".format(missing)
        )

    # True where the candle closed below its open; the body's top edge is then
    # the open price, otherwise it is the close price (ties use the close,
    # exactly like the row-wise `if open > close ... else ...` it replaces).
    open_above_close = df["open"] > df["close"]
    body_top = df["open"].where(open_above_close, df["close"])
    body_bottom = df["close"].where(open_above_close, df["open"])

    df["upper_shadow"] = df["high"] - body_top
    df["lower_shadow"] = body_bottom - df["low"]
    df["real_body"] = body_top - body_bottom

    return df

In [17]:
# Add the upper_shadow / lower_shadow / real_body columns. The function writes
# them onto the frame it receives and returns that frame, hence the rebinding.
df_klines_copy = create_curl_values(df_klines_copy)

### Check the data - head and tail

In [18]:
# First rows: check the new upper_shadow / lower_shadow / real_body columns.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
0,1577836800000,7195.24,7196.25,7183.14,7186.68,51.642812,1577836859999,371233.51835535,493,19.59823,140888.41428273,0,2020-01-01 00:00:00,2020-01-01 00:00:59.999,1.01,3.54,8.56
1,1577836860000,7187.67,7188.06,7182.2,7184.03,7.248148,1577836919999,52080.1277878,135,2.031772,14599.21192429,0,2020-01-01 00:01:00,2020-01-01 00:01:59.999,0.39,1.83,3.64
2,1577836920000,7184.41,7184.71,7180.26,7182.43,11.681677,1577836979999,83903.74163545,202,5.479244,39357.08177646,0,2020-01-01 00:02:00,2020-01-01 00:02:59.999,0.3,2.17,1.98
3,1577836980000,7183.83,7188.94,7182.49,7185.94,10.025391,1577837039999,72033.22664851,136,3.294966,23680.57192367,0,2020-01-01 00:03:00,2020-01-01 00:03:59.999,3.0,1.34,2.11
4,1577837040000,7185.54,7185.54,7178.64,7179.78,14.911105,1577837099999,107066.52182466,161,2.369033,17012.01513816,0,2020-01-01 00:04:00,2020-01-01 00:04:59.999,0.0,1.14,5.76


In [19]:
# Last rows: same check of the new columns at the end of the series.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time,upper_shadow,lower_shadow,real_body
1048951,1640908560000,47098.19,47147.14,47098.18,47134.56,6.49057,1640908619999,305861.6268939,387,4.02327,189596.506989,0,2021-12-30 23:56:00,2021-12-30 23:56:59.999,12.58,0.01,36.37
1048952,1640908620000,47134.56,47161.69,47129.96,47144.56,4.78281,1640908679999,225491.3727645,324,1.69518,79919.6452588,0,2021-12-30 23:57:00,2021-12-30 23:57:59.999,17.13,4.6,10.0
1048953,1640908680000,47144.56,47147.89,47122.96,47139.98,4.15772,1640908739999,195959.9917835,249,1.43431,67601.8230559,0,2021-12-30 23:58:00,2021-12-30 23:58:59.999,3.33,17.02,4.58
1048954,1640908740000,47139.98,47141.73,47116.71,47120.87,3.97618,1640908799999,187407.572887,336,0.88827,41866.9158904,0,2021-12-30 23:59:00,2021-12-30 23:59:59.999,1.75,4.16,19.11
1048955,1640908800000,47120.88,47121.3,47080.0,47091.29,14.37694,1640908859999,677226.1825368,514,3.85955,181772.3706326,0,2021-12-31 00:00:00,2021-12-31 00:00:59.999,0.42,11.29,29.59


## Export the Data

### Export parameters

In [20]:
# Target folder and file extension of the export.
export_path = "../data"
export_extension = ".csv"

# File name pattern: binance_<symbol>_<interval>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>.
# The human-readable start/end strings are re-parsed to get sortable dates.
export_filename = "_".join([
    "binance",
    asset_ticket,
    timestamp,
    "from",
    datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "to",
    datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
])

full_export_path = os.path.join(export_path, export_filename + export_extension)

### Export

In [21]:
# Create the target folder if it does not exist yet, so the (slow to obtain)
# data is not lost to a missing-directory error at the very last step.
os.makedirs(os.path.dirname(full_export_path) or ".", exist_ok=True)
# index=False: the RangeIndex carries no information, keep it out of the CSV.
df_klines_copy.to_csv(full_export_path, index=False)