# Get Data using python-binance

## Import relevant libraries

In [1]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [12]:
# Load the key/value pairs from the local '.env' file into the process
# environment; the API credentials are read from os.environ further down.
load_dotenv('.env')

True

## Get the data

### Create the client

In [14]:
# Build the Binance API client from credentials held in environment variables.
# Indexing os.environ directly means a missing variable raises KeyError here.
client = Client(os.environ["BINANCE_API_KEY"], os.environ["BINANCE_SECRET_KEY"])

### Parameters

In [15]:
# Trading pair to download; keep exactly one of the alternatives uncommented.
asset_ticket = "BTCUSDT"
#asset_ticket = "ETHUSDT"
#asset_ticket = "BNBUSDT"

# Kline interval; keep exactly one uncommented. Despite its name, 'timestamp'
# holds an interval constant, not a point in time.
#timestamp = Client.KLINE_INTERVAL_1MINUTE
#timestamp = Client.KLINE_INTERVAL_5MINUTE
timestamp = Client.KLINE_INTERVAL_15MINUTE
#timestamp = Client.KLINE_INTERVAL_1HOUR
#timestamp = Client.KLINE_INTERVAL_1DAY

# Requested date range, passed unchanged to get_historical_klines. The export
# cell parses both strings with the '%d %b, %Y' format, so keep this layout.
start_date = "30 Jan, 2023"
end_date = "31 Jan, 2023"

### Get the data

In [27]:
# Download the klines for the configured pair, interval and date range.
# The result is a list of lists (one inner list per kline).
klines = client.get_historical_klines(asset_ticket, timestamp, start_date, end_date)
#klines = client.get_historical_klines(asset_ticket, timestamp, start_date) #To fetch something until now

### Convert the list of lists to a pandas DataFrame

In [28]:
# Label the 12 positional fields of every kline row.
# NOTE(review): 'quote_asset_volumne' looks like a typo for 'quote_asset_volume';
# it is left untouched because the name ends up in the exported CSV header.
df_klines = pd.DataFrame(
    klines,
    columns=[
        'open_time',
        'open',
        'high',
        'low',
        'close',
        'volume',
        'close_time',
        'quote_asset_volumne',
        'number_of_trades',
        'taker_buy_base_asset_volume',
        'taker_buy_quote_asset_volume',
        'ignore',
    ],
)

### Check the data

In [29]:
# Eyeball five randomly chosen rows as a quick sanity check.
df_klines.sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
110,1675135800000,22842.1,22857.04,22814.81,22844.22,1606.17702,1675136699999,36687811.5477837,46311,761.22242,17387774.4023294,0
101,1675127700000,22823.46,22829.0,22782.53,22794.55,2025.50967,1675128599999,46188230.8842723,54186,995.93627,22711273.5920607,0
82,1675110600000,22670.58,22725.98,22651.46,22716.42,3317.07772,1675111499999,75289710.28982,88345,1600.85402,36337217.5920732,0
34,1675067400000,23550.92,23599.22,23507.0,23568.92,3355.32451,1675068299999,79079647.044292,87128,1675.29758,39486659.9530511,0
56,1675087200000,23057.72,23180.94,23027.3,23175.71,3703.04764,1675088099999,85551041.2642845,94378,2013.09135,46512225.7362862,0


In [30]:
# Summary statistics; at this stage only the int64 columns are numeric, so the
# price and volume columns (still 'object') do not appear in the result.
df_klines.describe()

Unnamed: 0,open_time,close_time,number_of_trades
count,132.0,132.0,132.0
mean,1675096000000.0,1675097000000.0,74757.659091
std,34424260.0,34424260.0,30295.205643
min,1675037000000.0,1675038000000.0,29869.0
25%,1675066000000.0,1675067000000.0,53971.5
50%,1675096000000.0,1675097000000.0,67812.5
75%,1675125000000.0,1675126000000.0,86857.75
max,1675155000000.0,1675156000000.0,215943.0


In [31]:
# Column dtypes and non-null counts.
df_klines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   open_time                     132 non-null    int64 
 1   open                          132 non-null    object
 2   high                          132 non-null    object
 3   low                           132 non-null    object
 4   close                         132 non-null    object
 5   volume                        132 non-null    object
 6   close_time                    132 non-null    int64 
 7   quote_asset_volumne           132 non-null    object
 8   number_of_trades              132 non-null    int64 
 9   taker_buy_base_asset_volume   132 non-null    object
 10  taker_buy_quote_asset_volume  132 non-null    object
 11  ignore                        132 non-null    object
dtypes: int64(3), object(9)
memory usage: 12.5+ KB


In [32]:
# Number of missing values per column.
print(df_klines.isnull().sum())

open_time                       0
open                            0
high                            0
low                             0
close                           0
volume                          0
close_time                      0
quote_asset_volumne             0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
ignore                          0
dtype: int64


## Feature Engineering

### Copy original data

In [33]:
# Work on a copy so the raw download in df_klines stays untouched.
df_klines_copy = df_klines.copy()

### Convert the 'object' price and volume columns to numeric

In [34]:
# The OHLCV columns have 'object' dtype after loading (see the info() output
# above); convert each of them to a numeric dtype so arithmetic works on them.
for ohlcv_column in ('open', 'high', 'low', 'close', 'volume'):
    df_klines_copy[ohlcv_column] = pd.to_numeric(df_klines_copy[ohlcv_column])

### Convert 'open_time' and 'close_time' to the pandas datetime format

In [35]:
# 'open_time' and 'close_time' are integer epoch values in milliseconds, so
# unit="ms" is all to_datetime needs. The former infer_datetime_format=True
# argument only concerns string parsing and is deprecated since pandas 2.0
# (it just triggers a warning), so it is no longer passed.
df_klines_copy['formatted_open_time'] = pd.to_datetime(df_klines_copy['open_time'], unit="ms")
df_klines_copy['formatted_close_time'] = pd.to_datetime(df_klines_copy['close_time'], unit="ms")

### Check the data - head and tail

In [36]:
# First rows after the numeric and datetime conversions.
df_klines_copy.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
0,1675036800000,23743.37,23800.51,23713.64,23772.51,3116.98463,1675037699999,74072219.8961114,85890,1569.17022,37292368.6978004,0,2023-01-30 00:00:00,2023-01-30 00:14:59.999
1,1675037700000,23773.18,23784.96,23683.96,23708.51,3136.50005,1675038599999,74384627.6889652,86762,1527.83108,36232533.3109217,0,2023-01-30 00:15:00,2023-01-30 00:29:59.999
2,1675038600000,23708.51,23754.27,23692.87,23748.0,2116.86328,1675039499999,50235967.6254949,67987,1076.71462,25553003.0000541,0,2023-01-30 00:30:00,2023-01-30 00:44:59.999
3,1675039500000,23747.52,23775.8,23726.01,23751.43,1891.48873,1675040399999,44939849.0410522,55865,891.82806,21189597.8723924,0,2023-01-30 00:45:00,2023-01-30 00:59:59.999
4,1675040400000,23751.02,23765.9,23675.1,23682.0,2444.47409,1675041299999,57982255.1364612,71014,1140.30518,27048104.6674515,0,2023-01-30 01:00:00,2023-01-30 01:14:59.999


In [37]:
# Last rows after the numeric and datetime conversions.
df_klines_copy.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore,formatted_open_time,formatted_close_time
127,1675151100000,22909.52,22989.48,22906.82,22978.24,5174.85864,1675151999999,118819453.6404809,112623,2669.96634,61305251.3790974,0,2023-01-31 07:45:00,2023-01-31 07:59:59.999
128,1675152000000,22978.24,22996.84,22923.33,22926.89,3491.3486,1675152899999,80161462.2583208,86917,1674.99254,38461031.8268422,0,2023-01-31 08:00:00,2023-01-31 08:14:59.999
129,1675152900000,22926.89,22966.68,22914.52,22919.81,2672.23556,1675153799999,61311315.1625011,74045,1322.18186,30337379.1504077,0,2023-01-31 08:15:00,2023-01-31 08:29:59.999
130,1675153800000,22919.8,22925.61,22886.01,22892.04,2273.08689,1675154699999,52072931.0959403,67137,1114.29538,25527752.3440896,0,2023-01-31 08:30:00,2023-01-31 08:44:59.999
131,1675154700000,22892.43,22908.74,22837.14,22838.96,1195.91906,1675155599999,27350469.1281937,29869,555.15936,12696665.9181563,0,2023-01-31 08:45:00,2023-01-31 08:59:59.999


### Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)

In [None]:
def create_curl_values(df):
    """Add candlestick shadow and body sizes to a klines DataFrame.

    The candle body spans from the lower to the higher of ``open`` and
    ``close``. Three columns are added to ``df`` in place:

    * ``upper_shadow``: ``high`` minus the top of the body.
    * ``lower_shadow``: bottom of the body minus ``low``.
    * ``real_body``: top of the body minus its bottom.

    Errors are no longer swallowed: previously any exception was printed and
    ``None`` was returned, which silently replaced the caller's DataFrame.

    Args:
        df: DataFrame with numeric ``open``, ``high``, ``low`` and ``close``
            columns.

    Returns:
        The same ``df`` object, with the three columns added.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    required = ('open', 'high', 'low', 'close')
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f"create_curl_values: missing column(s) {missing}")

    # Same branch as the former row-by-row rule: a strictly falling candle
    # (open > close) has its body top at 'open', every other candle at 'close'.
    falling = df['open'] > df['close']
    body_top = df['open'].where(falling, df['close'])
    body_bottom = df['close'].where(falling, df['open'])

    df['upper_shadow'] = df['high'] - body_top
    df['lower_shadow'] = body_bottom - df['low']
    df['real_body'] = body_top - body_bottom

    return df

In [None]:
# Add the 'upper_shadow', 'lower_shadow' and 'real_body' columns.
df_klines_copy = create_curl_values(df_klines_copy)

### Check the data - head and tail

In [None]:
# First rows, now including the shadow and body columns.
df_klines_copy.head()

In [None]:
# Last rows, now including the shadow and body columns.
df_klines_copy.tail()

## Export the Data

### Export parameters

In [None]:
# Output location plus a self-describing file name of the form
# binance_<pair>_<interval>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>.csv
export_path = "../data"
export_extension = ".csv"
export_filename = "".join((
    "binance",
    "_", asset_ticket,
    "_", timestamp,
    # Re-format the human-readable range dates into a file-name-friendly form.
    "_from_", datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "_to_", datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
))
full_export_path = os.path.join(export_path, export_filename + export_extension)

### Export

In [None]:
# Make sure the target directory exists first: to_csv does not create missing
# directories and would otherwise fail with OSError.
export_dir = os.path.dirname(full_export_path)
if export_dir:
    os.makedirs(export_dir, exist_ok=True)

# index=False: the default RangeIndex carries no information, so keep it out
# of the file.
df_klines_copy.to_csv(full_export_path, index=False)