# Get Data using python-binance

## Import relevant libraries

In [1]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [2]:
# Load variables from a local .env file into the environment; the Binance
# credentials are read from os.environ when the client is created.
load_dotenv()

True

## Get the data

### Create the client

In [3]:
# Build the Binance client from credentials held in environment variables;
# a missing variable raises KeyError here rather than failing later.
client = Client(
    api_key=os.environ["BINANCE_API_KEY"],
    api_secret=os.environ["BINANCE_SECRET_KEY"],
)

### Parameters

In [6]:
# Trading pair to download (uncomment exactly one).
asset_ticket = "BTCUSDT"
#asset_ticket = "ETHUSDT"
#asset_ticket = "BNBUSDT"

# Candle interval (uncomment exactly one).
timestamp = Client.KLINE_INTERVAL_1MINUTE
#timestamp = Client.KLINE_INTERVAL_15MINUTE
#timestamp = Client.KLINE_INTERVAL_1HOUR
#timestamp = Client.KLINE_INTERVAL_1DAY

# Date range in "day Mon, year" form. Use English month abbreviations: besides
# being passed to the API call, these strings are re-parsed with
# strptime('%d %b, %Y') when the export file name is built, and '%b' only
# matches English names under the default C/English locale.
start_date = "1 Jan, 2020"
end_date = "31 Aug, 2022"

### Get the data

In [7]:
# Fetch the historical klines for the chosen symbol and interval between
# start_date and end_date; the result is converted to a DataFrame below.
klines = client.get_historical_klines(asset_ticket, timestamp, start_date, end_date)

### Convert list of lists to Pandas DataFrame

In [None]:
# Labels for the 12 fields of each kline row, in positional order.
# NOTE: 'quote_asset_volumne' is misspelled ("volume") but is kept unchanged
# because it becomes a column name in the exported CSV.
kline_columns = [
    'open_time',
    'open',
    'high',
    'low',
    'close',
    'volume',
    'close_time',
    'quote_asset_volumne',
    'number_of_trades',
    'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume',
    'ignore',
]
df_klines = pd.DataFrame(klines, columns=kline_columns)

### Check the data

In [None]:
# Show 5 randomly chosen rows as a quick sanity check of the download.
df_klines.sample(5)

In [None]:
# Summary statistics of the raw columns.
df_klines.describe()

In [None]:
# Column dtypes and non-null counts of the raw data.
df_klines.info()

In [None]:
# Number of missing values in each column.
print(df_klines.isnull().sum())

## Feature Engineering

### Copy original data

In [None]:
# Work on a copy so the raw download in df_klines is left untouched by the
# feature engineering steps that follow.
df_klines_copy = df_klines.copy()

### Convert 'object' columns to numeric (float) dtype

In [None]:
# Convert the price and volume columns to numeric dtype so they can be used
# in arithmetic; the remaining columns are left as downloaded.
for numeric_column in ('open', 'high', 'low', 'close', 'volume'):
    df_klines_copy[numeric_column] = pd.to_numeric(df_klines_copy[numeric_column])

### Convert the 'open_time' and 'close_time' to a Pandas datetime format

In [None]:
# 'open_time' and 'close_time' are interpreted as milliseconds since the Unix
# epoch (unit="ms"). The deprecated infer_datetime_format flag is dropped: it
# only concerns string parsing and has no effect on numeric epoch input.
df_klines_copy['formatted_open_time'] = pd.to_datetime(df_klines_copy['open_time'], unit="ms")
df_klines_copy['formatted_close_time'] = pd.to_datetime(df_klines_copy['close_time'], unit="ms")

### Check the data - head and tail

In [None]:
# First rows, to check the numeric and datetime conversions.
df_klines_copy.head()

In [None]:
# Last rows, to check the end of the downloaded range.
df_klines_copy.tail()

### Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)

In [None]:
def create_curl_values(df):
    """Add 'upper_shadow', 'lower_shadow' and 'real_body' columns to *df*.

    For every row the body spans ``open`` and ``close``. The upper shadow is
    ``high`` minus the top of the body, the lower shadow is the bottom of the
    body minus ``low``, and the real body is top minus bottom.

    The computation is vectorized over whole columns instead of iterating row
    by row, which matters for large minute-level downloads. Errors are no
    longer printed and swallowed (which made the function return ``None``);
    they propagate to the caller.

    Args:
        df: DataFrame with numeric 'open', 'high', 'low' and 'close' columns.
            It is modified in place.

    Returns:
        The same DataFrame object, with the three new columns added.

    Raises:
        KeyError: If any of the required columns is missing. The check runs
            before any column is written, so *df* is not partially modified.
    """
    required_columns = ('open', 'high', 'low', 'close')
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(
            f"create_curl_values: missing required column(s): {missing}"
        )

    # Strict '>' on purpose: rows where open == close (or where the comparison
    # is False because of NaN) take 'close' as the top of the body.
    open_above_close = df['open'] > df['close']
    body_top = df['open'].where(open_above_close, df['close'])
    body_bottom = df['close'].where(open_above_close, df['open'])

    df['upper_shadow'] = df['high'] - body_top
    df['lower_shadow'] = body_bottom - df['low']
    df['real_body'] = body_top - body_bottom

    return df

In [None]:
# Add the 'upper_shadow', 'lower_shadow' and 'real_body' columns.
df_klines_copy = create_curl_values(df_klines_copy)

### Check the data - head and tail

In [None]:
# First rows, now including the shadow and body columns.
df_klines_copy.head()

In [None]:
# Last rows, now including the shadow and body columns.
df_klines_copy.tail()

## Export the Data

### Export parameters

In [None]:
# Re-format the human-readable range bounds as YYYY_MM_DD tags for the name.
start_tag = datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d')
end_tag = datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d')

# File name pattern: binance_<symbol>_<interval>_from_<start>_to_<end>.csv
export_path = "../data"
export_filename = "_".join(
    ["binance", asset_ticket, timestamp, "from", start_tag, "to", end_tag]
)
export_extension = ".csv"
full_export_path = os.path.join(export_path, export_filename + export_extension)

### Export

In [None]:
# to_csv does not create missing directories, so make sure the target folder
# exists before writing (avoids losing a long download to an OSError).
os.makedirs(os.path.dirname(full_export_path) or ".", exist_ok=True)
# index=False: the default integer index carries no information worth saving.
df_klines_copy.to_csv(full_export_path, index=False)