# Download multiple raw datasets using python-binance

## Import relevant libraries

In [1]:
from binance.client import Client
from dotenv import load_dotenv
import os
import pandas as pd
import datetime

## Set some 'global' parameters

In [2]:
# Load the .env file before anything reads os.environ: run() looks up
# BINANCE_API_KEY and BINANCE_SECRET_KEY there (presumably defined in this
# file -- verify against the local config).
load_dotenv(dotenv_path='../config_files/.env')

True

## Get the data

In [3]:
def create_curl_values(df):
    """Add candlestick 'upper_shadow', 'lower_shadow' and 'real_body' columns.

    For every row the candle body spans from the larger to the smaller of
    'open' and 'close':

    * 'upper_shadow' = high - top of the body
    * 'lower_shadow' = bottom of the body - low
    * 'real_body'    = top of the body - bottom of the body

    When 'open' is not strictly greater than 'close' (including ties), the
    close is treated as the top of the body and the open as the bottom.

    Args:
        df: DataFrame with numeric 'open', 'high', 'low' and 'close' columns.
            It is modified in place.

    Returns:
        The same DataFrame object, with the three new columns added.

    Raises:
        KeyError: If one of the required columns is missing.
        TypeError: If the price columns cannot be compared or subtracted
            (for example, if they still hold strings).

    Errors propagate to the caller instead of being printed and turned into
    a ``None`` return value, so a failure is reported at its real cause.
    """
    # Mirrors the row-wise rule "if open > close" as a column-wide mask.
    open_above_close = df['open'] > df['close']

    # Series.where keeps the left value where the mask is True and takes the
    # replacement elsewhere, i.e. exactly the if/else of the row-wise rule.
    body_top = df['open'].where(open_above_close, df['close'])
    body_bottom = df['close'].where(open_above_close, df['open'])

    df['upper_shadow'] = df['high'] - body_top
    df['lower_shadow'] = body_bottom - df['low']
    df['real_body'] = body_top - body_bottom

    return df

In [4]:
# Column names assigned, in order, to the fields of each kline row returned
# by client.get_historical_klines.
_KLINE_COLUMNS = [
    'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
    'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume', 'ignore',
]

# Columns converted with pd.to_numeric before any arithmetic is done on them.
_NUMERIC_COLUMNS = ['open', 'high', 'low', 'close', 'volume']


def _convert_kline_dtypes(df):
    """Convert price/volume columns to numeric and add datetime columns."""
    for column in _NUMERIC_COLUMNS:
        df[column] = pd.to_numeric(df[column])

    # 'open_time' and 'close_time' are parsed as epoch milliseconds.
    df['formatted_open_time'] = pd.to_datetime(df['open_time'], unit="ms")
    df['formatted_close_time'] = pd.to_datetime(df['close_time'], unit="ms")

    # 'date' is the close time plus one second, rendered as text with
    # second resolution (any sub-second part is dropped by the format).
    close_plus_one_second = df['formatted_close_time'] + pd.to_timedelta(1, unit='s')
    df['date'] = close_plus_one_second.dt.strftime('%Y-%m-%d %H:%M:%S')

    return df


def _date_for_filename(date_text):
    """Reformat a '1 Jan, 2017'-style date as '2017_01_01' for file names."""
    return datetime.datetime.strptime(date_text, '%d %b, %Y').strftime('%Y_%m_%d')


def run():
    """Download yearly klines for several symbols/intervals and export CSVs.

    For every consecutive pair of years in the configured list, and for every
    configured symbol and interval, the function:

    1. requests the klines between 1 Jan of the first year and 1 Jan of the
       next year through ``client.get_historical_klines``;
    2. loads them into a DataFrame, converts the price/volume columns to
       numeric and adds 'formatted_open_time', 'formatted_close_time' and
       'date' columns;
    3. adds the candlestick columns via ``create_curl_values``;
    4. writes the result to
       ``../../data/00_raw_single/binance_<symbol>_<interval>_from_<date>_to_<date>.csv``.

    The API credentials are read from the ``BINANCE_API_KEY`` and
    ``BINANCE_SECRET_KEY`` environment variables.

    Returns:
        None. Progress messages are printed to standard output.

    The function does not raise: any exception is caught and printed, and
    the remaining downloads are skipped.
    """
    try:
        # Create the Binance API client
        client = Client(os.environ["BINANCE_API_KEY"], os.environ["BINANCE_SECRET_KEY"])

        # Define the parameters
        list_asset_ticket = ["BTCUSDT", "ETHUSDT", "BNBUSDT"]
        list_timestamp = [
            Client.KLINE_INTERVAL_5MINUTE,
            Client.KLINE_INTERVAL_15MINUTE,
            Client.KLINE_INTERVAL_30MINUTE,
            Client.KLINE_INTERVAL_1HOUR,
            Client.KLINE_INTERVAL_1DAY,
        ]
        list_year = [2017, 2018, 2019, 2020, 2021, 2022, 2023]
        prefix_start_date = "1 Jan, "
        prefix_end_date = "1 Jan, "

        export_path = "../../data/00_raw_single/"
        export_extension = ".csv"

        # Make sure the target folder exists up front, so a long download is
        # not lost to a missing directory at export time.
        os.makedirs(export_path, exist_ok=True)

        # Each year is paired with the following one: (2017, 2018), ...
        for start_year, end_year in zip(list_year, list_year[1:]):

            start_date = prefix_start_date + str(start_year)
            end_date = prefix_end_date + str(end_year)

            # The date part of the file name only depends on the year pair.
            date_range_suffix = "_from_" + _date_for_filename(start_date) + \
                                "_to_" + _date_for_filename(end_date)

            for asset_ticket in list_asset_ticket:
                for timestamp in list_timestamp:

                    print(f'asset_ticket:{asset_ticket}')
                    print(f'timestamp:{timestamp}')

                    # Get the data
                    klines = client.get_historical_klines(asset_ticket, timestamp, start_date, end_date)
                    print("Get data from API completed")

                    # Convert list of lists to pandas df
                    df_klines = pd.DataFrame(klines, columns=_KLINE_COLUMNS)
                    print("Convert data to df completed")

                    # Numeric and datetime conversions
                    df_klines = _convert_kline_dtypes(df_klines)
                    print("Datatype convertions completed")

                    # Create the 'upper_shadow', 'lower_shadow' and 'real_body' values (to compose the CURL)
                    df_klines = create_curl_values(df_klines)
                    print("CURL creation completed")

                    # Export the data
                    export_filename = "binance" + \
                                      "_" + asset_ticket + \
                                      "_" + timestamp + \
                                      date_range_suffix
                    full_export_path = os.path.join(export_path, export_filename + export_extension)

                    df_klines.to_csv(full_export_path, index=False)
                    print("Export data completed")

    except Exception as e:
        # Top-level boundary of the notebook cell: report the error instead
        # of raising, as callers of run() expect.
        print(e)

In [5]:
# Execute the download/export pipeline defined in run(); it prints a progress
# line after each step for every symbol and interval.
run()

asset_ticket:BTCUSDT
timestamp:5m
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data completed
asset_ticket:BTCUSDT
timestamp:15m
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data completed
asset_ticket:BTCUSDT
timestamp:30m
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data completed
asset_ticket:BTCUSDT
timestamp:1h
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data completed
asset_ticket:BTCUSDT
timestamp:1d
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data completed
asset_ticket:ETHUSDT
timestamp:5m
Get data from API completed
Convert data to df completed
Datatype convertions completed
CURL creation completed
Export data 