# Getting Historical Data (OHLC & Volume) - Part 2

In [113]:
from binance.client import Client
import pandas as pd
# the key file holds one value per line: API key first, secret key second
keys = pd.read_csv("~/Documents/temp/bnc/api_key.txt", sep=" ", header=None)
# positional lookups: row 0 / row 1 of the single parsed column
api_key = keys.iloc[0, 0]
secret_key = keys.iloc[1, 0]
# build the API client and show its repr as the cell output
client = Client(api_key=api_key, api_secret=secret_key, tld="com")
client

<binance.client.Client at 0x123a31730>

In [114]:
# putting it all together (from part 1) in a function
def get_history(symbol, interval, start, end = None, api_client = None):
    """Fetch historical candles and return them as a numeric OHLCV data frame.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. "BTCUSDT".
    interval : str
        Candle width, e.g. "1d", "1h", "1m".
    start : str or int
        Start of the requested range; the notebook passes both date strings
        and a millisecond timestamp.
    end : str or int, optional
        End of the requested range. If not specified, data is retrieved
        until now.
    api_client : optional
        Object providing ``get_historical_klines``. Defaults to the
        notebook-level ``client``.

    Returns
    -------
    pandas.DataFrame
        Float columns Open, High, Low, Close, Volume indexed by "Date"
        (the candle open time converted from unix milliseconds). Values that
        cannot be parsed as numbers become NaN. If the API returns no
        candles, an empty frame with the same columns and index is returned.
    """
    ohlcv_columns = ["Open", "High", "Low", "Close", "Volume"]
    # fall back to the client created in the setup cell when none is injected
    source = client if api_client is None else api_client
    # get historical data for a certain coin from the API
    bars = source.get_historical_klines(symbol = symbol, interval = interval,
                                        start_str = start, end_str = end, limit = 1000)
    # an empty response has no columns to index into, so return an empty,
    # correctly shaped frame instead of failing further down
    if not bars:
        return pd.DataFrame(columns = ohlcv_columns, dtype = "float64",
                            index = pd.DatetimeIndex([], name = "Date"))
    # convert the list of candles into a data frame with meaningful headers
    df = pd.DataFrame(bars)
    df.columns = ["Open Time", "Open", "High", "Low", "Close",
                  "Volume", "Close Time", "Quote Asset Volume",
                  "Number of Trades", "Taker Buy Base Asset Volume",
                  "Taker Buy Quote Asset Volume", "Ignore"]
    # create the Date column by converting unix time (ms) into datetime
    df["Date"] = pd.to_datetime(df["Open Time"], unit = "ms")
    # keep the most important columns and use the datetime as index
    df = df.set_index("Date")[ohlcv_columns]
    # convert the values in all columns to floats
    return df.apply(pd.to_numeric, errors = "coerce")

In [115]:
# getting the very first available timestamp on binance for daily BTCUSDT candles
# NOTE(review): _get_earliest_valid_timestamp is underscore-prefixed, so it is
# presumably not part of the client's public interface — confirm it is stable
# across library versions before relying on it
timestamp = client._get_earliest_valid_timestamp(symbol = "BTCUSDT", interval = "1d")
timestamp

1502928000000

## Daily data until today/now

In [116]:
# daily BTCUSDT candles starting at the earliest available timestamp;
# `end` keeps its default (None), so the data runs up to now
df = get_history("BTCUSDT", "1d", start = timestamp)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17,4261.48,4485.39,4200.74,4285.08,795.150377
2017-08-18,4285.08,4371.52,3938.77,4108.37,1199.888264
2017-08-19,4108.37,4184.69,3850.00,4139.98,381.309763
2017-08-20,4120.98,4211.08,4032.62,4086.29,467.083022
2017-08-21,4069.13,4119.62,3911.79,4016.00,691.743060
...,...,...,...,...,...
2022-05-23,30293.93,30670.51,28866.35,29109.15,63901.499320
2022-05-24,29109.14,29845.86,28669.00,29654.58,59442.960360
2022-05-25,29654.58,30223.74,29294.21,29542.15,59537.386590
2022-05-26,29542.14,29886.64,28019.56,29201.35,94581.654630


In [117]:
# summary of the frame fetched above: index range, entry count and column dtypes
df.info() # shows the number and time range of entries for the DatetimeIndex

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1745 entries, 2017-08-17 to 2022-05-27
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    1745 non-null   float64
 1   High    1745 non-null   float64
 2   Low     1745 non-null   float64
 3   Close   1745 non-null   float64
 4   Volume  1745 non-null   float64
dtypes: float64(5)
memory usage: 81.8 KB


## Daily data for specified time period

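Valid values for the `interval` parameter are 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M (case matters: `1m` is one minute, `1M` is one month). The start and end strings are dates, optionally with a time.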

In [118]:
# daily candles for a limited time range; both bounds are plain date strings
startdate, enddate = '2021-01-01', '2021-06-30'
df = get_history("BTCUSDT", "1d", startdate, enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,28923.63,29600.00,28624.57,29331.69,54182.925011
2021-01-02,29331.70,33300.00,28946.53,32178.33,129993.873362
2021-01-03,32176.45,34778.11,31962.99,33000.05,120957.566750
2021-01-04,33000.05,33600.00,28130.00,31988.71,140899.885690
2021-01-05,31989.75,34360.00,29900.00,33949.53,116049.997038
...,...,...,...,...,...
2021-06-26,31576.09,32730.00,30151.00,32283.65,107820.375287
2021-06-27,32283.65,34749.00,31973.45,34700.34,96613.244211
2021-06-28,34702.49,35297.71,33862.72,34494.89,82222.267819
2021-06-29,34494.89,36600.00,34225.43,35911.73,90788.796220


## Weekly data for specified time period

In [119]:
# weekly candles for the first half of 2021
startdate, enddate = '2021-01-01', '2021-06-30'
df = get_history("BTCUSDT", "1w", start = startdate, end = enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-04,33000.05,41950.0,28130.0,38150.02,850700.3
2021-01-11,38150.02,40100.0,30420.0,35828.61,895315.1
2021-01-18,35824.99,37850.0,28850.0,32259.9,637026.0
2021-01-25,32259.45,38531.9,29241.72,33092.98,747463.5
2021-02-01,33092.97,40955.51,32296.16,38795.69,583442.3
2021-02-08,38795.69,49707.43,37988.89,48577.79,664186.3
2021-02-15,48580.47,58352.8,45570.79,57408.57,533487.8
2021-02-22,57412.35,57508.47,43000.0,45135.66,737125.7
2021-03-01,45134.11,52640.0,44950.53,50971.75,490819.6
2021-03-08,50959.11,61844.0,49274.67,58968.31,514559.7


## Monthly data for specified time period


In [120]:
# monthly candles; the end date falls in the middle of June
startdate, enddate = '2021-01-01', '2021-06-15'
df = get_history("BTCUSDT", "1M", start = startdate, end = enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,28923.63,41950.0,28130.0,33092.98,3435639.0
2021-02-01,33092.97,58352.8,32296.16,45135.66,2518242.0
2021-03-01,45134.11,61844.0,44950.53,58740.55,2098808.0
2021-04-01,58739.46,64854.0,46930.0,57694.27,1993469.0
2021-05-01,57697.25,59500.0,30000.0,37253.81,3536245.0
2021-06-01,37253.82,41330.0,28805.0,35045.0,2901775.0


## Most recent monthly data (until today/now)

In [121]:
# monthly candles from the start of 2021; without an end date the data
# runs until now
startdate = '2021-01-01'
df = get_history("BTCUSDT", "1M", startdate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,28923.63,41950.0,28130.0,33092.98,3435639.0
2021-02-01,33092.97,58352.8,32296.16,45135.66,2518242.0
2021-03-01,45134.11,61844.0,44950.53,58740.55,2098808.0
2021-04-01,58739.46,64854.0,46930.0,57694.27,1993469.0
2021-05-01,57697.25,59500.0,30000.0,37253.81,3536245.0
2021-06-01,37253.82,41330.0,28805.0,35045.0,2901775.0
2021-07-01,35045.0,42448.0,29278.0,41461.83,1778463.0
2021-08-01,41461.84,50500.0,37332.7,47100.89,1635403.0
2021-09-01,47100.89,52920.0,39600.0,43824.1,1527800.0
2021-10-01,43820.01,67000.0,43283.03,61299.8,1565556.0


## Intraday data (1h) for specific time period

In [122]:
# hourly candles between two dates (no time of day given)
startdate, enddate = '2021-10-01', '2021-10-05'
df = get_history("BTCUSDT", "1h", start = startdate, end = enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-01 00:00:00,43820.01,44059.00,43661.63,43694.48,2001.56071
2021-10-01 01:00:00,43696.14,43803.94,43417.61,43742.74,1292.55848
2021-10-01 02:00:00,43742.74,43794.89,43283.03,43379.01,1348.70735
2021-10-01 03:00:00,43379.00,43680.00,43352.28,43635.79,993.11980
2021-10-01 04:00:00,43635.79,43715.71,43456.46,43625.02,982.32555
...,...,...,...,...,...
2021-10-04 20:00:00,49380.00,49498.78,48981.24,49006.15,2193.96498
2021-10-04 21:00:00,49006.14,49166.09,48556.09,48931.03,1490.42860
2021-10-04 22:00:00,48931.04,49041.24,48688.83,48916.29,1065.63612
2021-10-04 23:00:00,48920.79,49451.00,48895.11,49224.94,2013.34233


In [123]:
# hourly candles again, this time with a time of day added to both dates
startdate, enddate = '2021-10-01 10:00:00', '2021-10-05 16:00:00'
df = get_history("BTCUSDT", "1h", startdate, enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-01 10:00:00,44905.99,47786.70,44829.40,47081.46,12252.88214
2021-10-01 11:00:00,47081.47,47659.32,46957.48,47488.08,4669.20863
2021-10-01 12:00:00,47488.07,47886.11,47170.23,47214.37,5156.10571
2021-10-01 13:00:00,47214.37,47379.99,46957.14,46980.94,2595.13896
2021-10-01 14:00:00,46980.94,47347.53,46763.68,47130.00,3479.22010
...,...,...,...,...,...
2021-10-05 12:00:00,49877.06,50110.00,49705.00,50000.00,2161.32487
2021-10-05 13:00:00,50000.00,50320.00,49772.63,50207.96,2108.42441
2021-10-05 14:00:00,50207.96,50388.00,49575.94,49841.79,3331.24836
2021-10-05 15:00:00,49841.78,50205.21,49700.00,49793.57,2394.23691


## Intraday data (1m) for specific time period

In [124]:
# one-minute candles between two minute-precise datetimes
startdate, enddate = '2021-10-01 10:29:00', '2021-10-05 16:55:00'
df = get_history("BTCUSDT", "1m", start = startdate, end = enddate)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-01 10:29:00,46721.59,46950.00,45932.21,46497.67,962.95022
2021-10-01 10:30:00,46449.22,46838.84,46223.52,46445.35,697.40019
2021-10-01 10:31:00,46465.68,46746.12,46409.71,46621.49,391.05000
2021-10-01 10:32:00,46630.85,47086.16,46600.31,46740.92,522.95147
2021-10-01 10:33:00,46740.92,46764.73,46531.87,46627.92,304.92785
...,...,...,...,...,...
2021-10-05 16:51:00,50034.00,50087.89,50030.03,50069.68,49.89487
2021-10-05 16:52:00,50069.68,50100.00,50062.41,50094.98,19.50751
2021-10-05 16:53:00,50095.90,50095.91,50042.03,50042.04,30.31715
2021-10-05 16:54:00,50042.04,50051.51,49994.99,50024.35,23.80710


## Most recent (last 2 hours) intraday data (1m)

In [125]:
from datetime import datetime, timedelta

In [126]:
# determine the current UTC time
# datetime.utcnow() is deprecated since Python 3.12; take a timezone-aware
# "now" in UTC and drop the tzinfo so the value stays a naive UTC datetime
# with the same string format as before
from datetime import timezone
now = datetime.now(timezone.utc).replace(tzinfo = None)
now

datetime.datetime(2022, 5, 27, 13, 0, 25, 26044)

In [127]:
# step back 120 minutes from `now`; the result is again a datetime object
two_hours_before = now - timedelta(minutes = 120)
two_hours_before

datetime.datetime(2022, 5, 27, 11, 0, 25, 26044)

In [128]:
# string form of the datetime: ISO format with a space between date and time
two_hours_before.isoformat(sep = " ")

'2022-05-27 11:00:25.026044'

In [129]:
# one-minute candles for the last two hours: only a start is passed, so the
# data runs until now
df = get_history("BTCUSDT", "1m", start = str(two_hours_before))
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-05-27 11:01:00,28825.48,28861.02,28810.00,28857.39,75.14419
2022-05-27 11:02:00,28857.38,28859.14,28827.62,28837.37,65.52711
2022-05-27 11:03:00,28837.38,28872.90,28837.37,28869.81,95.71977
2022-05-27 11:04:00,28869.81,28885.81,28869.80,28873.47,44.77180
2022-05-27 11:05:00,28873.46,28897.96,28862.24,28891.21,56.33812
...,...,...,...,...,...
2022-05-27 12:56:00,29231.43,29231.44,29183.64,29190.11,165.04303
2022-05-27 12:57:00,29190.12,29214.43,29184.81,29214.43,47.77840
2022-05-27 12:58:00,29214.42,29257.37,29214.42,29235.50,38.13533
2022-05-27 12:59:00,29235.51,29267.45,29235.50,29259.90,29.57851
