In [1]:
""" This File is our baseline tool for getting the Open, High, Low, Close, Volume for BTC.
    It uses Binance API and creates DataFrames for further data manipulation and TA.
    We will use this for the Logistic Regression."""
from binance import Client, ThreadedWebsocketManager, ThreadedDepthCacheManager
import pandas as pd
import mplfinance as mpl
import os
import numpy as np
from hvplot import hvPlot
import pandas as pd
from ta import add_all_ta_features
from ta.utils import dropna
import pandas_ta as ta
from finta import TA

In [2]:
api_key = os.environ.get("api_key")
api_secret = os.environ.get("api_secret")

In [3]:
client = Client(api_key, api_secret)

In [4]:
# BTC Minute Data from past 50 days
historical_df = client.get_historical_klines('BTCUSDC', Client.KLINE_INTERVAL_1DAY, '1 Jan 2016')

In [5]:
hist_df = pd.DataFrame(historical_df)

In [6]:
hist_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1544832000000,3200.0,3312.32,3000.0,3183.47,283.213859,1544918399999,897046.63404699,1050,141.639073,448709.23892821,0
1,1544918400000,3183.47,3257.31,3178.54,3199.27,423.048529,1545004799999,1361883.3993102,1506,213.700014,689502.9241408,0
2,1545004800000,3192.69,3587.35,3187.25,3494.65,1163.940291,1545091199999,3968237.61401306,5062,438.247287,1484453.16743536,0
3,1545091200000,3507.62,3683.35,3428.34,3670.11,1251.139627,1545177599999,4409438.80579916,4707,739.77013,2612032.94942774,0
4,1545177600000,3673.18,3920.0,3618.53,3676.32,2655.242827,1545263999999,9978823.16211568,11297,1288.754059,4849888.20273553,0


In [7]:
hist_df.columns = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Quote Asset Volume', 
                    'Number of Trades', 'TB Base Volume', 'TB Quote Volume', 'Ignore']

In [8]:
hist_df.tail()

Unnamed: 0,Open Time,Open,High,Low,Close,Volume,Close Time,Quote Asset Volume,Number of Trades,TB Base Volume,TB Quote Volume,Ignore
1026,1633478400000,51502.83,55794.07,50408.96,55330.92,2133.377701,1633564799999,113716193.94630434,64808,1038.534508,55384143.31644654,0
1027,1633564800000,55332.46,55352.49,53392.38,53767.81,1434.18373,1633651199999,78000326.3310113,49241,695.69303,37858184.8396023,0
1028,1633651200000,53788.03,56150.9,53627.77,53918.86,1285.581965,1633737599999,70304244.52617723,41679,612.561695,33514670.05411793,0
1029,1633737600000,53953.98,55500.0,53684.22,54967.95,1105.48293,1633823999999,60575064.1096808,29103,522.53372,28631082.6915074,0
1030,1633824000000,54964.06,56103.79,54096.07,55457.5,932.93632,1633910399999,51520520.264793,25865,448.09196,24752390.0458569,0


In [9]:
# hist_df['Open Time'] = pd.to_datetime(hist_df['Open Time']/1000, unit='s')
# hist_df['Close Time'] = pd.to_datetime(hist_df['Close Time']/1000, unit='s')

In [10]:
numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote Asset Volume', 'TB Base Volume', 'TB Quote Volume']

In [11]:
hist_df[numeric_columns] = hist_df[numeric_columns].apply(pd.to_numeric, axis=1)

In [12]:
hist_df.tail()

Unnamed: 0,Open Time,Open,High,Low,Close,Volume,Close Time,Quote Asset Volume,Number of Trades,TB Base Volume,TB Quote Volume,Ignore
1026,1633478400000,51502.83,55794.07,50408.96,55330.92,2133.377701,1633564799999,113716200.0,64808,1038.534508,55384140.0,0
1027,1633564800000,55332.46,55352.49,53392.38,53767.81,1434.18373,1633651199999,78000330.0,49241,695.69303,37858180.0,0
1028,1633651200000,53788.03,56150.9,53627.77,53918.86,1285.581965,1633737599999,70304240.0,41679,612.561695,33514670.0,0
1029,1633737600000,53953.98,55500.0,53684.22,54967.95,1105.48293,1633823999999,60575060.0,29103,522.53372,28631080.0,0
1030,1633824000000,54964.06,56103.79,54096.07,55457.5,932.93632,1633910399999,51520520.0,25865,448.09196,24752390.0,0


In [13]:
hist_df.dtypes


Open Time               int64
Open                  float64
High                  float64
Low                   float64
Close                 float64
Volume                float64
Close Time              int64
Quote Asset Volume    float64
Number of Trades        int64
TB Base Volume        float64
TB Quote Volume       float64
Ignore                 object
dtype: object

In [14]:
hist_df.describe()


Unnamed: 0,Open Time,Open,High,Low,Close,Volume,Close Time,Quote Asset Volume,Number of Trades,TB Base Volume,TB Quote Volume
count,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0
mean,1589328000000.0,18864.379564,19462.447856,18215.203094,18914.704918,2121.781135,1589414000000.0,45207620.0,30003.298739,1021.663237,21992850.0
std,25727190000.0,16951.164364,17534.913047,16305.932269,16981.923682,1387.085616,25727190000.0,55287070.0,23900.458711,684.05002,27405740.0
min,1544832000000.0,3183.47,3257.31,3000.0,3183.47,283.213859,1544918000000.0,897046.6,1050.0,122.332602,448709.2
25%,1567080000000.0,7877.49,8103.625,7581.035,7885.735,1201.107428,1567166000000.0,10861850.0,12283.0,556.614359,5274732.0
50%,1589328000000.0,10154.84,10371.2,9830.0,10158.38,1818.059387,1589414000000.0,20473880.0,22565.0,876.085216,9741190.0
75%,1611576000000.0,32920.695,34093.18,31255.0,33013.815,2613.518779,1611662000000.0,61258530.0,43509.0,1277.251404,29336880.0
max,1633824000000.0,63583.85,64882.47,62032.95,63554.56,13553.429477,1633910000000.0,512865200.0,226279.0,6347.727851,241080100.0


In [None]:
hist_df.info()

In [17]:
hist_df['Open Time'] = pd.to_datetime(hist_df['Open Time']/1000, unit='s')

hist_df['Close Time'] = pd.to_datetime(hist_df['Close Time']/1000, unit='s')

TypeError: cannot perform __truediv__ with this index type: DatetimeArray

In [18]:
# Just get the Open Time, Open, High, Low, Close, Volume
btc_ohlcv_daily = hist_df.iloc[:,0:6]

In [19]:
# CSV SAVER TOOL For Saving Bitcoin data to CSVs to use for past 50 days Minute data for Deep Learning Model
btc_ohlcv_daily.to_csv('btc_daily_ohlcv_2016to2021.csv', index=False)