In [1]:
# Binance API 
# Need to install python-binance package
# pip install python-binance
from binance import Client as c
from binance import BinanceSocketManager as bsm
from binance import ThreadedDepthCacheManager as tdcm

# General libraries
import pandas as pd
import numpy as np

# Plotting libraries
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# stats
from statsmodels.api import tsa # time series analysis
import statsmodels.api as sm
from scipy import stats
from scipy.stats import norm

import math
import os
import time
import glob
from datetime import datetime, timedelta
from dateutil import parser

<a class="anchor" id="1"></a>

# 1. Building the DataFrame 

[Back To Top](#toc)

The code below used to contained the api keys to extract data from Binance exchange. To generate your own api keys, please do the following:

1. Create an account on Binance
2. Login and Verify your account aka. KYC (Know-your-customer)
3. Access https://www.binance.com/en/binance-api
4. In the API section, you will have an option to generate a new API key combination including api_key and api_secret.
5. Replace your keys in 'XXX' below and run the following command

Voila! Just in 5 steps, you will be able to access the K-line data (trading data) of all the cryptos that are listed on Binance. 

The clean dataframe is included in this package as [1H_combined_clean.pkl](https://drive.google.com/file/d/1frgZHZL5LNqe6A9-NySEFSO9ekSN2IMA/view?usp=share_link). This file is used to run other notebook. No need to re-run this notebook

In [3]:
# This is the public and private key provided by Binance API
# It is a READ-ONLY api and the way Binance set it up, it may not read at a stranger IP address. 
# May only read if we are at BrainStation Vancouver office
api_key = 'XXX'
api_secret = 'XXX'

client = c(api_key, api_secret)

<a class="anchor" id="1.1"></a>
## 1.1 Extract USDT Trading pairs
[Back To Top](#toc)

The codes in Section 1.1 are referenced from Nicholas Renotte in the video: Exploratory Data Analysis with the Binance API using Python and Pandas | MLTrader EP1 on Youtube. 

https://www.youtube.com/watch?v=4aqx5P2Y38U&t=545s

<a class="anchor" id="1.1"></a>
### 1.1.1 Access all USDT pairs
[Back To Top](#toc)

In [3]:
# Access all ticket + immedate price
tickers = client.get_all_tickers()
#put them into a DF
tickers_df = pd.DataFrame(tickers)

# Filter out only ticker that is pair USDT
tickers_df = tickers_df[tickers_df['symbol'].str.contains('USDT')]

# Filter out all the forex and stable coins
tickers_df = tickers_df[tickers_df['symbol'].str.contains('^USD*')==False]
# Use Symbol as the index instead of default index
tickers_df.set_index('symbol', inplace =True) #inplace=True apply the function to the dataframe

# check the DF
tickers_df.shape

(413, 1)

In [4]:
# use this code to check for a specific pair
tickers_df.loc['BTCUSDT']

price    17135.88000000
Name: BTCUSDT, dtype: object

In [5]:
# Check he DF:
tickers_df.head()

Unnamed: 0_level_0,price
symbol,Unnamed: 1_level_1
BTCUSDT,17135.88
ETHUSDT,1269.89
BNBUSDT,288.0
BCCUSDT,448.7
NEOUSDT,7.2


In [6]:
# Check for the newest crypto pairs on Binance
tickers_df.index[tickers_df.shape[0]-1]

'HOOKUSDT'

<a class="anchor" id="1.1.2"></a>
### 1.1.2 Extracting K-line of USDT pairs 1 DAY
[Back To Top](#toc)

In [67]:
#HISTORICAL DATA IS K-line
for i in range(0,len(tickers_df.index)-1):
    
    # Access all the K-line data (all price and volume) of a symbol
    #Access date 2000 days ago: Binance listed the crypto in Aug 2017 so 2000 days ago will cover all crypto listed since 2017
    date_2K_ago = str(datetime.now() + timedelta(days=-2000))
    
    historical = client.get_historical_klines(tickers_df.index[i], client.KLINE_INTERVAL_1DAY, start_str=date_2K_ago)
    
    # Put the data into a DF
    hist_df = pd.DataFrame(historical)

    # Name the columns then convert the Open and Close time to second
    hist_df.columns = ['Open time', 'Open', 'High', 'Low', 'Close','Volume', 'Close time', 'Quote asset volume',
                       'Number of trades','Taker buy base asset volume', 'Taker buy quote asset volume', 'ignore']
    hist_df['Open time'] = pd.to_datetime(hist_df['Open time']/ 1000, unit ='s')
    hist_df['Close time'] = pd.to_datetime(hist_df['Close time']/ 1000, unit ='s')

    # Convert all numeric columns into the right format: float64
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote asset volume', 'Taker buy base asset volume', 'Taker buy quote asset volume']
    hist_df[numeric_columns] = hist_df[numeric_columns].apply(pd.to_numeric, axis=1)

    # Add a column with a symbol of the crypto
    hist_df['symbol'] = tickers_df.index[i]
    
    #Save to .csv 
    path = r'C:\Users\ethai\OneDrive\1.1. Education\1. BrainStation\0. Capstone\data_1D\ ' #super important to have 'r' and '\ ' -backward slash with space in the end
    hist_df.to_csv(path + tickers_df.index[i]+'_1D.csv')

IndentationError: unexpected indent (1030770868.py, line 6)

<a class="anchor" id="1.1.3"></a>
### 1.1.3 Extracting K-line of USDT pairs 1H
[Back To Top](#toc)

We will be more interested in extracting the Kline 1H as it give us more rows ( about 7M rows) comparing to 1D (about 300K rows). In addition, the 1H dataset will give us flexibility in using time series 

In [8]:
#HISTORICAL DATA IS K-line
for i in range(0,len(tickers_df.index)-1):
    
    # Access all the K-line data (all price and volume) of a symbol
    #Access date 2000 days ago: Binance listed the crypto in Aug 2017 so 2000 days ago will cover all crypto listed since 2017
    date_2K_ago = str(datetime.now() + timedelta(days=-2000))
    
    historical = client.get_historical_klines(tickers_df.index[i], client.KLINE_INTERVAL_1HOUR, start_str=date_2K_ago)
    
    # Put the data into a DF
    hist_df = pd.DataFrame(historical)

    # Name the columns then convert the Open and Close time to second
    hist_df.columns = ['Open time', 'Open', 'High', 'Low', 'Close','Volume', 'Close time', 'Quote asset volume',
                       'Number of trades','Taker buy base asset volume', 'Taker buy quote asset volume', 'ignore']
    hist_df['Open time'] = pd.to_datetime(hist_df['Open time']/ 1000, unit ='s')
    hist_df['Close time'] = pd.to_datetime(hist_df['Close time']/ 1000, unit ='s')

    # Convert all numeric columns into the right format: float64
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote asset volume', 'Taker buy base asset volume', 'Taker buy quote asset volume']
    hist_df[numeric_columns] = hist_df[numeric_columns].apply(pd.to_numeric, axis=1)

    # Add a column with a symbol of the crypto
    hist_df['symbol'] = tickers_df.index[i]
    
    #Save to .csv 
    path = r'C:\Users\ethai\OneDrive\1.1. Education\1. BrainStation\0. Capstone\data_1HH\ ' #super important to have 'r' and '\ ' -backward slash with space in the end
    hist_df.to_csv(path + tickers_df.index[i] + '_1H.csv')

<a class="anchor" id="1.2"></a>
## 1.2 Access all BTC Trading pairs
[Back To Top](#toc)

The following codes are similar to USDT 1 hour, except the crypto pairs will be Crypto-BTC. This is similar to Stock-Gold, rather than USD. This is for REFERENCE ONLY. The data will not be used for further analysis.

<a class="anchor" id="1.2.1"></a>
### 1.2.1 Extracting K-line of BTC pairs 1H
[Back To Top](#toc)

This will access all crypto pairs that end with BTC

In [None]:
#Access all ticket + immedate price
tickers_btc = client.get_all_tickers()
#put them into a DF
tickers_btc_df = pd.DataFrame(tickers_btc)

#Filter out only ticker that is pair END with BTC = 'BTC$' $ means end string
tickers_btc_df = tickers_btc_df[tickers_btc_df['symbol'].str.contains('BTC$')]

#Use Symbol as the index instead of default index
tickers_btc_df.set_index('symbol', inplace =True) #inplace=True apply the function to the dataframe

#check the DF
tickers_btc_df.shape

(411, 1)

In [None]:
# Check for the newest crypto pairs BTC on Binance
tickers_btc_df.index[tickers_btc_df.shape[0]-1]

'OSMOBTC'

In [None]:
date_2K_ago = str(datetime.now() + timedelta(days=-2000))
date_2K_ago

'2017-05-11 22:58:23.616417'

In [None]:
#HISTORICAL DATA IS K-line
for i in range(0,len(tickers_btc_df.index)-1):
    
    # Access all the K-line data (all price and volume) of a symbol
    #Access date 2000 days ago: Binance listed the crypto in Aug 2017 so 2000 days ago will cover all crypto listed since 2017
    date_2K_ago = str(datetime.now() + timedelta(days=-2000))
    
    historical_btc = client.get_historical_klines(tickers_btc_df.index[i], client.KLINE_INTERVAL_1HOUR, start_str=date_2K_ago)
    
    # Put the data into a DF
    hist_btc_df = pd.DataFrame(historical_btc)

    # Name the columns then convert the Open and Close time to second
    hist_btc_df.columns = ['Open time', 'Open', 'High', 'Low', 'Close','Volume', 'Close time', 'Quote asset volume',
                       'Number of trades','Taker buy base asset volume', 'Taker buy quote asset volume', 'ignore']
    hist_btc_df['Open time'] = pd.to_datetime(hist_btc_df['Open time']/ 1000, unit ='s')
    hist_btc_df['Close time'] = pd.to_datetime(hist_btc_df['Close time']/ 1000, unit ='s')

    # Convert all numeric columns into the right format: float64
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Quote asset volume', 'Taker buy base asset volume', 'Taker buy quote asset volume']
    hist_btc_df[numeric_columns] = hist_btc_df[numeric_columns].apply(pd.to_numeric, axis=1)

    # Add a column with a symbol of the crypto
    hist_btc_df['symbol'] = tickers_btc_df.index[i]
    
    #Save to .pkl 
    path = r'C:\Users\ethai\OneDrive\1.1. Education\1. BrainStation\0. Capstone\data_1H_btc\ ' #super important to have 'r' and '\ ' -backward slash with space in the end
    hist_btc_df.to_pickle(path + tickers_btc_df.index[i] + '_1H.pkl')

<a class="anchor" id="1.3"></a>
## 1.3 Merging files
[Back To Top](#toc)

One of the mistake is saving the dataset into .csv which exceed its capacity 'The dataset is too large. loading the dataset may lose some data' - warning from tryin to open the .csv file. Thus, we found an article showing other domain to save the file into and pickle seem to be the best one

In [None]:
#FOR 1D 
# Define the file extension
all_files_1D = glob.glob('data_1D/*1D.csv')
all_files_1D
# combine all files in the list
combined_1D = pd.concat((pd.read_csv(f, index_col=None, header=0) for f in all_files_1D))
# export to csv
combined_1D.to_csv("1D_combined.csv")

Use this website to learn about difference type of options other than csv
https://towardsdatascience.com/still-saving-your-data-in-csv-try-these-other-options-9abe8b83db3a

In [3]:
#FOR 1H
# Define the file extension
all_files_1H = glob.glob('data_1HH/*1H.csv')
all_files_1H
# combine all files in the list
combined_1H = pd.concat((pd.read_csv(f, index_col=None, header=0) for f in all_files_1H))
# export to pickle to save size as csv cant handle large size
combined_1H.to_pickle("1HH_combined.pkl")

This method is for the BTC pairs. As the files are saved as .pkl thus, only need to concat all of them together

In [None]:
#FOR 1H
# Define the file extension
all_files_1H_btc = glob.glob('data_1H_btc/*1H.pkl')

# Combine all files in the list
combined_1H_btc = pd.concat((pd.read_pickle(f) for f in all_files_1H_btc))
# Export to pickle to save size as csv cant handle large size
combined_1H_btc.to_pickle("1H_combined_btc.pkl")

<a class="anchor" id="1.4"></a>
## 1.4 Basic EDA and Cleaning DF
[Back To Top](#toc)

<a class="anchor" id="1.4.1"></a>
### 1.4.1 Cleaning USDT pairs
[Back To Top](#toc)

In [3]:
# Load the .pkl file
combined_1Hv1 = pd.read_pickle('1HH_combined.pkl')
combined_1Hv1

# Drop the uneccessary column (already done in v4 but didnt save correctly)
combined_1Hv1 = combined_1Hv1.drop(["Unnamed: 0", 'ignore'], axis=1)

# Convert Open time and Close time to datetime format
combined_1Hv1['Open time'] = pd.to_datetime(combined_1Hv1['Open time'])
# Also round the 'Close time to the nearest second'
combined_1Hv1['Close time'] = pd.to_datetime(combined_1Hv1['Close time'])

# Round these columns to 2 decimals to make them look better
combined_1Hv1['Quote asset volume'] = combined_1Hv1['Quote asset volume'].round(2)
combined_1Hv1['Taker buy quote asset volume'] = combined_1Hv1['Taker buy quote asset volume'].round(2)
combined_1Hv1['Close time'] = combined_1Hv1['Close time'].round('1s')

# Check the dataframe
display(combined_1Hv1)

# Check for dF dtype
print(combined_1Hv1.info(show_counts=True))

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,symbol
0,2021-04-15 07:00:00,10.0000,10.2500,9.9700,10.0700,9367.65,2021-04-15 08:00:00,95067.46,816,7204.75,73150.56,1INCHDOWNUSDT
1,2021-04-15 08:00:00,10.0300,10.3000,9.7400,10.2100,2426.47,2021-04-15 09:00:00,24341.43,197,1115.29,11307.37,1INCHDOWNUSDT
2,2021-04-15 09:00:00,10.1700,10.2500,9.8600,9.9200,2581.83,2021-04-15 10:00:00,25920.09,200,1799.05,18031.08,1INCHDOWNUSDT
3,2021-04-15 10:00:00,9.9500,10.2000,9.7500,9.9700,1918.39,2021-04-15 11:00:00,19040.88,224,970.42,9642.83,1INCHDOWNUSDT
4,2021-04-15 11:00:00,9.9900,9.9900,8.7800,9.1000,9166.34,2021-04-15 12:00:00,84599.23,727,7616.09,70373.02,1INCHDOWNUSDT
...,...,...,...,...,...,...,...,...,...,...,...,...
33099,2022-12-10 18:00:00,0.1920,0.1934,0.1918,0.1930,351724.00,2022-12-10 19:00:00,67712.02,305,191306.00,36844.46,ZRXUSDT
33100,2022-12-10 19:00:00,0.1931,0.1936,0.1928,0.1929,108416.00,2022-12-10 20:00:00,20945.84,190,44794.00,8654.92,ZRXUSDT
33101,2022-12-10 20:00:00,0.1928,0.1931,0.1919,0.1930,329179.00,2022-12-10 21:00:00,63382.62,221,122794.00,23663.61,ZRXUSDT
33102,2022-12-10 21:00:00,0.1930,0.1931,0.1922,0.1922,60141.00,2022-12-10 22:00:00,11595.18,132,17820.00,3438.39,ZRXUSDT


<class 'pandas.core.frame.DataFrame'>
Int64Index: 7046348 entries, 0 to 33103
Data columns (total 12 columns):
 #   Column                        Non-Null Count    Dtype         
---  ------                        --------------    -----         
 0   Open time                     7046348 non-null  datetime64[ns]
 1   Open                          7046348 non-null  float64       
 2   High                          7046348 non-null  float64       
 3   Low                           7046348 non-null  float64       
 4   Close                         7046348 non-null  float64       
 5   Volume                        7046348 non-null  float64       
 6   Close time                    7046348 non-null  datetime64[ns]
 7   Quote asset volume            7046348 non-null  float64       
 8   Number of trades              7046348 non-null  int64         
 9   Taker buy base asset volume   7046348 non-null  float64       
 10  Taker buy quote asset volume  7046348 non-null  float64       
 11  

<a class="anchor" id="1.4.2"></a>
### 1.4.2 Remove all Forex and Stable coin pairs
[Back To Top](#toc)

Earlier during the extraction, there were a few crypto pairs that were STABLE COINS got added into the .pkl. This is not good as stable coins is a 1:1 conversion, for example USDT:BUSD will be the same. The chart will have a straight line at 1.0 USD, just that USDT is from Tether and BUSD is from Binance. They make their own stable coin which is based on the USD. In addition, there are forex crypto pairs such as USDT:ZAR(South African dollar), USDT:RUB (Russian Rubble). We will remove all these forex coins as well. It is important to eliminate these stable coins and forex coins from the dataset otherwise it will affect the predictions. <br>

Using regex101.com to check for those stable coins. they will have the form 'USDT***'

In [4]:
# Check for all the stable coins and forex
combined_1Hv2 = combined_1Hv1[combined_1Hv1['symbol'].str.contains('^USD*')==True]

# Counts all unique stable coins and forex
print(combined_1Hv2['symbol'].value_counts())

Series([], Name: symbol, dtype: int64)


In [5]:
# Dropp all the stable coints and forex by using ==False, save it to the new DF
combined_1Hv2 = combined_1Hv1[combined_1Hv1['symbol'].str.contains('^USD*')==False]

# Check the new DF
combined_1Hv2.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7046348 entries, 0 to 33103
Data columns (total 12 columns):
 #   Column                        Non-Null Count    Dtype         
---  ------                        --------------    -----         
 0   Open time                     7046348 non-null  datetime64[ns]
 1   Open                          7046348 non-null  float64       
 2   High                          7046348 non-null  float64       
 3   Low                           7046348 non-null  float64       
 4   Close                         7046348 non-null  float64       
 5   Volume                        7046348 non-null  float64       
 6   Close time                    7046348 non-null  datetime64[ns]
 7   Quote asset volume            7046348 non-null  float64       
 8   Number of trades              7046348 non-null  int64         
 9   Taker buy base asset volume   7046348 non-null  float64       
 10  Taker buy quote asset volume  7046348 non-null  float64       
 11  

Great, now the dataset is much cleaner for USDT pairs. Let's save them into a new .csv and .pkl file for further EDA

In [6]:
# Save to .csv
combined_1Hv2.to_csv("1H_combined_clean.csv")

# Save to pickle
#combined_1Hv2.to_pickle("1H_combined_clean.pkl")

### DType Conversion from Float64 to Float32 - REFERENCE ONLY - DO NOT USE

DO NOT CONVERT `FLOAT64` to `FLOAT32` otherwise the round(2) will not work

The following transformation is based on this research https://python.plainenglish.io/predicting-cryptocurrency-values-with-gradient-boosting-machines-using-python-70fe8608b75b. The purpose is to make the dataset smaller and faster to process.

However, after testing, the float32 do not be able to .round(2), it will keep all the decimals. More info here https://stackoverflow.com/questions/455612/limiting-floats-to-two-decimal-points

In [None]:
# Convert all float64 to float32 to reduce file size, we wont perform complex operations, 
#however keep the record here just incase we need to reconvert

# Let assign the 1Hv2 into a new DF
combined_1Hv1_32 = combined_1Hv2

# Select columns with 'float64' dtype and int64
float64_cols = list(combined_1Hv1.select_dtypes(include='float64'))
int64_cols = list(combined_1Hv1.select_dtypes(include='int64'))

# The same code again calling the columns
combined_1Hv1_32[float64_cols] = combined_1Hv1[float64_cols].astype('float32')
combined_1Hv1_32[int64_cols] = combined_1Hv1[int64_cols].astype('int32') #Number of trades CAN only be an integer

# Check for dF dtypes 2
combined_1Hv1_32.info()

### Save data to JSON to check using Tableau

DO NOT CONVERT DATA TO JSON. For Tableau, .csv is enough, Tableau cannot read JSON file created by Python due to different indices and diff format. 

In [None]:
# Check dataframe
combined_1Hv2.head()

#Reset_index is required to save the dataset to .json or perform calculation due to dropping rows earlier in STABLE COINS
combined_1Hv2.reset_index(inplace=True)

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,symbol
0,2021-04-15 07:00:00,10.0,10.25,9.97,10.07,9367.65,2021-04-15 08:00:00,95067.46,816,7204.75,73150.56,1INCHDOWNUSDT
1,2021-04-15 08:00:00,10.03,10.3,9.74,10.21,2426.47,2021-04-15 09:00:00,24341.43,197,1115.29,11307.37,1INCHDOWNUSDT
2,2021-04-15 09:00:00,10.17,10.25,9.86,9.92,2581.83,2021-04-15 10:00:00,25920.09,200,1799.05,18031.08,1INCHDOWNUSDT
3,2021-04-15 10:00:00,9.95,10.2,9.75,9.97,1918.39,2021-04-15 11:00:00,19040.88,224,970.42,9642.83,1INCHDOWNUSDT
4,2021-04-15 11:00:00,9.99,9.99,8.78,9.1,9166.34,2021-04-15 12:00:00,84599.23,727,7616.09,70373.02,1INCHDOWNUSDT


#### Brief Data Analysis Using Tableau

- VENUSDT is VEChain which later on converted to VETUSDT, Need to drop VENUSDT. Source: https://vechaininsider.com/guides/a-complete-guide-to-the-vechain-ven-to-vet-token-swap/

- NBTUSDT 

<a class="anchor" id="1.4.3"></a>
### 1.4.3 EDA BTC
[Back To Top](#toc)

In [None]:
# Read the combined pickle file of BTC
combined_1H_btc = pd.read_pickle('1H_combined_btc.pkl')

# Drop the uneccessary column (already done in v4 but didnt save correctly)
combined_1H_btc = combined_1H_btc.drop(['ignore'], axis=1)

# Round these columns to 2 decimals to make them look better
combined_1H_btc['Quote asset volume'] = combined_1H_btc['Quote asset volume'].round(2)
combined_1H_btc['Taker buy quote asset volume'] = combined_1H_btc['Taker buy quote asset volume'].round(2)
combined_1H_btc['Close time'] = combined_1H_btc['Close time'].round('1s')


# Check the DF
# Display will make the table look nice, print may not
display(combined_1H_btc) 

# Check for dF dtypes
print(combined_1H_btc.info(show_counts=True))

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,symbol
0,2020-12-25 05:00:00,0.000009,0.000168,0.000009,0.000109,2207916.0,2020-12-25 06:00:00,256.88,17168,848358.9,99.52,1INCHBTC
1,2020-12-25 06:00:00,0.000109,0.000113,0.000094,0.000107,1833765.2,2020-12-25 07:00:00,186.91,11343,678073.5,68.95,1INCHBTC
2,2020-12-25 07:00:00,0.000106,0.000123,0.000100,0.000110,2067113.5,2020-12-25 08:00:00,236.29,12743,922906.3,105.74,1INCHBTC
3,2020-12-25 08:00:00,0.000112,0.000120,0.000103,0.000110,1570496.5,2020-12-25 09:00:00,175.10,10418,763736.7,84.82,1INCHBTC
4,2020-12-25 09:00:00,0.000110,0.000116,0.000107,0.000110,862685.8,2020-12-25 10:00:00,96.20,6533,426465.8,47.59,1INCHBTC
...,...,...,...,...,...,...,...,...,...,...,...,...
44901,2022-11-02 02:00:00,0.000012,0.000012,0.000012,0.000012,17619.0,2022-11-02 03:00:00,0.22,97,2737.0,0.03,ZRXBTC
44902,2022-11-02 03:00:00,0.000012,0.000012,0.000012,0.000012,11673.0,2022-11-02 04:00:00,0.14,39,6312.0,0.08,ZRXBTC
44903,2022-11-02 04:00:00,0.000012,0.000012,0.000012,0.000012,3925.0,2022-11-02 05:00:00,0.05,18,1876.0,0.02,ZRXBTC
44904,2022-11-02 05:00:00,0.000012,0.000012,0.000012,0.000012,17741.0,2022-11-02 06:00:00,0.22,109,1349.0,0.02,ZRXBTC


Save a clean version as .csv and .pkl. Use .csv for analyzing with Tableau.

In [None]:
# Save to .csv
combined_1H_btc.to_csv("1H_combined_clean_btc.csv")

# Save to pickle
combined_1H_btc.to_pickle("1H_combined_clean_btc.pkl")

#### Brief Data Analysis Using Tableau

BRIEF ANALYSIS, found a few cryto pairs to drop
- BTCBBTC Bitcoin BEP2 is a Bitcoin clone issued on Binance Chain. Its price is always as close as possible to the price of the underlying asset – Bitcoin – and each BTCB is backed by an actual BTC. The pegged BTCB tokens are therefore 100% backed by their own reserve coin, Bitcoin.
- RENTBTCBTC RenBTC becomes the latest in a rash of products built to expose bitcoin-backed assets to the benefits of Ethereum's various decentralized finance (DeFi) platforms. 1 RenBTC is equivalent to 1 BTC, although the first is actually a ERC-20 token running on Ethereum.
- WBTCBTC wBTC Wrapped Bitcoin (WBTC) is the first ERC20 token backed 1:1 with Bitcoin. 
