In [1]:
# Import Dependencies and set variables
import pandas as pd
from pycoingecko import CoinGeckoAPI
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
# Set Variables
# Quote currency; passed as `vs_currency` to the CoinGecko price endpoints below
currency = 'usd'
# Date window used below to filter the OHLC and market-chart frames
from_date = datetime(2018, 10, 31)
end_date = datetime(2021, 10, 31)
# Shared CoinGecko client used by the API calls below
cg = CoinGeckoAPI()

In [3]:
# Functions
def convert_timestamp(df):
    """Convert the ``date`` column of ``df`` from Unix seconds to calendar dates.

    The column is overwritten in place with ``datetime64`` values normalised
    to midnight. Timestamps are interpreted as UTC, so the resulting date does
    not depend on the timezone of the machine running the notebook (the
    previous ``datetime.fromtimestamp`` implementation used local time).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column of Unix timestamps in seconds, given as
        integers or as values castable to ``int64`` (e.g. digit strings).

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    # Cast first so digit strings and floats are accepted, as int(d) did before.
    seconds = df['date'].astype('int64')
    # Vectorised conversion; normalize() drops the time-of-day component.
    df['date'] = pd.to_datetime(seconds, unit='s').dt.normalize()
    

## Pull Exchanges from API

In [4]:
# Gather list of exchanges
# Called with no arguments; the frame built from this list below reports 100 entries
exchanges_list = cg.get_exchanges_list()
# Peek at the first record to see which fields each exchange dict carries
exchanges_list[0]

{'id': 'binance',
 'name': 'Binance',
 'year_established': 2017,
 'country': 'Cayman Islands',
 'description': '',
 'url': 'https://www.binance.com/',
 'image': 'https://assets.coingecko.com/markets/images/52/small/binance.jpg?1519353250',
 'has_trading_incentive': False,
 'trust_score': 10,
 'trust_score_rank': 1,
 'trade_volume_24h_btc': 835107.122180128,
 'trade_volume_24h_btc_normalized': 835107.122180128}

In [5]:
# Build the exchanges table: discard the fields the analysis does not use,
# key the rows by exchange id and order them by 24h BTC volume, largest first
exchanges_df = (
    pd.DataFrame(exchanges_list)
    .drop(columns=[
        'description', 'url', 'image', 'has_trading_incentive',
        'trust_score', 'trust_score_rank', 'trade_volume_24h_btc_normalized',
    ])
    .set_index('id')
    .sort_values(by=['trade_volume_24h_btc'], ascending=False)
)

exchanges_df

Unnamed: 0_level_0,name,year_established,country,trade_volume_24h_btc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
binance,Binance,2017.0,Cayman Islands,835107.122180
okex,OKEx,2013.0,Belize,192815.944754
gdax,Coinbase Exchange,2012.0,United States,188108.073829
huobi,Huobi Global,2013.0,Seychelles,155626.390269
coinflex,CoinFLEX,2019.0,Seychelles,127303.981558
...,...,...,...,...
narkasa,Narkasa,2020.0,Turkey,67.256693
therocktrading,TheRockTrading,2011.0,Italy,54.108791
nice_hash,NiceHash,,Slovenia,50.164655
kuna,Kuna Exchange,,United Kingdom,32.940417


## Analyze Exchanges DataFrame

In [6]:
# Find dtypes of Exchanges DataFrame and change if necessary
print('dtypes before >>>\n')
print(exchanges_df.dtypes)

# convert_dtypes() moves the columns to pandas' nullable extension dtypes
# (the output below shows object -> string and float64 -> Int64 / Float64)
exchanges_df = exchanges_df.convert_dtypes()

print('\ndtypes after >>>\n')
print(exchanges_df.dtypes)

dtypes before >>>

name                     object
year_established        float64
country                  object
trade_volume_24h_btc    float64
dtype: object

dtypes after >>>

name                     string
year_established          Int64
country                  string
trade_volume_24h_btc    Float64
dtype: object


In [7]:
# Find and Fill NA Values
# DataFrame.info() prints its report itself and returns None, so it is called
# bare: wrapping it in print() emits a stray "None" line after the report.
exchanges_df.info(verbose=True)

# Fill/Drop NA columns
# A missing country gets a placeholder so the row is kept; rows that still
# hold an NA in any other column are then dropped.
exchanges_df = exchanges_df.assign(
    country=exchanges_df['country'].fillna('No Country Provided')
).dropna()

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, binance to lcx
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   name                  100 non-null    string 
 1   year_established      91 non-null     Int64  
 2   country               90 non-null     string 
 3   trade_volume_24h_btc  100 non-null    Float64
dtypes: Float64(1), Int64(1), string(2)
memory usage: 4.1+ KB
None


In [8]:
# Describe columns that are numeric
# describe() is left at its defaults; per the output below it summarises
# year_established and trade_volume_24h_btc
print(exchanges_df.describe())

       year_established  trade_volume_24h_btc
count         91.000000             91.000000
mean        2016.670330          33196.492319
std            2.458793          93893.850544
min         2011.000000             11.201304
25%         2014.000000           1925.289912
50%         2018.000000           7822.110053
75%         2018.000000          26014.841185
max         2020.000000         835107.122180


In [9]:
# Keep only the exchanges whose 24h BTC trade volume is at or above the median.
# NOTE: despite its name, `top50` is the 0.50 quantile of the volume column, so
# this keeps roughly the top 50 *percent* of exchanges, not a fixed 50 rows.
top50=exchanges_df['trade_volume_24h_btc'].quantile(.50)
print(top50)
exchanges_df = exchanges_df[exchanges_df['trade_volume_24h_btc'] >= top50]

7822.110052662963


In [10]:
# Give the columns and the index their final presentation names
exchanges_df = (
    exchanges_df
    .rename(columns={
        'name': 'Name',
        'year_established': 'Year_Established',
        'country': 'Country',
        'trade_volume_24h_btc': 'BTC_24hVolume',
    })
    .rename_axis('ExchangeID')
)

exchanges_df

Unnamed: 0_level_0,Name,Year_Established,Country,BTC_24hVolume
ExchangeID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
binance,Binance,2017,Cayman Islands,835107.12218
okex,OKEx,2013,Belize,192815.944754
gdax,Coinbase Exchange,2012,United States,188108.073829
huobi,Huobi Global,2013,Seychelles,155626.390269
coinflex,CoinFLEX,2019,Seychelles,127303.981558
hitbtc,HitBTC,2013,No Country Provided,109900.199506
bitcoin_com,FMFW.io,2019,Bahamas,106697.711118
changelly,Changelly PRO,2020,Seychelles,96488.783328
zbg,ZBG,2018,China,94418.072979
kucoin,KuCoin,2014,Seychelles,80757.086179


In [11]:
# Keep the exchange ids (the frame's index) as a plain list for the API loops below
exchange_id_list = exchanges_df.index.tolist()
print(len(exchange_id_list))
exchange_id_list

46


['binance',
 'okex',
 'gdax',
 'huobi',
 'coinflex',
 'hitbtc',
 'bitcoin_com',
 'changelly',
 'zbg',
 'kucoin',
 'ftx_spot',
 'crypto_com',
 'upbit',
 'bkex',
 'bitrue',
 'binance_us',
 'zb',
 'kraken',
 'uniswap',
 'bitmart',
 'xt',
 'bithumb',
 'bitfinex',
 'digifinex',
 'mxc',
 'p2pb2b',
 'btcturk',
 'pancakeswap_new',
 'whitebit',
 'bigone',
 'lbank',
 'bitforex',
 'latoken',
 'uniswap_v2',
 'coinbene',
 'aax',
 'ftx_us',
 'wootrade',
 'hbtc',
 'aex',
 'goku',
 'bitstamp',
 'bibox',
 'sushiswap',
 'wazirx',
 'phemex']

## Pull ticker data from API

In [12]:
# Gather ticker data for exchanges in list
# Fields of the raw ticker records that the analysis does not use
columns_to_drop = ['market', 'bid_ask_spread_percentage',  'converted_last',
       'converted_volume', 'trust_score',
       'last_traded_at', 'last_fetch_at', 'is_anomaly',
       'is_stale', 'trade_url', 'token_info_url']

# Collect one frame per exchange and concatenate once at the end. The old
# per-iteration DataFrame.append was removed in pandas 2.0 and re-copied the
# whole accumulated frame on every pass.
ticker_frames = []
for exchange in exchange_id_list:
    print(f'Gathering Data for {exchange}...')

    # NOTE(review): called without a page argument — presumably only the first
    # page of tickers per exchange is returned; confirm against the API docs.
    exchange_data = cg.get_exchanges_tickers_by_id(id = exchange)

    # Drop unused columns and apply the notebook's column naming
    temp_df = (
        pd.DataFrame(exchange_data['tickers'])
        .drop(columns = columns_to_drop)
        .rename(columns = {'base': 'Ticker', 'last': 'LastPrice', 'volume': 'Volume',
                           'timestamp': 'Time', 'coin_id':'CoinID'})
    )
    # Tag every row with the exchange it came from
    temp_df['ExchangeID'] = exchange
    ticker_frames.append(temp_df)

# ignore_index gives one continuous 0..N-1 index across all exchanges
tickers_df = pd.concat(ticker_frames, ignore_index=True)

print('Finished Gathering Data!')

Gathering Data for binance...
Gathering Data for okex...
Gathering Data for gdax...
Gathering Data for huobi...
Gathering Data for coinflex...
Gathering Data for hitbtc...
Gathering Data for bitcoin_com...
Gathering Data for changelly...
Gathering Data for zbg...
Gathering Data for kucoin...
Gathering Data for ftx_spot...
Gathering Data for crypto_com...
Gathering Data for upbit...
Gathering Data for bkex...
Gathering Data for bitrue...
Gathering Data for binance_us...
Gathering Data for zb...
Gathering Data for kraken...
Gathering Data for uniswap...
Gathering Data for bitmart...
Gathering Data for xt...
Gathering Data for bithumb...
Gathering Data for bitfinex...
Gathering Data for digifinex...
Gathering Data for mxc...
Gathering Data for p2pb2b...
Gathering Data for btcturk...
Gathering Data for pancakeswap_new...
Gathering Data for whitebit...
Gathering Data for bigone...
Gathering Data for lbank...
Gathering Data for bitforex...
Gathering Data for latoken...
Gathering Data for uni

In [13]:
# View Tickers Table
# (bare last expression: the notebook renders the combined frame built by the loop above)
tickers_df

Unnamed: 0,Ticker,target,LastPrice,Volume,Time,CoinID,target_coin_id,ExchangeID
0,SHIB,USDT,0.000068,1.645127e+14,2021-10-28T19:49:30+00:00,shiba-inu,tether,binance
1,BUSD,USDT,0.999600,1.888287e+09,2021-10-28T20:45:09+00:00,binance-usd,tether,binance
2,BTC,USDT,61914.910000,5.605619e+04,2021-10-28T20:25:54+00:00,bitcoin,tether,binance
3,DOGE,USDT,0.303800,1.681646e+10,2021-10-28T20:35:04+00:00,dogecoin,tether,binance
4,ETH,USDT,4266.410000,5.903070e+05,2021-10-28T20:32:40+00:00,ethereum,tether,binance
...,...,...,...,...,...,...,...,...
4311,LTC,USDT,193.990000,7.800672e+03,2021-10-28T20:36:02+00:00,litecoin,tether,phemex
4312,SUSHI,USDT,10.906000,1.474883e+05,2021-10-28T20:36:02+00:00,sushi,tether,phemex
4313,ZEC,USDT,158.670000,3.388347e+03,2021-10-28T20:36:02+00:00,zcash,tether,phemex
4314,SNX,USDT,9.828000,7.727867e+04,2021-10-28T20:36:02+00:00,havven,tether,phemex


## Analyze Tickers DataFrame

In [14]:
# Find Target Value Counts to keep only USD and USD stablecoins to have value in USD
# NOTE(review): `target` is presumably the quote side of each ticker pair —
# confirm against the CoinGecko tickers schema.
target_value_counts = tickers_df.target.value_counts()
print(f'The Value Counts for Target Tickers are \n{target_value_counts}')

The Value Counts for Target Tickers are 
USDT                                          2104
BTC                                            616
USD                                            356
ETH                                            233
KRW                                            156
                                              ... 
0XC221B7E65FFC80DE234BBB6667ABDD46593D34F0       1
0XD075E95423C5C4BA1E122CAE0F4CDFA19B82881B       1
0XEDB0414627E6F1E3F082DE65CD4F9C693D78CCA9       1
0XFB130D93E49DCA13264344966A611DC79A456BC5       1
0XEB953EDA0DC65E3246F43DC8FA13F35623BDD5ED       1
Name: target, Length: 114, dtype: int64


In [15]:
# Keep only the tickers whose target is USD or one of the USD stablecoins listed here
tickers_df = tickers_df[
    tickers_df['target'].isin(['USDT', 'USD', 'USDC', 'BUSD', 'TUSD', 'HUSD'])
]

tickers_df.target.value_counts()

USDT    2104
USD      356
USDC      80
BUSD      55
TUSD      11
HUSD       3
Name: target, dtype: int64

In [16]:
# Drop the two target columns (not needed for the analysis) and label the index
tickers_df = (
    tickers_df
    .drop(columns=['target', 'target_coin_id'])
    .rename_axis('TickerID')
)

In [17]:
# Preview the first rows of the trimmed tickers table
tickers_df.head()

Unnamed: 0_level_0,Ticker,LastPrice,Volume,Time,CoinID,ExchangeID
TickerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,SHIB,6.8e-05,164512700000000.0,2021-10-28T19:49:30+00:00,shiba-inu,binance
1,BUSD,0.9996,1888287000.0,2021-10-28T20:45:09+00:00,binance-usd,binance
2,BTC,61914.91,56056.19,2021-10-28T20:25:54+00:00,bitcoin,binance
3,DOGE,0.3038,16816460000.0,2021-10-28T20:35:04+00:00,dogecoin,binance
4,ETH,4266.41,590307.0,2021-10-28T20:32:40+00:00,ethereum,binance


In [38]:
# Check and Fix Dtypes
# Find dtypes of Tickers DataFrame and change if necessary
print('dtypes before >>>\n')
print(tickers_df.dtypes)

tickers_df = tickers_df.convert_dtypes()
# Reduce the timestamps to timezone-naive calendar dates (midnight, UTC) with
# vectorised .dt accessors instead of a row-wise strftime + re-parse round trip.
tickers_df['Time'] = (
    pd.to_datetime(tickers_df['Time'], utc=True)
    .dt.tz_localize(None)
    .dt.normalize()
)

print('\ndtypes after >>>\n')
print(tickers_df.dtypes)

dtypes before >>>

Ticker         string
LastPrice     Float64
Volume        Float64
Time           object
CoinID         string
ExchangeID     string
dtype: object

dtypes after >>>

Ticker                string
LastPrice            Float64
Volume               Float64
Time          datetime64[ns]
CoinID                string
ExchangeID            string
dtype: object


In [39]:
# Investigate Further for Null Values
# info() prints the report itself and returns None; wrapping it in print()
# adds a stray "None" line after the report.
tickers_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2609 entries, 0 to 4315
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Ticker      2609 non-null   string        
 1   LastPrice   2609 non-null   Float64       
 2   Volume      2609 non-null   Float64       
 3   Time        2609 non-null   datetime64[ns]
 4   CoinID      2609 non-null   string        
 5   ExchangeID  2609 non-null   string        
dtypes: Float64(2), datetime64[ns](1), string(3)
memory usage: 147.8 KB
None


In [40]:
# Display final form of tickers table
# (after the USD filter, column drop and dtype fixes applied in the cells above)
tickers_df

Unnamed: 0_level_0,Ticker,LastPrice,Volume,Time,CoinID,ExchangeID
TickerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,SHIB,0.000068,164512682076585.90625,2021-10-28,shiba-inu,binance
1,BUSD,0.9996,1888286899.985994,2021-10-28,binance-usd,binance
2,BTC,61914.91,56056.193828,2021-10-28,bitcoin,binance
3,DOGE,0.3038,16816461123.402624,2021-10-28,dogecoin,binance
4,ETH,4266.41,590307.018567,2021-10-28,ethereum,binance
...,...,...,...,...,...,...
4311,LTC,193.99,7800.671753,2021-10-28,litecoin,phemex
4312,SUSHI,10.906,147488.294372,2021-10-28,sushi,phemex
4313,ZEC,158.67,3388.3467,2021-10-28,zcash,phemex
4314,SNX,9.828,77278.671568,2021-10-28,havven,phemex


## Gather List of Coins to be Used

In [58]:
# Select the coins that have at least 5 rows in the filtered tickers table
# (rows, not necessarily distinct exchanges)
coin_counts = dict(tickers_df['CoinID'].value_counts())
coin_id_list = [
    coin_id
    for coin_id, occurrences in coin_counts.items()
    if occurrences >= 5
]
n = len(coin_id_list)

print(f'{n} coins were selected out of {len(coin_counts)}')
coin_id_list

114 coins were selected out of 413


['bitcoin',
 'ethereum',
 'litecoin',
 'bitcoin-cash',
 'dogecoin',
 'chainlink',
 'ripple',
 'uniswap',
 'matic-network',
 'aave',
 'solana',
 'tron',
 'stellar',
 'eos',
 'cardano',
 'polkadot',
 'dash',
 'omisego',
 'ethereum-classic',
 'cosmos',
 'sushi',
 'curve-dao-token',
 'shiba-inu',
 'usd-coin',
 'enjincoin',
 'binancecoin',
 'yearn-finance',
 'filecoin',
 'neo',
 'compound-governance-token',
 'zcash',
 'chiliz',
 'axie-infinity',
 '1inch',
 'the-graph',
 'maker',
 'algorand',
 'avalanche-2',
 'tezos',
 'vechain',
 'the-sandbox',
 'kusama',
 'dai',
 'qtum',
 'havven',
 'dydx',
 'internet-computer',
 'basic-attention-token',
 'bitcoin-cash-sv',
 'pancakeswap-token',
 'monero',
 'terra-luna',
 '0x',
 'decentraland',
 'near',
 'fantom',
 'theta-token',
 'bittorrent-2',
 'ftx-token',
 'binance-usd',
 'tether',
 'zencash',
 'true-usd',
 'republic-protocol',
 'mask-network',
 'ontology',
 'serum',
 'my-neighbor-alice',
 'smooth-love-potion',
 'ankr',
 'band-protocol',
 'balancer',


## Pull API Data for Exchange Volumes

In [62]:
# Resume cursor for the volume download below: index into exchange_id_list of
# the next exchange to fetch. The download cell advances it after each success
# and stops on failure — presumably so that cell can be re-run to resume.
ex_n = 0

In [65]:
print(ex_n)
# Pull Exchange Volumes for All Exchanges in list
# Resumable: `ex_n` (set in the cell above) is the index of the next exchange
# to fetch, so re-running this cell after a failure continues where it stopped.
# NOTE(review): 1097 presumably spans from_date..end_date inclusive — confirm.
days = 1097
while ex_n < len(exchange_id_list):
    exchange_id = exchange_id_list[ex_n]
    print(f'Gathering Data for {exchange_id}...')

    try:
        temp_data = cg.get_exchanges_volume_chart_by_id(id=exchange_id, days=days)
    except Exception as err:
        # Only the API call is guarded, and the real cause is reported. A bare
        # `except:` also swallowed KeyboardInterrupt and programming errors and
        # labelled all of them as an API limit.
        print(f'Unable to continue for now due to API limit... Discontinued on {exchange_id}')
        print(f'Reason: {type(err).__name__}: {err}')
        break

    # Each item is indexed as item[0] -> Date and item[1] -> Volume
    temp_df = pd.DataFrame({
        'Date': [item[0] for item in temp_data],
        'Volume': [item[1] for item in temp_data],
    })
    temp_df['ExchangeID'] = exchange_id

    if ex_n == 0:
        # First exchange starts the table
        ex_volume_df = temp_df
    else:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        ex_volume_df = pd.concat([ex_volume_df, temp_df], ignore_index=True)
    ex_n += 1

    print(f'Successfully Added {exchange_id}\n')


print('Finished!')


0
Gathering Data for binance...
Successfully Added phemex

Gathering Data for binance...
Successfully Added phemex

Gathering Data for binance...
Unable to continue for now due to API limit... Discontinued on binance
Finished!


In [64]:
# View Table
# NOTE(review): per the output below, Date still looks like raw epoch
# milliseconds and Volume is not yet cast/rounded — presumably cleaned later.
ex_volume_df

Unnamed: 0,Date,Volume,ExchangeID
0,1.540761e+12,111332.44984843043117924021674829954670980185,binance
1,1.540847e+12,107735.25148984774607281885799836730368314756,binance
2,1.540934e+12,76921.610533237920777900025484738993181059,binance
3,1.541020e+12,97194.22511152238796585961213290753502096792,binance
4,1.541107e+12,68898.87910397986255322016325575277456812775,binance
...,...,...,...
10869,1.635110e+12,48648.5422897560929189213436237041943884961664,crypto_com
10870,1.635196e+12,41143.562470413176549921582005917467952018255,crypto_com
10871,1.635283e+12,35566.846155046357536110358768722988990642393,crypto_com
10872,1.635369e+12,61269.45057724390711446875387453520171674322,crypto_com


## Analyze Exchange Volumes DataFrame

## Gather List of Coins

In [None]:
# Gather coin list
# Fetched with the shared client; the next cell indexes the result by its 'id' field
coins_list = cg.get_coins_list()
coins_list

In [None]:
# Convert Coins list into dataframe
# Rows are keyed by the coin's 'id' field
coins_df = pd.DataFrame(coins_list).set_index('id')
coins_df

In [None]:
# Get OHLC data
# Hard-coded to 'bitcoin' with the full available history (days='max'),
# priced in `currency`; the next cell narrows it to the configured date window
ohlc_data = cg.get_coin_ohlc_by_id(id = 'bitcoin', vs_currency = currency, days = 'max')
ohlc_data

In [None]:
# Format OHLC Data as DataFrame
ohlc_df = pd.DataFrame(ohlc_data, columns = ['date', 'open', 'high', 'low', 'close'])

# Fix and Filter Dates
# Integer-divide by 1000 to drop the last three digits (presumably ms -> s)
# before handing the column to convert_timestamp; same result as slicing the
# string form, without the int -> str -> int round trip.
ohlc_df['date'] = ohlc_df['date'] // 1000
convert_timestamp(ohlc_df)
# .copy() makes the filtered frame independent, so the column assignment below
# does not write to a slice of the unfiltered frame (SettingWithCopyWarning).
ohlc_df = ohlc_df.loc[(ohlc_df['date'] >= from_date) & (ohlc_df['date'] <= end_date)].copy()

# Add Coin ID Columns and Format Columns
ohlc_df['coin_id'] = 'bitcoin'
ohlc_df = (
    ohlc_df
    .set_index('date')
    .rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close', 'coin_id': 'CoinID'})
    .rename_axis('Date')
)
ohlc_df

In [None]:
# Get Historical Market Data
# Hard-coded to 'bitcoin' with days='max'; the clean-up cell below reads the
# 'prices', 'market_caps' and 'total_volumes' series from this response
raw_market_data = cg.get_coin_market_chart_by_id(id = 'bitcoin', vs_currency = currency, days = 'max')
raw_market_data

In [None]:
# Clean Up the Data
# Build a fresh dict instead of aliasing raw_market_data and pop()-ing its
# rows. The alias meant the raw payload was emptied in place, so this cell
# could only run once per download and raw_market_data was left unusable.
columns = ['prices', 'market_caps', 'total_volumes']
market_data = {
    # Each row is read as [timestamp, ..., value]; keep the last element
    column: [row[-1] for row in raw_market_data[column]]
    for column in columns
}
# Dates come from the first series only. The last three digits are dropped
# (presumably ms -> s) and the value stays a string, which the next cell
# converts with int().
market_data['date'] = [str(row[0])[:-3] for row in raw_market_data[columns[0]]]

In [None]:
# Format market data into df
market_df = pd.DataFrame(market_data)
# Reuse the shared helper rather than repeating its timestamp -> date logic inline
convert_timestamp(market_df)
# NOTE(review): unlike the OHLC cell, no upper bound (end_date) is applied
# here — confirm that is intended.
market_df = market_df.loc[market_df['date'] >= from_date]
market_df = market_df.set_index('date')
market_df['coin_id'] = 'bitcoin'
market_df