In [89]:
# Import Dependencies and set variables
import pandas as pd
from pycoingecko import CoinGeckoAPI
from datetime import datetime

In [90]:
# Set Variables
currency = 'usd'
from_date = datetime(2015, 1, 1)
end_date = datetime(2021, 9, 30)
cg = CoinGeckoAPI()

In [91]:
# Functions
def convert_timestamp(df):
    df['date'] = df['date'].apply(lambda d: datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d'))
    df['date'] = pd.to_datetime(df.date)
    

In [92]:
# Gather list of exchanges
exchanges_list = cg.get_exchanges_list()
exchanges_list

[{'id': 'binance',
  'name': 'Binance',
  'year_established': 2017,
  'country': 'Cayman Islands',
  'description': '',
  'url': 'https://www.binance.com/',
  'image': 'https://assets.coingecko.com/markets/images/52/small/binance.jpg?1519353250',
  'has_trading_incentive': False,
  'trust_score': 10,
  'trust_score_rank': 1,
  'trade_volume_24h_btc': 486114.04964249086,
  'trade_volume_24h_btc_normalized': 486114.04964249086},
 {'id': 'gdax',
  'name': 'Coinbase Exchange',
  'year_established': 2012,
  'country': 'United States',
  'description': '',
  'url': 'https://www.coinbase.com',
  'image': 'https://assets.coingecko.com/markets/images/23/small/Coinbase_Coin_Primary.png?1621471875',
  'has_trading_incentive': False,
  'trust_score': 10,
  'trust_score_rank': 2,
  'trade_volume_24h_btc': 111890.63021187465,
  'trade_volume_24h_btc_normalized': 111890.63021187465},
 {'id': 'huobi',
  'name': 'Huobi Global',
  'year_established': 2013,
  'country': 'Seychelles',
  'description': '',

In [93]:
# Convert exchanges list into dataframe
exchanges_df = pd.DataFrame(exchanges_list).drop(columns=['description', 'url', 'image', 'has_trading_incentive',
    'trust_score', 'trust_score_rank', 'trade_volume_24h_btc_normalized']).set_index('id').\
    sort_values(by=['trade_volume_24h_btc'], ascending=False)

# Only keep top 25 exchanges in terms of trade_volume_24h_btc
exchanges_df = exchanges_df[exchanges_df['trade_volume_24h_btc']>19000]
exchanges_df

Unnamed: 0_level_0,name,year_established,country,trade_volume_24h_btc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
binance,Binance,2017.0,Cayman Islands,486114.049642
okex,OKEx,2013.0,Belize,127050.647232
coinflex,CoinFLEX,2019.0,Seychelles,124652.179062
zbg,ZBG,2018.0,China,121535.490491
gdax,Coinbase Exchange,2012.0,United States,111890.630212
huobi,Huobi Global,2013.0,Seychelles,72745.663995
hitbtc,HitBTC,2013.0,,56072.836085
upbit,Upbit,2017.0,South Korea,54765.917305
kucoin,KuCoin,2014.0,Seychelles,54220.42861
bitcoin_com,FMFW.io,2019.0,Bahamas,53572.071541


In [94]:
# Gather list of Exchange IDs
exchange_id_list = list(exchanges_df.index)
print(len(exchange_id_list))
exchange_id_list

25


['binance',
 'okex',
 'coinflex',
 'zbg',
 'gdax',
 'huobi',
 'hitbtc',
 'upbit',
 'kucoin',
 'bitcoin_com',
 'changelly',
 'bitrue',
 'ftx_spot',
 'crypto_com',
 'gate',
 'bkex',
 'coinsbit',
 'xt',
 'zb',
 'bitmart',
 'uniswap',
 'digifinex',
 'kraken',
 'bithumb',
 'binance_us']

In [134]:
# Gather ticker data for exchanges in list
n = 0
for exchange in exchange_id_list:
    print(f'Gathering Data for {exchange}...')
    
    exchange_data = cg.get_exchanges_tickers_by_id(id = exchange)
    ticker_data = exchange_data['tickers']
    temp_df = pd.DataFrame(ticker_data)
    
    # Drop Columns
    columns_to_drop = ['market', 'bid_ask_spread_percentage',  'converted_last',
           'converted_volume', 'trust_score',
           'last_traded_at', 'last_fetch_at', 'is_anomaly',
           'is_stale', 'trade_url', 'token_info_url']
    temp_df = temp_df.drop(columns = columns_to_drop)
    
    # Format Columns
    temp_df['ExchangeID'] = exchange
    temp_df = temp_df.rename(columns = {'base': 'Ticker', 'last': 'LastPrice', 'volume': 'Volume', 
                                              'timestamp': 'Time', 'coin_id':'CoinID'})
    temp_df.index.rename('TickerID', inplace=True)
    
    if n == 0:
        tickers_df = temp_df
        n += 1
    else:
        tickers_df = tickers_df.append(temp_df, ignore_index=True)
        
print('Finished Gathering Data!')

Gathering Data for binance...
Gathering Data for okex...
Gathering Data for coinflex...
Gathering Data for zbg...
Gathering Data for gdax...
Gathering Data for huobi...
Gathering Data for hitbtc...
Gathering Data for upbit...
Gathering Data for kucoin...
Gathering Data for bitcoin_com...
Gathering Data for changelly...
Gathering Data for bitrue...
Gathering Data for ftx_spot...
Gathering Data for crypto_com...
Gathering Data for gate...
Gathering Data for bkex...
Gathering Data for coinsbit...
Gathering Data for xt...
Gathering Data for zb...
Gathering Data for bitmart...
Gathering Data for uniswap...
Gathering Data for digifinex...
Gathering Data for kraken...
Gathering Data for bithumb...
Gathering Data for binance_us...
Finished Gathering Data!


In [135]:
# View Tickers Table
tickers_df

Unnamed: 0,Ticker,target,LastPrice,Volume,Time,CoinID,target_coin_id,ExchangeID
0,BUSD,USDT,0.999900,9.383694e+08,2021-10-27T03:20:36+00:00,binance-usd,tether,binance
1,BTC,USDT,60595.010000,4.343153e+04,2021-10-27T03:12:20+00:00,bitcoin,tether,binance
2,ETH,USDT,4215.560000,4.223386e+05,2021-10-27T03:23:50+00:00,ethereum,tether,binance
3,SHIB,USDT,0.000049,6.256837e+13,2021-10-27T03:22:00+00:00,shiba-inu,tether,binance
4,BTC,BUSD,60550.000000,1.161179e+04,2021-10-27T03:08:50+00:00,bitcoin,binance-usd,binance
...,...,...,...,...,...,...,...,...
2390,ZRX,USD,1.055500,9.028792e+05,2021-10-27T03:22:28+00:00,0x,,binance_us
2391,ZEN,USD,84.819000,7.200591e+03,2021-10-27T03:22:28+00:00,zencash,,binance_us
2392,VET,BTC,0.000002,4.432036e+06,2021-10-27T03:22:28+00:00,vechain,bitcoin,binance_us
2393,ZEN,USDT,84.852000,5.472762e+03,2021-10-27T03:22:27+00:00,zencash,tether,binance_us


Unnamed: 0_level_0,Ticker,target,LastPrice,Volume,Time,CoinID,ExchangeID
TickerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,SHIB,USDT,0.000049,5.735930e+12,2021-10-27T02:50:50+00:00,shiba-inu,kraken
1,ETH,USDT,4170.640000,4.015821e+04,2021-10-27T02:48:45+00:00,ethereum,kraken
2,BTC,USDT,60520.408000,2.563854e+03,2021-10-27T02:50:54+00:00,bitcoin,kraken
3,DOGE,USDT,0.258105,2.604713e+08,2021-10-27T02:51:00+00:00,dogecoin,kraken
4,XRP,USDT,1.112530,6.220149e+07,2021-10-27T02:51:01+00:00,ripple,kraken
...,...,...,...,...,...,...,...
95,JST,USDT,0.081271,1.416636e+06,2021-10-27T02:48:46+00:00,just,kraken
96,REN,USDT,1.130900,5.348495e+05,2021-10-27T02:45:57+00:00,republic-protocol,kraken
97,EDEN,USDT,3.965900,4.872911e+05,2021-10-27T02:50:50+00:00,eden,kraken
98,MIR,USDT,2.991000,3.774174e+05,2021-10-27T02:50:56+00:00,mirror-protocol,kraken


In [98]:
tickers_df['target'].value_counts()

USDT    96
USD      3
Name: target, dtype: int64

In [99]:
# Gather coin list
coins_list = cg.get_coins_list()
coins_list

[{'id': '01coin', 'symbol': 'zoc', 'name': '01coin'},
 {'id': '0-5x-long-algorand-token',
  'symbol': 'algohalf',
  'name': '0.5X Long Algorand Token'},
 {'id': '0-5x-long-altcoin-index-token',
  'symbol': 'althalf',
  'name': '0.5X Long Altcoin Index Token'},
 {'id': '0-5x-long-balancer-token',
  'symbol': 'balhalf',
  'name': '0.5X Long Balancer Token'},
 {'id': '0-5x-long-bitcoin-cash-token',
  'symbol': 'bchhalf',
  'name': '0.5X Long Bitcoin Cash Token'},
 {'id': '0-5x-long-bitcoin-sv-token',
  'symbol': 'bsvhalf',
  'name': '0.5X Long Bitcoin SV Token'},
 {'id': '0-5x-long-bitcoin-token',
  'symbol': 'half',
  'name': '0.5X Long Bitcoin Token'},
 {'id': '0-5x-long-cardano-token',
  'symbol': 'adahalf',
  'name': '0.5X Long Cardano Token'},
 {'id': '0-5x-long-chainlink-token',
  'symbol': 'linkhalf',
  'name': '0.5X Long Chainlink Token'},
 {'id': '0-5x-long-cosmos-token',
  'symbol': 'atomhalf',
  'name': '0.5X Long Cosmos Token'},
 {'id': '0-5x-long-defi-index-token',
  'symbol'

In [100]:
# Convert Coins list into dataframe
coins_df = pd.DataFrame(coins_list).set_index('id')
coins_df

Unnamed: 0_level_0,symbol,name
id,Unnamed: 1_level_1,Unnamed: 2_level_1
01coin,zoc,01coin
0-5x-long-algorand-token,algohalf,0.5X Long Algorand Token
0-5x-long-altcoin-index-token,althalf,0.5X Long Altcoin Index Token
0-5x-long-balancer-token,balhalf,0.5X Long Balancer Token
0-5x-long-bitcoin-cash-token,bchhalf,0.5X Long Bitcoin Cash Token
...,...,...
zyro,zyro,Zyro
zytara-dollar,zusd,Zytara Dollar
zyx,zyx,ZYX
zzz-finance,zzz,zzz.finance


In [84]:
# Get OHLC data
ohlc_data = cg.get_coin_ohlc_by_id(id = 'bitcoin', vs_currency = currency, days = 'max')
ohlc_data

[[1367280000000, 135.3, 141.96, 135.3, 135.3],
 [1367539200000, 117.0, 117.0, 91.01, 91.01],
 [1367884800000, 111.25, 118.33, 106.4, 106.4],
 [1368230400000, 112.64, 118.78, 112.64, 113.01],
 [1368576000000, 114.71, 117.18, 114.16, 114.16],
 [1368921600000, 115.5, 123.88, 115.5, 120.5],
 [1369267200000, 122.58, 125.75, 122.58, 125.75],
 [1369612800000, 131.7, 134.6, 128.99, 128.99],
 [1369958400000, 129.18, 132.13, 127.4, 127.98],
 [1370217600000, 129.09, 129.09, 121.16, 121.16],
 [1370563200000, 121.31, 121.71, 119.0, 119.0],
 [1370908800000, 110.09, 110.09, 99.75, 105.99],
 [1371254400000, 108.9, 108.9, 98.4, 98.4],
 [1371600000000, 99.99, 106.55, 99.99, 106.55],
 [1371945600000, 107.9, 110.91, 107.25, 107.25],
 [1372291200000, 108.0, 108.0, 102.44, 103.39],
 [1372550400000, 99.66, 99.66, 93.33, 95.39],
 [1372809600000, 97.5, 97.5, 89.49, 90.8],
 [1373155200000, 78.8, 78.8, 67.81, 68.08],
 [1373500800000, 77.5, 85.75, 76.39, 85.75],
 [1373846400000, 93.8, 95.58, 88.06, 94.0],
 [13741

In [109]:
# Format OHLC Data as DataFrame
ohlc_df = pd.DataFrame(ohlc_data, columns = ['date', 'open', 'high', 'low', 'close'])

# Fix and Filter Dates
ohlc_df['date'] = ohlc_df['date'].apply(lambda x: int(str(x)[:-3]))
convert_timestamp(ohlc_df)
ohlc_df = ohlc_df.loc[(ohlc_df['date'] >= from_date) & (ohlc_df['date'] <= end_date)]

# Add Coin ID Columns and Format Columns
ohlc_df['coin_id'] = 'bitcoin'
ohlc_df = ohlc_df.set_index('date')
ohlc_df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close', 'coin_id': 'CoinID'}, inplace=True)
ohlc_df.index.rename('Date', inplace=True)
ohlc_df

Unnamed: 0_level_0,Open,High,Low,Close,CoinID
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,313.99,314.45,286.57,286.57,bitcoin
2015-01-06,260.94,295.87,260.94,295.87,bitcoin
2015-01-10,284.45,289.57,267.18,267.18,bitcoin
2015-01-14,268.35,268.35,172.15,210.56,bitcoin
2015-01-18,205.82,214.55,199.49,214.55,bitcoin
...,...,...,...,...,...
2021-09-14,45196.49,47135.43,45139.83,47135.43,bitcoin
2021-09-18,48186.53,48266.63,47228.66,48266.63,bitcoin
2021-09-22,47371.04,47371.04,40386.62,43587.22,bitcoin
2021-09-26,44980.90,44980.90,42752.17,43336.86,bitcoin


In [86]:
# Get Historical Market Data
raw_market_data = cg.get_coin_market_chart_by_id(id = 'bitcoin', vs_currency = currency, days = 'max')
raw_market_data

{'prices': [[1367107200000, 135.3],
  [1367193600000, 141.96],
  [1367280000000, 135.3],
  [1367366400000, 117.0],
  [1367452800000, 103.43],
  [1367539200000, 91.01],
  [1367625600000, 111.25],
  [1367712000000, 116.79],
  [1367798400000, 118.33],
  [1367884800000, 106.4],
  [1367971200000, 112.64],
  [1368057600000, 113.0],
  [1368144000000, 118.78],
  [1368230400000, 113.01],
  [1368316800000, 114.713],
  [1368403200000, 117.18],
  [1368489600000, 114.5],
  [1368576000000, 114.156],
  [1368662400000, 115.5],
  [1368748800000, 123.1],
  [1368835200000, 123.88],
  [1368921600000, 120.501],
  [1369008000000, 122.58],
  [1369094400000, 122.9],
  [1369180800000, 123.0],
  [1369267200000, 125.748],
  [1369353600000, 131.7],
  [1369440000000, 130.77],
  [1369526400000, 134.6],
  [1369612800000, 128.985],
  [1369699200000, 129.179],
  [1369785600000, 132.13],
  [1369872000000, 127.401],
  [1369958400000, 127.98],
  [1370044800000, 129.09],
  [1370131200000, 121.3],
  [1370217600000, 121.16]

In [87]:
market_data = raw_market_data
# Clean Up the Data
columns = ['prices', 'market_caps', 'total_volumes']
n = 0
timestamp_list = []
for column in columns:
    temp_list = market_data[column]
    column_list = []
    for item in temp_list:
        timestamp_list.append(str(item.pop(0)))
        column_list.append(item.pop())
    
    market_data[column] = column_list
    if n == 0:
        market_data['date'] = [x[:-3] for x in timestamp_list]
        n+=1

In [88]:
# Format market data into df
market_df = pd.DataFrame(market_data)
market_df['date'] = market_df['date'].apply(lambda d: datetime.fromtimestamp(int(d)).strftime('%Y-%m-%d'))
market_df['date'] = pd.to_datetime(market_df.date)
market_df = market_df.loc[market_df['date'] >= from_date]
market_df = market_df.set_index('date')
market_df['coin_id'] = 'bitcoin'
market_df

Unnamed: 0_level_0,prices,market_caps,total_volumes,coin_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-01,314.446000,4.301448e+09,3.885591e+07,bitcoin
2015-01-02,286.572000,3.921358e+09,1.187789e+08,bitcoin
2015-01-03,260.936000,3.571640e+09,2.055001e+08,bitcoin
2015-01-04,273.220000,3.740880e+09,1.550381e+08,bitcoin
2015-01-05,285.373800,3.908408e+09,9.700290e+07,bitcoin
...,...,...,...,...
2021-10-21,62541.086933,1.178927e+12,4.870996e+10,bitcoin
2021-10-22,61029.270932,1.151509e+12,4.076646e+10,bitcoin
2021-10-23,61572.140434,1.161166e+12,2.850504e+10,bitcoin
2021-10-24,61173.166391,1.153315e+12,2.998254e+10,bitcoin
