In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

def get_quandl_data(quandl_id):
    '''Download and cache a Quandl dataseries.

    The cache file lives in the current working directory and is named after
    ``quandl_id`` with every '/' replaced by '-' and a '.pkl' suffix. If that
    file can be opened, its unpickled content is returned; otherwise the
    series is fetched with ``quandl.get`` and written to the cache file.

    Args:
        quandl_id: Quandl dataset code, e.g. 'BCHARTS/KRAKENUSD'.

    Returns:
        The object stored in the cache file, or the result of ``quandl.get``
        (requested with ``returns="pandas"``) on a cache miss.

    Raises:
        Only ``OSError`` raised inside the cache-read block triggers the
        download fallback. Unpickling errors on a corrupt cache file and
        errors from the download itself propagate to the caller.
    '''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    try:
        # Use a context manager so the handle is closed even when unpickling
        # fails; the previous bare open() leaked it on every cache hit.
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # this at cache files this notebook wrote itself.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
        print('Loaded {} from cache'.format(quandl_id))
    except OSError:  # IOError is an alias of OSError on Python 3
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df

def merge_dfs_on_column(dataframes, labels, col):
    '''Combine the `col` column of every dataframe into one new dataframe.

    The i-th entry of `dataframes` contributes its `col` column, which
    becomes the column named `labels[i]` in the result.
    '''
    columns_by_label = {
        labels[position]: frame[col]
        for position, frame in enumerate(dataframes)
    }
    return pd.DataFrame(columns_by_label)

def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Draw every column of `df` as its own scatter trace in one plotly figure.

    Args:
        df: dataframe to plot; its index supplies the x values (the x axis is
            configured with type 'date') and each column becomes one trace.
        title: figure title.
        seperate_y_axis: when true, each trace is bound to its own y axis
            ('y1', 'y2', ...) configured to overlay 'y' with tick labels
            hidden; the tick labels of the main y axis are hidden as well.
        y_axis_label: title of the main y axis.
        scale: axis type passed to every y axis (e.g. 'linear' or 'log').
        initial_hide: when true, traces start as 'legendonly'.

    The figure is displayed with `py.iplot`; nothing is returned.
    '''
    column_names = list(df)
    column_series = [df[name] for name in column_names]

    # Traces are fully visible unless the caller asked for legend-only.
    trace_visibility = 'legendonly' if initial_hide else True

    main_y_axis = dict(
        title=y_axis_label,
        showticklabels=not seperate_y_axis,
        type=scale,
    )
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=main_y_axis,
    )

    # Shared settings for the per-series axes used with seperate_y_axis.
    overlay_axis = dict(overlaying='y', showticklabels=False, type=scale)

    # One trace per column; axis numbering starts at 1.
    traces = []
    for axis_number, (name, series) in enumerate(zip(column_names, column_series), start=1):
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=name,
            visible=trace_visibility,
        )
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(axis_number)
            layout['yaxis{}'.format(axis_number)] = overlay_axis
        traces.append(trace)

    py.iplot(go.Figure(data=traces, layout=layout))

def get_json_data(json_url, cache_path):
    '''Download and cache JSON data, returning it as a dataframe.

    If `cache_path` can be opened, its unpickled content is returned.
    Otherwise `json_url` is read with ``pd.read_json`` and the result is
    pickled to `cache_path`.

    Args:
        json_url: URL handed to ``pd.read_json`` on a cache miss. It is not
            part of the cache key, so a cached file is returned even when the
            URL has changed.
        cache_path: path of the pickle cache file.

    Returns:
        The cached object, or the dataframe produced by ``pd.read_json``.

    Raises:
        Only ``OSError`` raised inside the cache-read block triggers the
        download fallback. Unpickling errors on a corrupt cache file and
        errors from the download itself propagate to the caller.
    '''
    try:
        # Use a context manager so the handle is closed even when unpickling
        # fails; the previous bare open() leaked it on every cache hit.
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # this at cache files this notebook wrote itself.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
        print('Loaded {} from cache'.format(json_url))
    except OSError:  # IOError is an alias of OSError on Python 3
        print('Downloading {}'.format(json_url))
        df = pd.read_json(json_url)
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(json_url, cache_path))
    return df


# Poloniex chart-data URL template; placeholders in order: currency pair, start, end, period
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
start_date = datetime(2017, 1, 1)  # get data from the start of 2017
end_date = datetime.now()  # up until today
pediod = 24 * 60 * 60  # pull daily data (86,400 seconds per day)

def get_crypto_data(poloniex_pair):
    '''Fetch chart data for one poloniex currency pair, indexed by 'date'.

    The request URL is built from the module-level `base_polo_url`,
    `start_date`, `end_date` and `pediod`. The pair name itself is used as
    the cache file path passed to `get_json_data`.
    '''
    window_start = start_date.timestamp()
    window_end = end_date.timestamp()
    json_url = base_polo_url.format(poloniex_pair, window_start, window_end, pediod)
    return get_json_data(json_url, poloniex_pair).set_index('date')

# Pull Kraken BTC/USD price exchange data (served from the local pickle cache when present)
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')
#btc_usd_price_kraken.head()
# Charting BTC prices (disabled)
#btc_trade = go.Scatter(x=btc_usd_price_kraken.index, y=btc_usd_price_kraken['Weighted Price'])
#py.iplot([btc_trade])
# Remaining exchanges to pull; Kraken was fetched above and is registered first
exchanges = ['COINBASE', 'BITSTAMP', 'ITBIT']
exchange_data = {}
exchange_data['KRAKEN'] = btc_usd_price_kraken

# Fetch one dataframe per exchange, keyed by exchange name
for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    exchange_data[exchange] = btc_exchange_df

# Merge the 'Weighted Price' column of every exchange into a single dataframe
# (one column per exchange)
btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')
#btc_usd_datasets.tail()

# Replace "0" values with NaN, in place
# NOTE(review): presumably zeros mark missing data and would otherwise drag down the row mean below - confirm
btc_usd_datasets.replace(0, np.nan, inplace=True)
# Plot all of the BTC exchange prices (disabled)
#df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')

# Calculate the average BTC price across exchanges (row-wise mean) as a new column
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)

# Plot the average BTC price (disabled)
#btc_trace = go.Scatter(x=btc_usd_datasets.index, y=btc_usd_datasets['avg_btc_price_usd'])
#py.iplot([btc_trace])

# Altcoin symbols; each is requested below as the poloniex pair 'BTC_<symbol>'
altcoins = ['ETH', 'LTC', 'XRP', 'ETC', 'STR', 'DASH', 'SC', 'XMR', 'XEM']

# Fetch one dataframe per altcoin, keyed by symbol
altcoin_data = {}
for altcoin in altcoins:
    coinpair = 'BTC_{}'.format(altcoin)
    crypto_price_df = get_crypto_data(coinpair)
    altcoin_data[altcoin] = crypto_price_df

# Calculate USD Price as a new column in each altcoin dataframe:
# the altcoin's 'weightedAverage' multiplied by the average BTC/USD price.
# NOTE(review): assumes the altcoin frames and btc_usd_datasets share a compatible date index - confirm
for altcoin in altcoin_data.keys():
    altcoin_data[altcoin]['price_usd'] = altcoin_data[altcoin]['weightedAverage'] * btc_usd_datasets[
        'avg_btc_price_usd']

# Merge USD price of each altcoin into a single dataframe (one column per altcoin)
combined_df = merge_dfs_on_column(list(altcoin_data.values()), list(altcoin_data.keys()), 'price_usd')

# Add BTC price to the dataframe
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']

# Chart all of the altcoin prices (plus BTC) on one shared, log-scaled y axis
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')

Loaded BCHARTS/KRAKENUSD from cache
Loaded BCHARTS/COINBASEUSD from cache
Loaded BCHARTS/BITSTAMPUSD from cache
Loaded BCHARTS/ITBITUSD from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1483246800.0&end=1556376055.278098&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1483246800.0&end=1556376055.278098&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1483246800.0&end=1556376055.278098&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1483246800.0&end=1556376055.278098&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1483246800.0&end=1556376055.278098&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1483246800.0&end=1556376055.278098&period

In [2]:
# Calculate the pearson correlation coefficients of daily percentage changes
# for cryptocurrencies in 2017
combined_df_2017 = combined_df.loc[combined_df.index.year == 2017]
combined_df_2017.pct_change().corr(method='pearson')

Unnamed: 0,ETH,LTC,XRP,ETC,STR,DASH,SC,XMR,XEM,BTC
ETH,1.0,0.43806,0.211818,0.601278,0.259062,0.507222,0.373328,0.559516,0.398732,0.411678
LTC,0.43806,1.0,0.32428,0.482336,0.307829,0.340127,0.339122,0.438093,0.379563,0.420545
XRP,0.211818,0.32428,1.0,0.114394,0.509626,0.091291,0.244057,0.22991,0.267633,0.132151
ETC,0.601278,0.482336,0.114394,1.0,0.210146,0.387698,0.298527,0.450862,0.321531,0.417161
STR,0.259062,0.307829,0.509626,0.210146,1.0,0.183148,0.403104,0.330453,0.339189,0.231476
DASH,0.507222,0.340127,0.091291,0.387698,0.183148,1.0,0.291411,0.50004,0.326242,0.307077
SC,0.373328,0.339122,0.244057,0.298527,0.403104,0.291411,1.0,0.379812,0.331615,0.325314
XMR,0.559516,0.438093,0.22991,0.450862,0.330453,0.50004,0.379812,1.0,0.340217,0.408807
XEM,0.398732,0.379563,0.267633,0.321531,0.339189,0.326242,0.331615,0.340217,1.0,0.330361
BTC,0.411678,0.420545,0.132151,0.417161,0.231476,0.307077,0.325314,0.408807,0.330361,1.0


In [8]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Show the pairwise Pearson correlations of `df`'s columns as a heatmap.

    Args:
        df: dataframe whose columns are correlated with one another; the
            column labels are used on both heatmap axes.
        title: figure title.
        absolute_bounds: when true, the colour scale is pinned to the range
            -1.0 .. 1.0 instead of being left to plotly.

    The figure is displayed with `py.iplot`; nothing is returned.
    '''
    column_labels = df.columns
    heatmap_options = dict(
        z=df.corr(method='pearson').values,
        x=column_labels,
        y=column_labels,
        colorbar=dict(title='Pearson Coefficient'),
    )

    # Fix the colour range to the full span of a correlation coefficient.
    if absolute_bounds:
        heatmap_options.update(zmin=-1.0, zmax=1.0)

    figure = go.Figure(
        data=[go.Heatmap(**heatmap_options)],
        layout=go.Layout(title=title),
    )
    py.iplot(figure)

In [9]:
# Heatmap of the correlations between daily percentage changes in 2017
correlation_heatmap(
    combined_df_2017.pct_change(),
    title="Cryptocurrency Correlations in 2017",
)

In [10]:
# Calculate the pearson correlation coefficients of daily percentage changes
# for cryptocurrencies in 2018
combined_df_2018 = combined_df.loc[combined_df.index.year == 2018]
combined_df_2018.pct_change().corr(method='pearson')

Unnamed: 0,ETH,LTC,XRP,ETC,STR,DASH,SC,XMR,XEM,BTC
ETH,1.0,0.817922,0.72077,0.789782,0.686034,0.802568,0.703624,0.820965,0.715311,0.821393
LTC,0.817922,1.0,0.700568,0.723558,0.687398,0.803747,0.732664,0.807216,0.672138,0.851405
XRP,0.72077,0.700568,1.0,0.640675,0.792483,0.685005,0.677713,0.693242,0.736187,0.714145
ETC,0.789782,0.723558,0.640675,1.0,0.61949,0.719158,0.613912,0.711497,0.627015,0.733121
STR,0.686034,0.687398,0.792483,0.61949,1.0,0.692077,0.696601,0.706943,0.752291,0.737675
DASH,0.802568,0.803747,0.685005,0.719158,0.692077,1.0,0.707462,0.822289,0.69169,0.802794
SC,0.703624,0.732664,0.677713,0.613912,0.696601,0.707462,1.0,0.712465,0.704682,0.753193
XMR,0.820965,0.807216,0.693242,0.711497,0.706943,0.822289,0.712465,1.0,0.712102,0.862529
XEM,0.715311,0.672138,0.736187,0.627015,0.752291,0.69169,0.704682,0.712102,1.0,0.720885
BTC,0.821393,0.851405,0.714145,0.733121,0.737675,0.802794,0.753193,0.862529,0.720885,1.0


In [11]:
# Heatmap of the correlations between daily percentage changes in 2018
correlation_heatmap(
    combined_df_2018.pct_change(),
    title="Cryptocurrency Correlations in 2018",
)