In [None]:
# Developed by Sagar Khatri
# Assignment 1 AI
# Bsc. Computing (Networking)
# Nami College

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime

In [2]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

In [3]:
def get_quandl_data(quandl_id):
    '''Download and cache Quandl dataseries'''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    try:
        f = open(cache_path, 'rb')
        df = pickle.load(f)   
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError) as e:
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df

In [4]:
# Pull Kraken BTC price exchange data
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')

Loaded BCHARTS/KRAKENUSD from cache


In [30]:
btc_usd_price_kraken.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-05,3823.6,3874.7,3772.1,3796.6,3545.082095,13596090.0,3835.198311
2019-01-06,3795.2,4083.9,3760.4,4041.1,7954.131913,31519640.0,3962.675064
2019-01-07,4041.0,4078.9,3965.0,4005.3,5303.54327,21323720.0,4020.654867
2019-01-08,4003.7,4113.9,3935.0,3992.9,8106.906295,32578360.0,4018.592877
2019-01-09,3993.0,4039.8,3966.0,3999.9,5461.811141,21887120.0,4007.301666
2019-01-10,4002.2,4035.5,3550.0,3623.5,13261.94888,49754470.0,3751.67078
2019-01-11,3623.5,3694.9,3560.5,3635.1,9804.823105,35617690.0,3632.670111
2019-01-12,3635.1,3652.4,3562.7,3616.5,2885.956979,10456220.0,3623.13757
2019-01-13,3616.5,3644.8,3375.0,3512.6,5567.970655,19702360.0,3538.517308
2019-01-14,3514.9,3516.4,3513.6,3515.0,2.413596,8486.439,3516.097285


In [6]:
# Chart the BTC pricing data
btc_trace = go.Scatter(x=btc_usd_price_kraken.index, y=btc_usd_price_kraken['Weighted Price'])
py.iplot([btc_trace])

In [7]:
# Pull pricing data for 2 more BTC exchanges
exchanges = ['BITSTAMP','ITBIT']

exchange_data = {}

exchange_data['KRAKEN'] = btc_usd_price_kraken

for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    exchange_data[exchange] = btc_exchange_df

Loaded BCHARTS/BITSTAMPUSD from cache
Loaded BCHARTS/ITBITUSD from cache


In [8]:
def merge_dfs_on_column(dataframes, labels, col):
    '''Merge a single column of each dataframe into a new combined dataframe'''
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]
        
    return pd.DataFrame(series_dict)

In [9]:
# Merge the BTC price dataseries' into a single dataframe
btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')

In [10]:
btc_usd_datasets.tail()

Unnamed: 0_level_0,KRAKEN,BITSTAMP,ITBIT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-10,3751.67078,3759.974781,3757.47927
2019-01-11,3632.670111,3634.844253,3634.610639
2019-01-12,3623.13757,3622.412314,3619.25356
2019-01-13,3538.517308,3543.077202,3547.967425
2019-01-14,3516.097285,3512.941093,3516.161691


In [11]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )
    
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )
    
    visibility = 'visible'
    if initial_hide:
        visibility = 'legendonly'
        
    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible=visibility
        )
        
        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config    
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [12]:
# Calculate the average BTC price as a new column
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)

In [13]:
# Plot the average BTC price
btc_trace = go.Scatter(x=btc_usd_datasets.index, y=btc_usd_datasets['avg_btc_price_usd'])
py.iplot([btc_trace])

In [14]:
def get_json_data(json_url, cache_path):
    '''Download and cache JSON data, return as a dataframe.'''
    try:        
        f = open(cache_path, 'rb')
        df = pickle.load(f)   
        print('Loaded {} from cache'.format(json_url))
    except (OSError, IOError) as e:
        print('Downloading {}'.format(json_url))
        df = pd.read_json(json_url)
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(json_url, cache_path))
    return df

In [34]:
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'

# Ask user to input the date from which data get started from the blockchain.
userIn = input("Type Date yy-mm-dd: ")

start_date = datetime.strptime(userIn, '%Y-%m-%d')
end_date = datetime.now() # up until today
pediod = 86400 # pull daily data (86,400 seconds per day)

def get_crypto_data(poloniex_pair):
    '''Retrieve cryptocurrency data from poloniex'''
    json_url = base_polo_url.format(poloniex_pair, start_date.timestamp, end_date.timestamp(), pediod)
    data_df = get_json_data(json_url, poloniex_pair)
    data_df = data_df.set_index('date')
    return data_df


Type Date yy-mm-dd: 2015-01-01


In [16]:
altcoins = ['ETH','LTC','XRP','ETC','STR']

altcoin_data = {}
for altcoin in altcoins:
    coinpair = 'BTC_{}'.format(altcoin)
    crypto_price_df = get_crypto_data(coinpair)
    altcoin_data[altcoin] = crypto_price_df

Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=<built-in method timestamp of datetime.datetime object at 0x0000022A6057FC88>&end=1547523799.254836&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=<built-in method timestamp of datetime.datetime object at 0x0000022A6057FC88>&end=1547523799.254836&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=<built-in method timestamp of datetime.datetime object at 0x0000022A6057FC88>&end=1547523799.254836&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=<built-in method timestamp of datetime.datetime object at 0x0000022A6057FC88>&end=1547523799.254836&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=<built-in method timestamp of datetime.datetime object at 0x0000022A6057FC

In [17]:
altcoin_data['ETH'].tail()

Unnamed: 0_level_0,close,high,low,open,quoteVolume,volume,weightedAverage
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-10,0.034866,0.03743,0.034087,0.037339,46384.958905,1636.114165,0.035273
2019-01-11,0.03458,0.035295,0.0337,0.034841,15340.528599,527.570951,0.034391
2019-01-12,0.034394,0.03495,0.034156,0.034619,22011.84585,761.908622,0.034614
2019-01-13,0.03284,0.034465,0.0324,0.03438,23783.862832,795.565582,0.03345
2019-01-14,0.034845,0.035483,0.032831,0.032831,17473.282872,598.066141,0.034227


In [18]:
# Calculate USD Price as a new column in each altcoin dataframe
for altcoin in altcoin_data.keys():
    altcoin_data[altcoin]['price_usd'] =  altcoin_data[altcoin]['weightedAverage'] * btc_usd_datasets['avg_btc_price_usd']

In [19]:
# Merge USD price of each altcoin into single dataframe 
combined_df = merge_dfs_on_column(list(altcoin_data.values()), list(altcoin_data.keys()), 'price_usd')

In [20]:
# Add BTC price to the dataframe
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']

In [21]:
# Chart all of the altocoin prices
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')

In [35]:
# Calculate the pearson correlation coefficients for cryptocurrencies in 2016
combined_df_2016 = combined_df[combined_df.index.year == 2016]
combined_df_2016.pct_change().corr(method='pearson')

Unnamed: 0,ETH,LTC,XRP,ETC,STR,BTC
ETH,1.0,0.570258,0.527555,-0.180267,0.425369,0.616232
LTC,0.570258,1.0,0.620975,-0.133518,0.556037,0.942242
XRP,0.527555,0.620975,1.0,-0.055498,0.599076,0.648446
ETC,-0.180267,-0.133518,-0.055498,1.0,-0.102784,-0.173148
STR,0.425369,0.556037,0.599076,-0.102784,1.0,0.561899
BTC,0.616232,0.942242,0.648446,-0.173148,0.561899,1.0


In [36]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a correlation heatmap for the entire dataframe'''
    heatmap = go.Heatmap(
        z=df.corr(method='pearson').as_matrix(),
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )
    
    layout = go.Layout(title=title)
    
    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0
        
    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [38]:
correlation_heatmap(combined_df_2016.pct_change(), "Cryptocurrency Correlations in 2016")


Method .as_matrix will be removed in a future version. Use .values instead.



In [39]:
# Calculate the pearson correlation coefficients for cryptocurrencies in 2018
combined_df_2018 = combined_df[combined_df.index.year == 2018]
combined_df_2018.pct_change().corr(method='pearson')

Unnamed: 0,ETH,LTC,XRP,ETC,STR,BTC
ETH,1.0,0.972103,0.943972,0.961969,0.918112,0.974497
LTC,0.972103,1.0,0.940943,0.950435,0.918725,0.978083
XRP,0.943972,0.940943,1.0,0.925734,0.945278,0.942306
ETC,0.961969,0.950435,0.925734,1.0,0.901869,0.952751
STR,0.918112,0.918725,0.945278,0.901869,1.0,0.922171
BTC,0.974497,0.978083,0.942306,0.952751,0.922171,1.0


In [None]:
# (Triest, 2017), (Cdn.patricktriest.com, 2017)