In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime
import plotly.offline as py 
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)
%matplotlib inline 

In [2]:
file_path="C:\\Users\\skandhaswa001\\Desktop\\Reading\\Python\\Dataquest\\solutions-master\\cryptocurrencypricehistory\\"
crypto_currency=['bitcoin_cash','bitcoin','bitconnect','dash','ethereum','iota','litecoin','monero','nem','neo','numeraire','omisego','qtum','ripple','stratis','waves']

In [3]:
cryptcurr_data = {}

for cryp_curr in crypto_currency:
    file_name = '{}{}_price.csv'.format(file_path,cryp_curr)
    curr_df=pd.read_csv(file_name)
    curr_df['Date']=pd.to_datetime(curr_df['Date'])
    curr_df=curr_df.set_index('Date')
    curr_df['Weighted Price']=(curr_df['High']+curr_df['Low'] )/ 2
    cryptcurr_data[cryp_curr]=curr_df



In [4]:
def merge_dataframe(dataframes, labels, col):
    
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]
        
    return pd.DataFrame(series_dict)

In [5]:
Datasets = merge_dataframe(list(cryptcurr_data.values()), list(cryptcurr_data.keys()), 'Weighted Price')
Datasets.fillna(0, inplace=True)



In [6]:
def plot_scatter(df, title, y_axis_label='', scale='linear'):

    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= True,
            type=scale
        )
    )
    
     
        
    
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible='visible'
        )       
   
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)
   

In [7]:
Datasets=Datasets[Datasets.index >'2017-01-01']


In [8]:
plot_scatter(Datasets, 'Cryptocurrency Prices (USD)', y_axis_label='Coin Value (USD)', scale='linear')

In [13]:
plot_scatter(Datasets, 'Cryptocurrency Prices (USD)', y_axis_label='Coin Value (USD)', scale='log')

In [15]:
def scatter_corr(df,title,dim1_name,dim1_idx,dim2_name,dim2_idx):
    
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))

    trace0 = go.Scatter(
        x = series_arr[dim1_idx].index,
        y = series_arr[dim1_idx],
        name = dim1_name,
        mode = 'markers',
        marker = dict(
            size = 10,
            color = 'rgba(152, 0, 0, .8)',
            line = dict(
                width = 2,
                color = 'rgb(0, 0, 0)'
            )
        )
    )

    trace1 = go.Scatter(
        x = series_arr[dim2_idx].index,
        y = series_arr[dim2_idx],
        name = dim2_name,
        mode = 'markers',
        marker = dict(
            size = 10,
            color = 'rgba(255, 182, 193, .9)',
            line = dict(
                width = 2,
            )
        )
    )

    data = [trace0, trace1]

    layout = dict(title = title,
                  yaxis = dict(zeroline = False,type='log'),
                  xaxis = dict(zeroline = False)
                 )

    fig = dict(data=data, layout=layout)
    py.iplot(fig, filename='styled-scatter')

In [16]:
scatter_corr(Datasets,'Scatter Plot BTC-ETC','Bitcoin',0,'Ethereum',4)

In [17]:
Datasets.pct_change().corr(method='pearson')

Unnamed: 0,bitcoin,bitcoin_cash,bitconnect,dash,ethereum,iota,litecoin,monero,nem,neo,numeraire,omisego,qtum,ripple,stratis,waves
bitcoin,1.0,0.031438,0.095994,0.367291,0.458454,0.582965,0.485686,0.449843,0.437183,0.369821,0.197713,0.519033,0.540538,0.169031,0.44948,0.537804
bitcoin_cash,0.031438,1.0,0.193915,0.284088,0.220133,0.072415,0.275923,0.123927,0.379052,0.006814,0.12706,-0.117781,0.014825,0.148769,0.113088,0.200894
bitconnect,0.095994,0.193915,1.0,0.062305,0.051319,0.457258,0.075254,0.03418,-0.003506,0.039144,0.169121,0.392205,0.372128,0.052682,-0.00049,0.05399
dash,0.367291,0.284088,0.062305,1.0,0.504371,0.480164,0.297266,0.449071,0.309436,0.296094,0.149866,0.425215,0.423025,0.013665,0.361077,0.307751
ethereum,0.458454,0.220133,0.051319,0.504371,1.0,0.587733,0.344032,0.568524,0.385687,0.312342,0.235444,0.591026,0.409481,0.118255,0.416521,0.4375
iota,0.582965,0.072415,0.457258,0.480164,0.587733,1.0,0.424043,0.336631,0.579696,0.329709,0.379764,0.574527,0.522988,0.384365,0.559953,0.571778
litecoin,0.485686,0.275923,0.075254,0.297266,0.344032,0.424043,1.0,0.439178,0.330564,0.431297,0.119865,0.351427,0.445305,0.319453,0.418696,0.405616
monero,0.449843,0.123927,0.03418,0.449071,0.568524,0.336631,0.439178,1.0,0.349698,0.268717,0.137259,0.256132,0.370879,0.214908,0.39092,0.402016
nem,0.437183,0.379052,-0.003506,0.309436,0.385687,0.579696,0.330564,0.349698,1.0,0.225553,0.200497,0.52906,0.530287,0.231391,0.338027,0.331887
neo,0.369821,0.006814,0.039144,0.296094,0.312342,0.329709,0.431297,0.268717,0.225553,1.0,0.10001,0.513884,0.394238,0.120549,0.280814,0.306152


In [18]:
quandl_id='LBMA/GOLD'
cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
df = quandl.get(quandl_id, returns="pandas")
df.to_pickle(cache_path)

In [19]:
df=df.reset_index()
df['Date']=pd.to_datetime(df['Date'])
df=df.set_index('Date')


In [20]:
df=df[df.index > '2017-01-01']
df['Gold']=df['USD (AM)']+df['USD (PM)']/ 2
df=(df['Gold'])

In [21]:
df=pd.DataFrame(df)

In [22]:
Datasets=Datasets.merge(df, left_index=True, right_index=True, how='inner')


In [23]:
scatter_corr(Datasets,'Scatter Plot BTC-Gold','Bitcoin',0,'Gold',16)

In [24]:
#slice_col=['bitcoin','Gold']
#Datasets[slice_col].pct_change().corr(method='pearson')
Datasets.pct_change().corr(method='pearson')

Unnamed: 0,bitcoin,bitcoin_cash,bitconnect,dash,ethereum,iota,litecoin,monero,nem,neo,numeraire,omisego,qtum,ripple,stratis,waves,Gold
bitcoin,1.0,0.101732,0.085681,0.342403,0.423684,0.670103,0.440825,0.378348,0.35,0.35809,0.145969,0.398515,0.572838,0.224215,0.385726,0.539043,-0.00614
bitcoin_cash,0.101732,1.0,0.185941,0.240041,0.328794,0.112246,0.320019,0.159131,0.367011,-0.004691,0.305608,0.002689,0.016212,0.306548,0.209837,0.242448,0.10019
bitconnect,0.085681,0.185941,1.0,0.019449,0.027935,0.421,0.064867,0.032615,-0.011967,0.017971,0.127651,0.279898,0.492022,0.084082,0.00603,0.05949,0.067634
dash,0.342403,0.240041,0.019449,1.0,0.63627,0.367633,0.286696,0.592827,0.270001,0.287076,0.106122,0.465961,0.38754,-0.080802,0.261648,0.319695,0.052203
ethereum,0.423684,0.328794,0.027935,0.63627,1.0,0.586887,0.245765,0.547226,0.259647,0.22982,0.125805,0.518812,0.450783,-0.004677,0.43172,0.47666,0.06806
iota,0.670103,0.112246,0.421,0.367633,0.586887,1.0,0.34842,0.258166,0.480543,0.293249,0.182388,0.359856,0.492563,0.343866,0.484903,0.487714,-0.053298
litecoin,0.440825,0.320019,0.064867,0.286696,0.245765,0.34842,1.0,0.365921,0.309991,0.501767,0.085602,0.278821,0.421085,0.371172,0.319735,0.352179,-0.040152
monero,0.378348,0.159131,0.032615,0.592827,0.547226,0.258166,0.365921,1.0,0.362304,0.192536,0.129118,0.293945,0.419366,0.164204,0.333178,0.410611,0.13809
nem,0.35,0.367011,-0.011967,0.270001,0.259647,0.480543,0.309991,0.362304,1.0,0.135644,0.102192,0.554252,0.563828,0.432569,0.334274,0.313536,-0.050429
neo,0.35809,-0.004691,0.017971,0.287076,0.22982,0.293249,0.501767,0.192536,0.135644,1.0,0.052245,0.465369,0.226489,0.085343,0.135894,0.18452,-0.129065


In [25]:
file_name = '{}Google_Search_Bitcoin.csv'.format(file_path)
google_df=pd.read_csv(file_name)
google_df['Date']=pd.to_datetime(google_df['Date'])
google_df=google_df.set_index('Date')
google_df['Week']=google_df.index.week
google_df=google_df.set_index('Week')



In [26]:
google_Datasets=Datasets
google_Datasets['Week']=google_Datasets.index.week
google_Datasets=google_Datasets.set_index('Week')
google_search_datasets=google_Datasets.merge(google_df, left_index=True, right_index=True, how='inner')


In [27]:
scatter_corr(google_search_datasets,'Scatter Plot BTC-Google Search','Bitcoin',0,'Google Search',17)


In [28]:
google_search_datasets.pct_change().corr(method='pearson')

Unnamed: 0,bitcoin,bitcoin_cash,bitconnect,dash,ethereum,iota,litecoin,monero,nem,neo,numeraire,omisego,qtum,ripple,stratis,waves,Gold,Topic_proportion
bitcoin,1.0,0.107162,0.084597,0.349352,0.424539,0.672141,0.440908,0.383087,0.350354,0.351919,0.140955,0.409019,0.563604,0.223523,0.396215,0.540218,0.003978,0.041218
bitcoin_cash,0.107162,1.0,0.189689,0.23531,0.330381,0.10961,0.321179,0.156165,0.367919,0.000959,0.309535,-0.004488,0.024518,0.313663,0.203949,0.243712,0.093135,-0.040014
bitconnect,0.084597,0.189689,1.0,0.018795,0.027275,0.419735,0.064063,0.032332,-0.012948,0.016778,0.124076,0.285906,0.489088,0.083706,0.005266,0.058915,0.068342,0.003051
dash,0.349352,0.23531,0.018795,1.0,0.637337,0.367224,0.286698,0.592106,0.268854,0.29188,0.105623,0.457439,0.405447,-0.081022,0.255612,0.32027,0.044942,-0.068036
ethereum,0.424539,0.330381,0.027275,0.637337,1.0,0.586848,0.244879,0.547379,0.258653,0.229213,0.123574,0.521556,0.451705,-0.005232,0.432958,0.476242,0.068472,-0.117668
iota,0.672141,0.10961,0.419735,0.367224,0.586848,1.0,0.345853,0.257872,0.477354,0.290056,0.17841,0.354478,0.496859,0.343794,0.488259,0.487487,-0.05312,-0.187195
litecoin,0.440908,0.321179,0.064063,0.286698,0.244879,0.345853,1.0,0.366035,0.308872,0.501674,0.082195,0.279438,0.421272,0.370786,0.320604,0.351509,-0.039846,0.101689
monero,0.383087,0.156165,0.032332,0.592106,0.547379,0.257872,0.366035,1.0,0.36195,0.194995,0.129073,0.288784,0.430267,0.164441,0.331255,0.410885,0.135171,0.010878
nem,0.350354,0.367919,-0.012948,0.268854,0.258653,0.477354,0.308872,0.36195,1.0,0.134538,0.097921,0.553923,0.563932,0.432393,0.333764,0.312838,-0.05172,0.091289
neo,0.351919,0.000959,0.016778,0.29188,0.229213,0.290056,0.501674,0.194995,0.134538,1.0,0.043475,0.488457,0.216116,0.084006,0.141651,0.183393,-0.12252,0.032296


In [33]:
'''Plot a correlation heatmap for the entire dataframe'''
heatmap = go.Heatmap(
        z=google_search_datasets.corr(method='pearson').as_matrix(),
        x=google_search_datasets.columns,
        y=google_search_datasets.columns,
        colorbar=dict(title='Pearson Coefficient'),
)
    
layout = go.Layout(title='Heat Map')
heatmap['zmax'] = 1.0
heatmap['zmin'] = -1.0
        
fig = go.Figure(data=[heatmap], layout=layout)
py.iplot(fig)