In [1]:
import pandas as pd
import os
import json
import numpy as np
import seaborn as sns
import tweepy
import datetime
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from dateutil.parser import parse
import matplotlib.dates as mdates
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

In [2]:
def merge_dfs_on_column(dataframes, labels, col):
    '''Merge a single column of each dataframe into a new combined dataframe'''
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]
        
    return pd.DataFrame(series_dict)

In [3]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )
    
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )
    
    visibility = 'visible'
    if initial_hide:
        visibility = 'legendonly'
        
    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible=visibility
        )
        
        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config    
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [4]:
crypto_data = os.path.join("crypto-markets.csv")
crypto_data_df = pd.read_csv(crypto_data)

In [5]:
crypto_data_df.head()

Unnamed: 0,slug,symbol,name,date,ranknow,open,high,low,close,volume,market,close_ratio,spread
0,bitcoin,BTC,Bitcoin,2013-04-28,1,135.3,135.98,132.1,134.21,0,1500520000,0.5438,3.88
1,bitcoin,BTC,Bitcoin,2013-04-29,1,134.44,147.49,134.0,144.54,0,1491160000,0.7813,13.49
2,bitcoin,BTC,Bitcoin,2013-04-30,1,144.0,146.93,134.05,139.0,0,1597780000,0.3843,12.88
3,bitcoin,BTC,Bitcoin,2013-05-01,1,139.0,139.89,107.72,116.99,0,1542820000,0.2882,32.17
4,bitcoin,BTC,Bitcoin,2013-05-02,1,116.38,125.6,92.28,105.21,0,1292190000,0.3881,33.32


In [6]:
crypto_seventeen = crypto_data_df[(crypto_data_df["date"] >= "2017-01-01") & (crypto_data_df["date"] <= '2018-01-28')] 
crypto_seventeen.head()

Unnamed: 0,slug,symbol,name,date,ranknow,open,high,low,close,volume,market,close_ratio,spread
1344,bitcoin,BTC,Bitcoin,2017-01-01,1,963.66,1003.08,958.7,998.33,147775000,15491200000,0.893,44.38
1345,bitcoin,BTC,Bitcoin,2017-01-02,1,998.62,1031.39,996.7,1021.75,222185000,16055100000,0.7221,34.69
1346,bitcoin,BTC,Bitcoin,2017-01-03,1,1021.6,1044.08,1021.6,1043.84,185168000,16426600000,0.9893,22.48
1347,bitcoin,BTC,Bitcoin,2017-01-04,1,1044.4,1159.42,1044.4,1154.73,344946000,16795400000,0.9592,115.02
1348,bitcoin,BTC,Bitcoin,2017-01-05,1,1156.73,1191.1,910.42,1013.38,510199000,18604000000,0.3668,280.68


In [7]:
crypto_seventeen.count()

slug           347360
symbol         347360
name           347360
date           347360
ranknow        347360
open           347360
high           347360
low            347360
close          347360
volume         347360
market         347360
close_ratio    344794
spread         347360
dtype: int64

In [8]:
top_ten = crypto_seventeen[(crypto_seventeen["ranknow"] <= 10)] 
top_ten["name"].value_counts()

Ethereum        392
Litecoin        392
NEO             392
NEM             392
Ripple          392
Stellar         392
Bitcoin         392
EOS             211
Bitcoin Cash    189
Cardano         119
Name: name, dtype: int64

In [9]:
top_ten.head()
top_ten.to_csv("top_ten_currencies.csv")

In [32]:
currencies = top_ten["symbol"].unique()
currencies


array(['BTC', 'ETH', 'XRP', 'BCH', 'ADA', 'XLM', 'LTC', 'NEO', 'EOS', 'XEM'], dtype=object)

In [33]:
currency_data = {}
for currency in currencies:
    crypto_price_df = top_ten[top_ten["symbol"] == currency]
    crypto_price_df.set_index("date", inplace=True)
    currency_data[currency] = crypto_price_df
    

In [34]:
currency_data

{'ADA':                slug symbol     name  ranknow      open      high       low  \
 date                                                                         
 2017-10-01  cardano    ADA  Cardano        5  0.021678  0.032226  0.017354   
 2017-10-02  cardano    ADA  Cardano        5  0.024607  0.030088  0.019969   
 2017-10-03  cardano    ADA  Cardano        5  0.025757  0.027425  0.020690   
 2017-10-04  cardano    ADA  Cardano        5  0.020864  0.022806  0.020864   
 2017-10-05  cardano    ADA  Cardano        5  0.021951  0.022154  0.020859   
 2017-10-06  cardano    ADA  Cardano        5  0.021359  0.021542  0.018360   
 2017-10-07  cardano    ADA  Cardano        5  0.018414  0.021050  0.017620   
 2017-10-08  cardano    ADA  Cardano        5  0.020929  0.023598  0.020147   
 2017-10-09  cardano    ADA  Cardano        5  0.020344  0.022807  0.020342   
 2017-10-10  cardano    ADA  Cardano        5  0.022112  0.022446  0.021279   
 2017-10-11  cardano    ADA  Cardano        5

In [35]:
combined_df = merge_dfs_on_column(list(currency_data.values()), list(currency_data.keys()), 'close')

In [36]:
combined_df.head()

Unnamed: 0,ADA,BCH,BTC,EOS,ETH,LTC,NEO,XEM,XLM,XRP
2017-01-01,,,998.33,,8.17,4.51,0.141841,0.00344,0.002481,0.006368
2017-01-02,,,1021.75,,8.38,4.65,0.145642,0.003377,0.002477,0.006311
2017-01-03,,,1043.84,,9.73,4.63,0.140422,0.003573,0.002554,0.006386
2017-01-04,,,1154.73,,11.25,4.84,0.136734,0.003766,0.002735,0.00657
2017-01-05,,,1013.38,,10.25,4.29,0.13107,0.003638,0.002598,0.006201


In [37]:
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')

In [18]:
combined_df.pct_change().corr(method='pearson')

Unnamed: 0,ADA,BCH,BTC,EOS,ETH,LTC,NEO,XEM,XLM,XRP
ADA,1.0,0.034966,0.20093,0.17202,0.23591,0.180699,0.247575,0.256108,0.418911,0.447704
BCH,0.034966,1.0,0.122153,0.285166,0.311463,0.224193,0.166066,0.204476,0.092444,0.167548
BTC,0.20093,0.122153,1.0,0.286311,0.380272,0.400781,0.232765,0.223135,0.231785,0.135894
EOS,0.17202,0.285166,0.286311,1.0,0.405731,0.316504,0.237173,0.234753,0.235597,0.234207
ETH,0.23591,0.311463,0.380272,0.405731,1.0,0.361214,0.282201,0.308023,0.217394,0.131059
LTC,0.180699,0.224193,0.400781,0.316504,0.361214,1.0,0.275524,0.3123,0.274177,0.229618
NEO,0.247575,0.166066,0.232765,0.237173,0.282201,0.275524,1.0,0.177701,0.168576,0.086971
XEM,0.256108,0.204476,0.223135,0.234753,0.308023,0.3123,0.177701,1.0,0.290582,0.181385
XLM,0.418911,0.092444,0.231785,0.235597,0.217394,0.274177,0.168576,0.290582,1.0,0.483858
XRP,0.447704,0.167548,0.135894,0.234207,0.131059,0.229618,0.086971,0.181385,0.483858,1.0


In [19]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a correlation heatmap for the entire dataframe'''
    heatmap = go.Heatmap(
        z=df.corr(method='pearson').as_matrix(),
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )
    
    layout = go.Layout(title=title)
    
    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0
        
    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [20]:
correlation_heatmap(combined_df.pct_change(), "Cryptocurrency Correlations in 2017")