In [1]:
%matplotlib notebook
import requests
import json, time, datetime, math
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal, fftpack

#helper functions
def print_time(unix, msg=''):
    print(msg, time.ctime(int(unix)))
    
def get_readable_time(unix):
    return [time.ctime(int(u)) for u in unix]

def plot_corr(df):
    '''Function plots a graphical correlation matrix for each pair of columns in the dataframe.

    Input:
        df: pandas DataFrame
        size: vertical and horizontal size of the plot'''

    corr = df.corr()
    fig, ax = plt.subplots(figsize=(9,9))
    ax.matshow(corr)
    plt.xticks(range(len(corr.columns)), corr.columns, rotation=90);
    plt.yticks(range(len(corr.columns)), corr.columns);
    fig.show()

DATA_SOURCE = 'poloniex'
DATA_PATH = 'data/' + DATA_SOURCE + '/'
if DATA_SOURCE == 'poloniex':
    df_btc = pd.read_pickle(DATA_PATH + 'USDT_BTC.pkl')
    df_ltc = pd.read_pickle(DATA_PATH + 'USDT_LTC.pkl')
    df_eth = pd.read_pickle(DATA_PATH + 'USDT_ETH.pkl')
    df_xrp = pd.read_pickle(DATA_PATH + 'USDT_XRP.pkl')
else:
    data_df = pd.read_pickle(DATA_PATH + 'bitcoin.pkl')
    data_df_ltc = pd.read_pickle(DATA_PATH + 'litecoin.pkl')
    data_df_cap = pd.read_pickle(DATA_PATH + 'marketcap-total.pkl')



In [3]:
df_btc.info()
print('min', print_time(np.min(df_btc['date'])), np.min(df_btc['date']))
print('max', print_time(np.max(df_btc['date'])), np.max(df_btc['date']))
print(np.mean(df_btc['date'].diff()))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7430 entries, 0 to 7429
Data columns (total 4 columns):
close          7430 non-null float64
date           7430 non-null int64
quoteVolume    7430 non-null float64
volume         7430 non-null float64
dtypes: float64(3), int64(1)
memory usage: 232.3 KB
 Thu Feb 19 17:00:00 2015
min None 1424361600
 Wed Jul 11 22:00:00 2018
max None 1531339200
14400.0


In [4]:
df_btc.query('date >= 1451606400').head(50)

Unnamed: 0,close,date,quoteVolume,volume
1892,431.76,1451606400,0.196224,84.083356
1893,433.010137,1451620800,0.414543,179.471338
1894,432.62,1451635200,0.189767,81.880609
1895,431.0,1451649600,4.652019,2008.608664
1896,428.600003,1451664000,0.496768,214.171423
1897,434.99,1451678400,0.937936,406.811553
1898,431.35,1451692800,0.553158,240.674134
1899,432.22,1451707200,0.154693,66.69575
1900,432.5,1451721600,0.065171,28.18603
1901,432.48,1451736000,1.062122,460.17059


In [3]:
print("###BITCOIN")
#print(df_btc.describe())
print(df_btc.corr())

print("###LITECOIN")
#print(df_ltc.describe())
print(df_ltc.corr())

print("###ETHEREUM")
#print(df_eth.describe())
print(df_eth.corr())

print("###RIPPLE")
#print(df_xrp.describe())
print(df_xrp.corr())

"""
price_usd = data_df['price_usd'][0::100]
market_cap = data_df['market_cap'][0::100]
volume = data_df['volume_usd'][0::100]

fig = plt.figure()
axes = fig.add_subplot(111)
axes.plot(range(len(price_usd)), price_usd, label='Cena USD')
axes.plot(range(len(volume)), volume, label='Volume')
fig.show()
"""
#plt.plot(range(len(price_usd)), price_usd, label='Cena USD')
#plt.plot(range(len(market_cap)), market_cap, label='Market Cap')
#plt.plot(range(len(volume)), volume, label='Volume')
#plt.legend()
#plt.show()

###BITCOIN
                    close      date  quoteVolume    volume  weightedAverage
close            1.000000  0.724596     0.243441  0.552363         0.999994
date             0.724596  1.000000     0.378569  0.413114         0.724606
quoteVolume      0.243441  0.378569     1.000000  0.712121         0.243442
volume           0.552363  0.413114     0.712121  1.000000         0.552371
weightedAverage  0.999994  0.724606     0.243442  0.552371         1.000000
###LITECOIN
                    close      date  quoteVolume    volume  weightedAverage
close            1.000000  0.670333     0.098430  0.360765         0.999991
date             0.670333  1.000000     0.231301  0.286478         0.670312
quoteVolume      0.098430  0.231301     1.000000  0.641380         0.098341
volume           0.360765  0.286478     0.641380  1.000000         0.360573
weightedAverage  0.999991  0.670312     0.098341  0.360573         1.000000
###ETHEREUM
                    close      date  quoteVolume    v

"\nprice_usd = data_df['price_usd'][0::100]\nmarket_cap = data_df['market_cap'][0::100]\nvolume = data_df['volume_usd'][0::100]\n\nfig = plt.figure()\naxes = fig.add_subplot(111)\naxes.plot(range(len(price_usd)), price_usd, label='Cena USD')\naxes.plot(range(len(volume)), volume, label='Volume')\nfig.show()\n"

In [4]:
print(df_btc.describe())
print(70*'#')
print(df_ltc.describe())
print(70*'#')
print(df_eth.describe())
print(70*'#')
print(df_xrp.describe())

               close          date    quoteVolume        volume  \
count  327505.000000  3.275050e+05  327505.000000  3.275050e+05   
mean     2355.886818  1.473499e+09      12.714330  5.951428e+04   
std      3681.958009  2.836281e+07      32.789575  1.796844e+05   
min        99.000000  1.424373e+09       0.000000  0.000000e+00   
25%       371.000000  1.448936e+09       0.000000  0.000000e+00   
50%       645.198657  1.473499e+09       0.391221  2.454578e+02   
75%      2457.052952  1.498061e+09      11.732144  4.044794e+04   
max     19896.687300  1.522624e+09    1244.437950  8.905612e+06   

       weightedAverage  
count    327505.000000  
mean       2355.764074  
std        3681.758228  
min           0.000000  
25%         371.000000  
50%         645.263995  
75%        2456.965424  
max       19888.009976  
######################################################################
               close          date    quoteVolume        volume  \
count  322867.000000  3.228670e+0

In [4]:
plot_corr(df_btc)

<IPython.core.display.Javascript object>