In [1]:
import os
from datetime import time , datetime , date

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as off


In [2]:
def analyze(prod_name, df, features):
    """Plot one or more columns of ``df`` against its index and write an HTML chart.

    The figure has a date-typed x axis with a range slider and a range
    selector offering 1 month, 6 months and the whole series. It is passed to
    ``plotly.offline.plot`` with the output file name ``prod_name + '.html'``.

    Parameters
    ----------
    prod_name : str
        Chart title and stem of the output file name. When a single column is
        plotted it is also the trace name.
    df : pandas.DataFrame
        Frame whose index supplies the x values.
    features : column label or list of column labels
        A single label is plotted as one trace named ``prod_name`` (the
        original behaviour). A ``list`` of labels produces one trace per
        label, each named ``prod_name + ' - ' + str(label)``.

    Returns
    -------
    None
    """
    # Only a real list means "several columns"; any other value (str, tuple
    # label, ...) is used as one column label exactly as before.
    multiple = isinstance(features, list)
    columns = features if multiple else [features]

    data = []
    for column in columns:
        if multiple:
            # No explicit line colour here, so the traces are not all drawn
            # in the same colour; plotly's defaults stay in effect.
            trace = go.Scatter(
                x=df.index,
                y=df[column],
                name=prod_name + ' - ' + str(column),
                opacity=0.8)
        else:
            trace = go.Scatter(
                x=df.index,
                y=df[column],
                name=prod_name,
                line=dict(color='#17BECF'),
                opacity=0.8)
        data.append(trace)

    layout = dict(
        title=prod_name,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label='1m',
                         step='month',
                         stepmode='backward'),
                    dict(count=6,
                         label='6m',
                         step='month',
                         stepmode='backward'),
                    dict(step='all')
                ])
            ),
            rangeslider=dict(),
            type='date'
        )
    )

    fig = dict(data=data, layout=layout)
    off.plot(fig, filename=prod_name + '.html')


In [3]:
# Directory prefix and file extension shared by every pickled frame.
pickle_storage = './dataframes/'
pickle_extension = '.pkl'


def _pickle_path(dbname, name):
    """Return ``pickle_storage + dbname + '_' + name + pickle_extension``."""
    return pickle_storage + dbname + '_' + name + pickle_extension


def get_pickle(dbname, name):
    """Load the frame stored under ``dbname`` / ``name``.

    Parameters
    ----------
    dbname : str
        First part of the file name.
    name : str
        Second part of the file name, joined to ``dbname`` with ``'_'``.

    Returns
    -------
    Whatever ``pandas.read_pickle`` returns for that file.
    """
    # NOTE(review): unpickling can execute arbitrary code; only read files
    # that were written by put_pickle or come from a trusted source.
    return pd.read_pickle(_pickle_path(dbname, name))


def put_pickle(dbname, name, frame):
    """Pickle ``frame`` to the file identified by ``dbname`` / ``name``.

    The target directory is created when it does not exist yet, so the first
    save on a fresh checkout no longer fails.

    Parameters
    ----------
    dbname : str
        First part of the file name.
    name : str
        Second part of the file name, joined to ``dbname`` with ``'_'``.
    frame : object with a ``to_pickle(path)`` method
        The frame to store.
    """
    filename = _pickle_path(dbname, name)
    # dirname is '' when the path has no directory part; fall back to '.'.
    os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
    frame.to_pickle(filename)

## Cargando los datos de Pickle (esto se ejecuta una sola vez por la latencia de la red)


Los libros que nos interesan son:
<ul>
<li>bitfinex_btcusd</li>
<li>bitfinex_xrpbtc</li>
<li>bitfinex_xrpusd</li>
</ul>

In [43]:
# Load the pickled 'discretized_1_second' frame of each of the three books.
(df_disc_1_sec_bitfinex_btcusd,
 df_disc_1_sec_bitfinex_xrpbtc,
 df_disc_1_sec_bitfinex_xrpusd) = [
    get_pickle('discretized_1_second', book)
    for book in ('bitfinex_btcusd', 'bitfinex_xrpbtc', 'bitfinex_xrpusd')
]

## Exploramos los datos

In [24]:
# Row count of each frame, one per line.
print(
    len(df_disc_1_sec_bitfinex_btcusd),
    len(df_disc_1_sec_bitfinex_xrpbtc),
    len(df_disc_1_sec_bitfinex_xrpusd),
    sep='\n',
)

540245
536394
536688


In [11]:
# Chart the initial ask price of the btcusd frame, using 'start' as the x axis.
analyze(
    prod_name='df_disc_1_sec_bitfinex_btcusd',
    df=df_disc_1_sec_bitfinex_btcusd.set_index(['start']),
    features='initial_ask_price',
)

In [12]:
# Chart the initial ask price of the xrpbtc frame, using 'start' as the x axis.
analyze(
    prod_name='df_disc_1_sec_bitfinex_xrpbtc',
    df=df_disc_1_sec_bitfinex_xrpbtc.set_index(['start']),
    features='initial_ask_price',
)

In [13]:
# Chart the initial ask price of the xrpusd frame, using 'start' as the x axis.
analyze(
    prod_name='df_disc_1_sec_bitfinex_xrpusd',
    df=df_disc_1_sec_bitfinex_xrpusd.set_index(['start']),
    features='initial_ask_price',
)

### Como los datos tienen valores incompletos para los días anteriores al 18 de enero, truncamos estos registros dejando únicamente las secuencias completas

In [44]:
# Keep only the rows whose 'start' is on or after 2018-01-18 in every frame.
(df_disc_1_sec_bitfinex_btcusd,
 df_disc_1_sec_bitfinex_xrpbtc,
 df_disc_1_sec_bitfinex_xrpusd) = [
    frame[frame.start >= '2018-01-18']
    for frame in (
        df_disc_1_sec_bitfinex_btcusd,
        df_disc_1_sec_bitfinex_xrpbtc,
        df_disc_1_sec_bitfinex_xrpusd,
    )
]

In [59]:
# Labelled row count of each truncated frame, one per line.
print('\n'.join(
    '{} {}'.format(label, len(frame))
    for label, frame in (
        ('df_disc_1_sec_bitfinex_btcusd:', df_disc_1_sec_bitfinex_btcusd),
        ('df_disc_1_sec_bitfinex_xrpbtc:', df_disc_1_sec_bitfinex_xrpbtc),
        ('df_disc_1_sec_bitfinex_xrpusd:', df_disc_1_sec_bitfinex_xrpusd),
    )
))

df_disc_1_sec_bitfinex_btcusd: 491158
df_disc_1_sec_bitfinex_xrpbtc: 487543
df_disc_1_sec_bitfinex_xrpusd: 488037


#### Agregamos índices

In [46]:
# Move the 'start' column into the index of every frame.
(df_disc_1_sec_bitfinex_btcusd,
 df_disc_1_sec_bitfinex_xrpbtc,
 df_disc_1_sec_bitfinex_xrpusd) = [
    frame.set_index(['start'])
    for frame in (
        df_disc_1_sec_bitfinex_btcusd,
        df_disc_1_sec_bitfinex_xrpbtc,
        df_disc_1_sec_bitfinex_xrpusd,
    )
]

#### Visualizamos

In [27]:
# One chart per book: initial ask price over the frame's index.
analyze(
    prod_name='df_disc_1_sec_bitfinex_btcusd',
    df=df_disc_1_sec_bitfinex_btcusd,
    features='initial_ask_price',
)
analyze(
    prod_name='df_disc_1_sec_bitfinex_xrpbtc',
    df=df_disc_1_sec_bitfinex_xrpbtc,
    features='initial_ask_price',
)
analyze(
    prod_name='df_disc_1_sec_bitfinex_xrpusd',
    df=df_disc_1_sec_bitfinex_xrpusd,
    features='initial_ask_price',
)

### Acomodo los nombres de las columnas para hacer un join entre los frames

In [42]:
def add_key_to_columns_names(df, columns, key):
    """Return a copy of ``df`` with each listed column renamed to ``key + '_' + column``.

    Every renamed column is removed from its old position and re-added at the
    end of the frame, in the order given by ``columns``. The input frame is
    left untouched.
    """
    renamed = df.copy()
    for column in columns:
        # pop() drops the old column and hands back its values in one step.
        renamed[key + '_' + column] = renamed.pop(column)
    return renamed

def prepare_frame_to_join(df, columns, key):
    """Return a copy of ``df`` without its 'end' column and with ``columns`` prefixed by ``key``.

    The prefixing is delegated to ``add_key_to_columns_names``; ``df`` itself
    is not modified.
    """
    # drop() returns a new frame, so the caller's frame keeps its 'end' column.
    without_end = df.drop('end', axis=1)
    return add_key_to_columns_names(without_end, columns, key)

In [47]:
# Prefix the four price columns of each book with the book's key so the
# frames can sit side by side without column-name clashes.
(df_disc_1_sec_bitfinex_btcusd_pre_join,
 df_disc_1_sec_bitfinex_xrpbtc_pre_join,
 df_disc_1_sec_bitfinex_xrpusd_pre_join) = [
    prepare_frame_to_join(
        frame,
        ['initial_ask_price', 'initial_bid_price', 'final_ask_price', 'final_bid_price'],
        key,
    )
    for frame, key in (
        (df_disc_1_sec_bitfinex_btcusd, 'btcusd'),
        (df_disc_1_sec_bitfinex_xrpbtc, 'xrpbtc'),
        (df_disc_1_sec_bitfinex_xrpusd, 'xrpusd'),
    )
]

#### Joineamos

In [50]:
# Column-wise concatenation; join='inner' keeps only index values present in
# all three frames.
df_joined = pd.concat(
    objs=[
        df_disc_1_sec_bitfinex_btcusd_pre_join,
        df_disc_1_sec_bitfinex_xrpbtc_pre_join,
        df_disc_1_sec_bitfinex_xrpusd_pre_join,
    ],
    axis=1,
    join='inner',
)

In [51]:
# Preview the first five rows of the joined frame.
df_joined.head(n=5)

Unnamed: 0_level_0,btcusd_initial_ask_price,btcusd_initial_bid_price,btcusd_final_ask_price,btcusd_final_bid_price,xrpbtc_initial_ask_price,xrpbtc_initial_bid_price,xrpbtc_final_ask_price,xrpbtc_final_bid_price,xrpusd_initial_ask_price,xrpusd_initial_bid_price,xrpusd_final_ask_price,xrpusd_final_bid_price
start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-18 09:11:02,11712.0,11711.0,11712.0,11711.0,0.000132,0.000131,0.000132,0.000131,1.5338,1.5324,1.5338,1.5324
2018-01-18 09:11:03,11712.0,11711.0,11712.0,11709.0,0.000132,0.000131,0.000132,0.000131,1.5338,1.5324,1.5338,1.5324
2018-01-18 09:11:04,11712.0,11706.0,11709.0,11702.0,0.000132,0.000131,0.000132,0.000131,1.5338,1.5324,1.5337,1.5324
2018-01-18 09:11:05,11709.0,11699.0,11708.0,11699.0,0.000132,0.000131,0.000131,0.000131,1.5337,1.5302,1.53,1.5296
2018-01-18 09:11:06,11708.0,11699.0,11707.0,11703.0,0.000131,0.000131,0.000131,0.000131,1.53,1.5296,1.53,1.5296


In [62]:
# Number of rows that survived the inner join.
print('df_joined:', df_joined.shape[0])


df_joined: 467960


#### Nota: El subconjunto de los datos coincide con el frame menor y se han eliminado datos intermedios, posiblemente por ausencia de cotizaciones en alguno de los libros

#### Guardamos en Pickle:

In [63]:
# Persist the joined frame through the pickle helper.
put_pickle(
    dbname='joined_1_second',
    name='btcusd_xrpbtc_xrpusd',
    frame=df_joined,
)