In [3]:
from datetime import time , datetime , date
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import psycopg2
import sys
import pprint

import pymongo
from pymongo import MongoClient

## Lectura y escritura de dataframes a archivos Pickle

In [4]:
# Pieces of the pickle file path built by get_pickle / put_pickle:
#   pickle_storage + dbname + '_' + name + pickle_extension
pickle_storage = "./dataframes/"
pickle_extension = ".pkl"

def get_pickle(dbname,name):
    """Load the DataFrame stored under (dbname, name).

    The file read is pickle_storage + dbname + '_' + name + pickle_extension.
    NOTE(review): unpickling can execute arbitrary code, so only read files
    that were written by a trusted source (e.g. put_pickle in this notebook).
    """
    path = ''.join([pickle_storage, dbname, '_', name, pickle_extension])
    return pd.read_pickle(path)

def put_pickle(dbname,name, frame):
    """Write `frame` to pickle_storage + dbname + '_' + name + pickle_extension.

    Returns None; the storage directory is not created here.
    """
    path = ''.join([pickle_storage, dbname, '_', name, pickle_extension])
    frame.to_pickle(path)

## Conectando con MongoDB

In [5]:
class LabelMetadata:
    """Plain attribute container describing one labelled data range.

    All attributes are class-level defaults that get_metadata overwrites per
    instance. minDate/minTime and maxDate/maxTime are kept as strings and are
    combined on demand by getMinDateTime / getMaxDateTime.
    """

    # Source and naming.
    table = None
    labelName = None
    outputName = None

    # Labelled intervals.
    intervals = None

    # Range limits, as separate date and time strings.
    minDate = None
    minTime = None
    maxDate = None
    maxTime = None

    # Truthy when the optional "advances" step of the pipeline must run.
    requireAdvances = False

    def getMinDateTime(self):
        """Return minDate + minTime parsed by get_date_time."""
        lower_limit = get_date_time(self.minDate, self.minTime)
        return lower_limit

    def getMaxDateTime(self):
        """Return maxDate + maxTime parsed by get_date_time."""
        upper_limit = get_date_time(self.maxDate, self.maxTime)
        return upper_limit
    
def get_date_time(date,time):
    """Parse a '%Y-%m-%d' date string and a '%H:%M:%S' time string into one datetime.

    The parameter names shadow the `date` and `time` classes imported at the top
    of the notebook, but only inside this function.
    """
    stamp = ' '.join([date, time])
    return datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')

def get_interval_description(i):
    """Return the bounds of one stored label interval as strings.

    Parameters
    ----------
    i : mapping with at least the keys 'x_min' and 'x_max'.

    Returns
    -------
    tuple (str(x_min), str(x_max)).

    Only the two bounds are read. The interval's 'text' and 'points' entries
    used to be fetched and converted here although the result never used them,
    which made those keys mandatory for no reason.
    """
    return (str(i['x_min']), str(i['x_max']))

def get_metadata_object(name):
    """Return the first document of the Mongo `labels` collection whose name is `name`.

    Uses the module-level `db` handle. Only one document is fetched instead of
    materialising the whole cursor.

    Raises
    ------
    IndexError
        When no document matches (same exception type as before, now with a
        message that says which name was missing).
    """
    document = db.labels.find_one({"name": name})
    if document is None:
        raise IndexError("no label metadata found with name %r" % (name,))
    return document

def get_label_intervals_by_name(name):
    """Return the raw 'intervals' entry of the label metadata document called `name`."""
    return get_metadata_object(name)['intervals']

def get_datetime_pairs(x):
    """Rebuild the two datetimes in x[0] and x[1] down to whole seconds.

    Each result is a fresh `datetime` made from year..second only, so any
    sub-second part of the inputs is dropped.
    """
    def _to_whole_second(moment):
        return datetime(moment.year, moment.month, moment.day,
                        moment.hour, moment.minute, moment.second)

    first, second = x[0], x[1]
    return (_to_whole_second(first), _to_whole_second(second))

def get_labels_by_name(intervals):
    """Convert stored label intervals into a list of (start, end) datetime pairs.

    Despite its name, this function takes the interval documents themselves,
    not a label name.

    For every interval the bounds from get_interval_description are parsed with
    '%Y-%m-%d %H:%M:%S.%f' and then reduced to whole seconds by
    get_datetime_pairs.

    Returns a concrete list (not a lazy `map` object), so the result can be
    iterated more than once on any Python version.
    """
    pattern = '%Y-%m-%d %H:%M:%S.%f'
    pairs = []
    for interval in intervals:
        low_text, high_text = get_interval_description(interval)
        parsed = (datetime.strptime(low_text, pattern),
                  datetime.strptime(high_text, pattern))
        pairs.append(get_datetime_pairs(parsed))
    return pairs
    
def get_metadata(name):
    """Build a LabelMetadata object from the Mongo label document called `name`.

    Besides the attributes declared on LabelMetadata, the instance also gets
    `minLimit` and `maxLimit` (the parsed min/max date-times).
    """
    # Metadata document stored in Mongo.
    document = get_metadata_object(name)

    metadata = LabelMetadata()
    metadata.intervals = get_labels_by_name(document['intervals'])

    # (attribute on LabelMetadata, key in the Mongo document), copied as str.
    text_fields = (
        ('table', 'table'),
        ('minDate', 'minDate'),
        ('maxDate', 'maxDate'),
        ('minTime', 'minTime'),
        ('maxTime', 'maxTime'),
        ('labelName', 'name'),
        ('outputName', 'outputName'),
    )
    for attribute, key in text_fields:
        setattr(metadata, attribute, str(document[key]))

    metadata.minLimit = get_date_time(metadata.minDate, metadata.minTime)
    metadata.maxLimit = get_date_time(metadata.maxDate, metadata.maxTime)
    # Plain truthiness of the stored value.
    metadata.requireAdvances = bool(document['requireAdvances'])

    return metadata
    
def get_mongo_data_frame(name):
    """Load every document of the Mongo collection `name` into a DataFrame."""
    documents = list(db[name].find())
    return pd.DataFrame(documents)

def convert_fields_to_datetime(df,fields):
    """Return a copy of `df` in which every column listed in `fields` holds datetimes.

    Each value is passed through pd.Timestamp(...).to_pydatetime(). The input
    frame is left untouched.

    The converted values are collected in a real list before being assigned;
    assigning the result of `map` only works where `map` returns a list.
    """
    converted = df.copy()

    for field in fields:
        converted[field] = [pd.Timestamp(value).to_pydatetime()
                            for value in converted[field].values]

    return converted

def put_mongo_data_frame(name , dataframe):
    """Replace the Mongo collection `name` with the rows of `dataframe`.

    The frame is converted to records before the old collection is dropped, so
    a conversion error cannot leave the collection deleted. An empty frame just
    leaves the collection dropped: insert_many rejects an empty document list,
    so it is skipped in that case.
    """
    records = dataframe.to_dict('records')
    db.drop_collection(name)
    if records:
        db[name].insert_many(records)

In [6]:
# No arguments are passed, so pymongo's default connection settings apply.
client = MongoClient()
# Database handle used by the Mongo helper functions of this notebook.
db = client["DS"]

## Conectando con PostgreSQL

In [7]:
def connect(host , db, user='postgres', password='postgres'):
    """Open and return a psycopg2 connection to database `db` on `host`.

    Parameters
    ----------
    host, db : server host and database name (`db` shadows the module-level
        Mongo handle only inside this function).
    user, password : credentials; the defaults keep the values that used to be
        hard-coded, so existing two-argument calls behave as before.

    The connection parameters are passed as keyword arguments instead of a
    hand-quoted DSN string, and the progress message no longer contains the
    password.
    """
    print("Connecting to database: host='%s' dbname='%s' user='%s'" % (host, db, user))
    conn = psycopg2.connect(host=host, dbname=db, user=user, password=password)
    print("Connected!")
    return conn

def analyze(prod_name, df  , features ):
    """Plot df[features] against df.index and write the figure to '<prod_name>.html'.

    The x axis is a date axis with a range slider and range-selector buttons
    for 1 month, 6 months and everything.

    NOTE(review): `go` and `off` are not defined or imported anywhere in the
    visible part of this notebook (presumably plotly.graph_objs and
    plotly.offline - confirm). Unless they are bound elsewhere, calling this
    function raises NameError.
    """
    trace = go.Scatter(
        x=df.index,
        y=df[features],
        name=prod_name,
        line={'color': '#17BECF'},
        opacity=0.8)

    range_buttons = [
        {'count': 1, 'label': '1m', 'step': 'month', 'stepmode': 'backward'},
        {'count': 6, 'label': '6m', 'step': 'month', 'stepmode': 'backward'},
        {'step': 'all'},
    ]

    layout = {
        'title': prod_name,
        'xaxis': {
            'rangeselector': {'buttons': range_buttons},
            'rangeslider': {},
            'type': 'date',
        },
    }

    fig = {'data': [trace], 'layout': layout}
    off.plot(fig, filename=prod_name + '.html')

def get_frame_by_date_range(table,minDate,minTime,maxDate,maxTime):
    """Run the `queryForRange` template for one table and date/time range.

    The @-placeholders are replaced textually (no bound parameters), so the
    arguments must be strings from a trusted source. The resulting SQL is
    executed by get_frame.
    """
    substitutions = (
        ('@table', table),
        ('@minDate', minDate),
        ('@minTime', minTime),
        ('@maxDate', maxDate),
        ('@maxTime', maxTime),
    )
    query = queryForRange
    for placeholder, value in substitutions:
        query = query.replace(placeholder, value)
    return get_frame(query)
    
def get_frame(query ):
    """Execute `query` on the module-level `conn` and return the rows as a DataFrame.

    Prints the number of rows read.
    """
    result = pd.read_sql(query, conn)
    print('Leidos: %s' % len(result))
    return result

def clean_data(frame):
    """Return a copy of `frame` indexed by the combined event date and time.

    A 'datetime' column is built with datetime.combine(event_date, event_time)
    and becomes the index. event_time and event_date are then replaced by their
    '%H:%M:%S.%f' and '%Y-%m-%d' string forms. The input frame is not modified.
    """
    def _combine(row):
        return datetime.combine(row['event_date'], row['event_time'])

    def _time_text(value):
        return time.strftime(value, '%H:%M:%S.%f')

    def _date_text(value):
        return date.strftime(value, '%Y-%m-%d')

    cleaned = frame.copy()
    cleaned['datetime'] = cleaned.apply(_combine, axis=1)
    cleaned['event_time'] = cleaned['event_time'].apply(_time_text)
    cleaned['event_date'] = cleaned['event_date'].apply(_date_text)
    return cleaned.set_index(['datetime'])


In [8]:
# PostgreSQL connection used by get_frame.
conn = connect('localhost', 'ds')
#conn = connect('192.168.1.3' , 'gtrader')

# Query template; get_frame_by_date_range fills in the @-placeholders.
queryForRange = (
    "select * from @table where (event_date + event_time) between "
    "(date '@minDate' + time '@minTime' ) and ( date '@maxDate' + time '@maxTime')"
)


Connecting to database: host='localhost' dbname='ds' user='postgres' password='postgres'
Connected!


## Modelo - Eventos estudiados

<ul>
    <li>Spread</li>

    <li>Demand Buys p/ intervalo</li>
    <li>Demand Sells p/ intervalo</li>
    <li>Demand Buys totales</li>
    <li>Demand Sells totales</li>
    
    <li>Absorciones (Bloqueos): Compras/Ventas hechas por la demanda que no logran comer el nivel de la oferta sobre el que 
        impactan. Por lo tanto, la siguiente operación del mismo tipo impacta en el mismo nivel de la oferta.
        <ul>
            <li>Absorciones Ask p/ intervalo</li>
            <li>Absorciones Ask totales</li>
            <li>Absorciones Bid p/ intervalo</li>
            <li>Absorciones Bid totales</li>
        </ul>
    </li>
    <li>Rupturas: Compras/Ventas hechas por la demanda que SÍ logran comer el nivel de la oferta sobre el que 
        impactan. Por lo tanto, la siguiente operación del mismo tipo impacta en un nivel superior/inferior de la oferta.
        <ul>
            <li>Rupturas Ask p/ intervalo</li>
            <li>Rupturas Ask totales</li>
            <li>Rupturas Bid p/ intervalo</li>
            <li>Rupturas Bid totales</li>            
        </ul>
    </li>
    <li>Contra-ataques: Avances de la oferta en un sentido.
        <ul>
            <li>Contra-ataques Ask p/ intervalo</li>
            <li>Contra-ataques Ask totales</li>
            <li>Contra-ataques Bid p/ intervalo</li>
            <li>Contra-ataques Bid totales</li>
        </ul>
    </li>   
    <li>Retiradas: Retrocesos de la oferta en un sentido.
        <ul>
            <li>Retiradas Ask p/ intervalo</li>
            <li>Retiradas Ask totales</li>
            <li>Retiradas Bid p/ intervalo</li>
            <li>Retiradas Bid totales</li>
        </ul>
    </li>  
    
    
    <li>Unknown counts p/ intervalo</li>
    <li>Unknown counts totales</li>
</ul>

## Bitácora

Otras combinaciones disponibles como puntos de mejora
<ul>
   
     <li>Considerar distintos números de niveles del book</li>
     <li>Distintos umbrales de comienzo y fin de cada tramo</li>
    <li>Agregar más de dos intervalos (solo si se puede reconocer algún label para un estado intermedio)</li>
    <li>Variar la regularización</li>
    <li>Hacer una regresión del último estado para calcular su variación promedio</li>
    <li>Probar diferentes modelos clasificatorios, por ejemplo RNAs, árboles, etc.</li>
    <li>Los tramos estacionarios pueden tener una componente de tendencia. Por medio de una regresión lineal se pueden determinar los coeficientes de la misma.</li>
    <li>Dentro de los tramos estacionarios puede haber varios cambios de pendiente, por lo cual, si se construyera una regresión lineal (RL) para quitarles la tendencia (detrending), sería necesario calcularla por subintervalos</li>
 ----------------------------------
  <li>Listo: Variar el tamaño de las ventanas de tiempo para obtener distintas frecuencias</li>
    <li>Listo: Variar los features intervinientes en el modelo</li>
<li>Listo: Variar los porcentajes de avance de los intervalos (ej. 70-30, 80-20, 60-40, etc.)</li>
<li>Listo: Probar diferentes algoritmos</li>
     <li>Listo: La clasificación puede indicar no solo '1' para cambio de estado sino un rango [-1,1] para indicar el sentido de dicho cambio</li>    
</ul>

In [9]:
# Column groups of the raw tick table, by value type.
float_fields = ['bid_price', 'ask_price', 'event_price']
int_fields = ['bid_size', 'ask_size', 'event_size']
str_fields = ['event_type']
date_fields = ['event_date', 'event_time']
# Interleave every price with its size: bid_price, bid_size, ask_price, ...
# A comprehension avoids the builtin `reduce`, which exists only on Python 2.
prices_and_sizes = [field for pair in zip(float_fields, int_fields) for field in pair]
# Columns that must be non-null (see drop_na_and_clean).
colnames = date_fields + str_fields + prices_and_sizes

#### Preparación de datos y generación del set en dominio de frecuencias

In [10]:
def add_previous_offer(zs_tape):
    """Return a copy of `zs_tape` with 'prev_bid' and 'prev_ask' columns.

    Row k receives the bid_price / ask_price of row k-1. The first row has no
    predecessor and receives the literal string "NaN" (a string, not a float
    NaN).
    """
    bids = zs_tape['bid_price'].values
    asks = zs_tape['ask_price'].values
    shifted = zs_tape.copy()
    rows = len(shifted)

    # Placeholder first, then every value except the last; the final slice
    # keeps an empty frame empty.
    shifted['prev_bid'] = (["NaN"] + list(bids[:-1]))[:rows]
    shifted['prev_ask'] = (["NaN"] + list(asks[:-1]))[:rows]

    return shifted

def add_next_offer(zs_tape):
    """Return a copy of `zs_tape` with 'next_bid' and 'next_ask' columns.

    Row k receives the bid_price / ask_price of row k+1. The last row has no
    successor and receives the literal string "NaN" (a string, not a float
    NaN).
    """
    bids = zs_tape['bid_price'].values
    asks = zs_tape['ask_price'].values
    shifted = zs_tape.copy()
    rows = len(shifted)

    # Every value except the first, then the placeholder; the final slice
    # keeps an empty frame empty.
    shifted['next_bid'] = (list(bids[1:]) + ["NaN"])[:rows]
    shifted['next_ask'] = (list(asks[1:]) + ["NaN"])[:rows]
    return shifted

def get_trade_at(x):
    """Classify a TRADE row by the side of the book its price matches.

    Returns "ASK" when event_price equals ask_price, otherwise "BID" when it
    equals bid_price, otherwise "Unknown". Rows whose event_type is not 'TRADE'
    give the string "NaN".
    """
    if x['event_type'] != 'TRADE':
        return "NaN"

    price = x['event_price']
    if price == x['ask_price']:
        return "ASK"
    if price == x['bid_price']:
        return "BID"
    return "Unknown"

def generate_trade_events(x):
    """Label a trade by what happened to the quote level it hit.

    Uses x['trade_at'] ('BID' or 'ASK') and the next quote on that side:
      * next quote equals the trade price                 -> "Absortion"
      * next quote moved away from the trade price
        (lower bid / higher ask):
          - event_size >= quoted size on that side        -> "Break"
          - otherwise                                     -> "Break by Retracement"
      * any other case                                    -> "Absortion by Advance"
    Rows with any other trade_at value give the string "NaN".
    """
    side = x['trade_at']
    if side == 'BID':
        next_key, size_key = 'next_bid', 'bid_size'
    elif side == 'ASK':
        next_key, size_key = 'next_ask', 'ask_size'
    else:
        return "NaN"

    price = x['event_price']
    following = x[next_key]

    # Absorption: the level survived the trade.
    if price == following:
        return "Absortion"

    if side == 'BID':
        level_gave_way = following < price
    else:
        level_gave_way = following > price

    # Absorption by advance.
    if not level_gave_way:
        return "Absortion by Advance"

    # Break when the trade was at least as big as the quoted size, otherwise
    # break by retracement.
    if x['event_size'] >= x[size_key]:
        return "Break"
    return "Break by Retracement"
    
def generate_tick_events(x):
    """Label a quote tick by how the next quote on the same side compares to it.

    For event_type 'ASK': a higher next_ask is a 'Retracement', a lower one an
    'Advance'. For event_type 'BID' it is the other way round. An unchanged
    quote is a 'Place'. Any other event_type gives the string "NaN".
    """
    kind = x['event_type']
    if kind == 'ASK':
        price, following = x['event_price'], x['next_ask']
        label_if_lower, label_if_higher = 'Retracement', 'Advance'
    elif kind == 'BID':
        price, following = x['event_price'], x['next_bid']
        label_if_lower, label_if_higher = 'Advance', 'Retracement'
    else:
        return "NaN"

    if price < following:
        return label_if_lower
    if price > following:
        return label_if_higher
    return 'Place'
    
def get_hour_and_minute(x):
    """Return the first two ':'-separated fields of `x` as a (hour, minute) tuple."""
    parts = x.split(':')
    hour, minute = parts[0], parts[1]
    return (hour, minute)

def get_intervals(h,m,window):
    """List the 'h:m:s' strings for s = 0, window, 2*window, ... below 60.

    The seconds field is not zero-padded (e.g. '08:12:0').
    """
    labels = []
    for second in range(0, 60, window):
        labels.append(':'.join((h, m, str(second))))
    return labels

def get_intervals_collection(frame,window):
    """Return the sorted 'HH:MM:SS' window starts for every minute present in frame['time'].

    For each distinct (hour, minute) found in the 'time' column, the window
    starts produced by get_intervals are parsed, sorted chronologically and
    formatted back with zero padding.

    Returns a list, which is empty when the frame has no rows. Flattening is
    done with a comprehension: `reduce` without an initial value raises
    TypeError on an empty sequence, and repeated list concatenation is
    quadratic.
    """
    pattern = '%H:%M:%S'
    hours_and_mins = set(get_hour_and_minute(value) for value in frame['time'].values)
    marks = [datetime.strptime(text, pattern)
             for hour, minute in hours_and_mins
             for text in get_intervals(hour, minute, window)]
    return [datetime.strftime(mark, pattern) for mark in sorted(marks)]

def criteria(x,start,end):
    """Tell whether the window [start, end] lies inside the labelled range x.

    Parameters
    ----------
    x : sequence whose first two items are the label's start and end times.
        Further items (such as the label value) are ignored.
    start, end : window bounds.
    All times are '%H:%M:%S' strings.

    Returns
    -------
    bool - True when x[0] <= start and end <= x[1].
    """
    pattern = '%H:%M:%S'
    label_start = datetime.strptime(x[0], pattern)
    label_end = datetime.strptime(x[1], pattern)
    window_start = datetime.strptime(start, pattern)
    window_end = datetime.strptime(end, pattern)

    return window_start >= label_start and window_end <= label_end

def get_label_for_interval(label_id , start , end , flabels):
    """Return the label (item 2) of the first entry of `flabels` containing [start, end].

    Containment is decided by criteria(). When no entry matches, the string
    "NaN" is returned. `label_id` is accepted for compatibility and not used.

    The search stops at the first match and does not depend on `filter`
    returning a list.
    """
    for candidate in flabels:
        if criteria(candidate, start, end):
            return candidate[2]
    return "NaN"
    
def drop_na_and_clean(frame):
    """Drop incomplete rows, cast prices to float and apply clean_data.

    Rows with a null in any column of `colnames` are removed, every column of
    `float_fields` is converted with float(), and the result goes through
    clean_data (which sets the 'datetime' index). The input frame is not
    modified.

    An earlier, disabled variant appended am/pm to event_time and parsed it
    with a 12-hour pattern; clean_data is used instead.
    """
    def _as_float(value):
        return float(value)

    cleaned = frame.copy().dropna(subset=colnames)

    for column in float_fields:
        cleaned[column] = cleaned[column].apply(_as_float)

    return clean_data(cleaned)

def prepare_data_in_time_domain(frame):
    """Return a copy of `frame` enriched with neighbouring quotes and event labels.

    Adds prev_bid/prev_ask and next_bid/next_ask, then the row-wise
    classifications 'trade_at', 'trade_event' and 'tick_event', in that order
    (trade_event reads trade_at).
    """
    # Previous and next bids/asks for every record.
    enriched = add_next_offer(add_previous_offer(frame.copy()))

    classifiers = (
        ('trade_at', get_trade_at),
        ('trade_event', generate_trade_events),
        ('tick_event', generate_tick_events),
    )
    for column, classify in classifiers:
        enriched[column] = enriched.apply(classify, axis=1)
    return enriched

def generate_final_set(frame, max_limit , min_limit , func_datetimes ):
    """Aggregate the tick frame into one row per time window.

    Parameters
    ----------
    frame : DataFrame whose index is compared against the window boundaries and
        which has the columns ask_price, bid_price, trade_at and event_size.
    max_limit, min_limit : accepted but not used by this function.
    func_datetimes : callable(frame) returning the indexable sequence of window
        boundaries.

    Returns
    -------
    DataFrame with one row per pair of consecutive boundaries
    [datetimes[i], datetimes[i+1]): start, end, size and count of the trades at
    bid / ask / unknown, and the first and last ask and bid price of the window.

    The last boundary only closes the previous window, so rows at or after it
    are not aggregated. A window without rows raises IndexError when its first
    price is read.
    """
    n_columns = ['start', 'end',
                 'trades_at_bid_size',
                 'trades_at_bid_count',
                 'trades_at_ask_size',
                 'trades_at_ask_count',
                 'trades_at_unknowns_size',
                 'trades_at_unknowns_count',
                 'initial_ask_price', 'initial_bid_price', 'final_ask_price', 'final_bid_price']
    series = dict((column, []) for column in n_columns)

    # (value of trade_at, count column, size column), in processing order.
    sides = (
        ('BID', 'trades_at_bid_count', 'trades_at_bid_size'),
        ('ASK', 'trades_at_ask_count', 'trades_at_ask_size'),
        ('Unknown', 'trades_at_unknowns_count', 'trades_at_unknowns_size'),
    )

    datetimes = func_datetimes(frame)

    for i in range(len(datetimes) - 1):
        start = datetimes[i]
        end = datetimes[i + 1]
        series['start'].append(start)
        series['end'].append(end)

        window = frame[(frame.index >= start) & (frame.index < end)]

        # Prices at the beginning and at the end of the window.
        asks = window['ask_price'].values
        bids = window['bid_price'].values
        first_ask = asks[0]
        first_bid = bids[0]
        series['initial_ask_price'].append(first_ask)
        series['initial_bid_price'].append(first_bid)
        series['final_ask_price'].append(asks[-1])
        series['final_bid_price'].append(bids[-1])

        # Market-price trades split by the side they hit.
        for side, count_column, size_column in sides:
            trades = window[window['trade_at'] == side]
            series[count_column].append(len(trades))
            series[size_column].append(sum(trades['event_size']))

    return pd.DataFrame(series, columns=n_columns)

def process_data_and_generate_set(inputdata, metadata ):
    """Run the labelling pipeline on already cleaned tick data.

    Parameters
    ----------
    inputdata : tick DataFrame handed to prepare_data_in_time_domain.
    metadata : object providing getMinDateTime(), getMaxDateTime(), intervals,
        outputName and requireAdvances (e.g. LabelMetadata).

    Returns
    -------
    The labelled frequency-domain frame; when metadata.requireAdvances is
    truthy, the frame returned by the advances step instead.

    The body used to read a name `input_data` that is not the parameter
    (`inputdata`), which fails with NameError unless a global of that name
    happens to exist; the parameter is used now.

    NOTE(review): generate_final_set_2, apply_labels and
    add_advance_in_frame_for_output_2 are not defined in the visible part of
    this notebook - confirm they exist before calling this function.
    """
    min_limit = metadata.getMinDateTime()
    max_limit = metadata.getMaxDateTime()
    all_labels = metadata.intervals
    output_name = metadata.outputName

    ## 1) prepare the data in the time domain
    frame1 = prepare_data_in_time_domain(inputdata)

    ## 2) build the data set in the frequency domain
    frame2 = generate_final_set_2(frame1, max_limit, min_limit)

    ## 3) keep only the windows inside the metadata limits
    frame3 = frame2[(frame2['start'] >= min_limit) & (frame2['end'] <= max_limit)]

    ## 4) apply labels
    outputSet = apply_labels(frame3, all_labels, output_name)

    if metadata.requireAdvances:
        ## 5) apply advances to labels: levels 0.50, 0.55, ..., 0.95
        advance_levels = [.5 + float(x)/20 for x in range(0, 10)]
        outputSet = add_advance_in_frame_for_output_2(outputSet, all_labels, advance_levels, output_name)

    return outputSet

In [11]:

def _get_unique_datetimes_for_set_second_window(frame , window_seconds):
    """Return the distinct window starts hit by the rows of `frame`.

    Every row's event_date + event_time (strings, parsed with
    '%Y-%m-%d%H:%M:%S.%f') is truncated to the beginning of its
    `window_seconds`-wide window inside the minute. The distinct values are
    returned sorted, as produced by np.unique.

    The seconds are floored with `//`: plain `/` only floors integers on
    Python 2 and would hand a float to datetime elsewhere.
    """
    def get_trunked_datetime(d , t):
        dd = datetime.strptime(d + t, '%Y-%m-%d%H:%M:%S.%f')
        floored_second = (dd.second // window_seconds) * window_seconds
        return datetime(dd.year, dd.month, dd.day, dd.hour, dd.minute, floored_second)

    applied = frame.apply(lambda x: get_trunked_datetime(x.event_date, x.event_time), axis=1)

    return np.unique(applied)

def get_unique_datetimes_by_5_seconds(frame):
    """Distinct 5-second window starts of `frame` (see _get_unique_datetimes_for_set_second_window)."""
    window_seconds = 5
    return _get_unique_datetimes_for_set_second_window(frame, window_seconds)

def get_unique_datetimes_by_1_minute(frame):
    """Return the distinct minutes hit by the rows of `frame`.

    Every row's event_date + event_time (strings, parsed with
    '%Y-%m-%d%H:%M:%S.%f') is truncated to the start of its minute. The
    distinct values are returned sorted, as produced by np.unique.
    """
    def _start_of_minute(row):
        parsed = datetime.strptime(row.event_date + row.event_time, '%Y-%m-%d%H:%M:%S.%f')
        return parsed.replace(second=0, microsecond=0)

    return np.unique(frame.apply(_start_of_minute, axis=1))

def get_unique_datetimes_by_1_second(frame):
    """Return the distinct whole seconds hit by the rows of `frame`.

    Every row's event_date + event_time (strings, parsed with
    '%Y-%m-%d%H:%M:%S.%f') loses its sub-second part. The distinct values are
    returned sorted, as produced by np.unique.
    """
    def _start_of_second(row):
        parsed = datetime.strptime(row.event_date + row.event_time, '%Y-%m-%d%H:%M:%S.%f')
        return parsed.replace(microsecond=0)

    return np.unique(frame.apply(_start_of_second, axis=1))

## 1) Obtenemos los sets en dominio de frecuencias

### Obtenemos las metadatas

In [13]:
def get_data_frame(metadata , func_datetimes_frequencies):
    """Read, clean and aggregate the tick data described by `metadata`.

    Steps: read the metadata's table and date/time range from PostgreSQL, drop
    incomplete rows and clean them, add the time-domain columns, and aggregate
    into windows whose boundaries come from `func_datetimes_frequencies`.

    A final restriction of the windows to the metadata limits exists only as a
    disabled line in the original cell; the aggregated frame is returned as is.
    """
    raw = get_frame_by_date_range(metadata.table,
                                  metadata.minDate, metadata.minTime,
                                  metadata.maxDate, metadata.maxTime)

    time_domain = prepare_data_in_time_domain(drop_na_and_clean(raw))

    return generate_final_set(time_domain,
                              metadata.getMaxDateTime(),
                              metadata.getMinDateTime(),
                              func_datetimes_frequencies)


In [18]:
# Names of the label-metadata documents to process.
metadata_names = [
    'nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959',
]

# One LabelMetadata object per name.
metadatas = [get_metadata(m) for m in metadata_names]

print(len(metadatas))

1


In [16]:
#    'nymex_future_gc_201712_neg_rev_alpha0_20171101_030000_20171101_165959',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171101_180000_20171102_165959',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171102_180000_20171103_165959',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171105_180000_20171106_080000',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171106_180000_20171107_165959',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171107_180000_20171108_165959',
#   'nymex_future_gc_201712_neg_rev_alpha0_20171108_180000_20171109_165959',
#    'nymex_future_gc_201712_neg_rev_alpha0_20171109_180000_20171110_165959'

###  Generamos los sets por primera vez

In [17]:
## One frequency-domain set per metadata and window size
## (1 second, 5 seconds and 1 minute).
## NOTE(review): the recorded output of the save cell shows a key starting with
## '5_seconds_', while the prefix used here is '5_second_' - confirm which one
## downstream code expects.

initial_data_frames = {}

window_builders = [
    ('1_second_', get_unique_datetimes_by_1_second),
    ('5_second_', get_unique_datetimes_by_5_seconds),
    ('1_minute_', get_unique_datetimes_by_1_minute),
]

for m in metadatas:
    for prefix, func_datetimes in window_builders:
        initial_data_frames[prefix + m.labelName] = get_data_frame(m, func_datetimes)

Leidos: 88526
Leidos: 88526
Leidos: 88526


#### Guardamos en Pickle los Data Frames iniciales

In [19]:
for i, df in initial_data_frames.items():
    print('Guardando: %s con %s registros' % (i, len(df)))
    put_pickle('initial', i, df)

    # Read the file straight back and compare row counts as a sanity check.
    dfx = get_pickle('initial', i)
    print('Comparando: %s vs %s' % (len(df), len(dfx)))
    
    

Guardando: 1_second_nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959 con 26239 registros
Comparando: 26239 vs 26239
Guardando: 5_seconds_nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959 con 6183 registros
Comparando: 6183 vs 6183
Guardando: 1_minute_nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959 con 527 registros
Comparando: 527 vs 527
