In [1]:
from datetime import time , datetime , date
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


import psycopg2
import sys
import pprint
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as off


## Conexión con PostgreSQL

In [2]:

def connect(host , db):
    #conn_string = "host='192.168.1.3' dbname='gtrader' user='postgres' password='postgres'"
    conn_string = "host='" + host +"' dbname='" + db + "' user='postgres' password='postgres'"
    print "Connecting to database\n	->%s" % (conn_string)
    conn = psycopg2.connect(conn_string)
    print "Connected!\n"
    return conn

def get_frame(table ):
    df = pd.read_sql("select * from " + table ,conn)
    print 'Leidos:',len(df)
    return df

def clean_data(frame):
    """Return a copy of ``frame`` indexed by the combined event timestamp.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain an ``event_date`` column of ``datetime.date`` values and
        an ``event_time`` column of ``datetime.time`` values. It is not
        modified.

    Returns
    -------
    pandas.DataFrame
        A copy in which
        * the index is named ``datetime`` and holds
          ``datetime.combine(event_date, event_time)`` for each row,
        * ``event_time`` is replaced by its ``'%H:%M:%S.%f'`` string,
        * ``event_date`` is replaced by its ``'%Y-%m-%d'`` string.
    """
    frame2 = frame.copy()

    # Build the timestamps before the two source columns are overwritten with
    # strings below. Pairing the columns with zip avoids a row-wise
    # DataFrame.apply(axis=1), which creates one Series per row and, for an
    # empty frame, returns a DataFrame instead of a Series.
    frame2['datetime'] = [
        datetime.combine(event_date, event_time)
        for event_date, event_time in zip(frame2['event_date'], frame2['event_time'])
    ]
    # time.strftime(x, fmt) / date.strftime(x, fmt) are the unbound-method
    # spellings of x.strftime(fmt).
    frame2['event_time'] = frame2['event_time'].apply(lambda x : time.strftime(x , '%H:%M:%S.%f'))
    frame2['event_date'] = frame2['event_date'].apply(lambda x : date.strftime(x , '%Y-%m-%d'))
    frame2 = frame2.set_index(['datetime'])

    return frame2

# Alternative target kept for reference (host 192.168.1.3, database gtrader):
#conn = connect('192.168.1.3' , 'gtrader')
# Module-level connection; get_frame reads this global.
conn = connect('localhost' , 'ds')


Connecting to database
	->host='localhost' dbname='ds' user='postgres' password='postgres'
Connected!



## Análisis y exploración del set

In [3]:

def analyze(prod_name, df  , features ):
    """Plot one column of ``df`` against its index and save the chart as HTML.

    Parameters
    ----------
    prod_name : str
        Used as the trace name, the chart title and, with ``'.html'``
        appended, the output file name.
    df : pandas.DataFrame
        Its index supplies the x values; the x axis is declared as a date axis.
    features :
        Column selector applied as ``df[features]`` to obtain the y values.

    The x axis carries a range slider and three range-selector buttons
    (1 month back, 6 months back, everything). Nothing is returned; the
    figure is handed to ``off.plot``.
    """
    trace = go.Scatter(
        x=df.index,
        y=df[features],
        name=prod_name,
        line={'color': '#17BECF'},
        opacity=0.8,
    )

    range_buttons = [
        {'count': 1, 'label': '1m', 'step': 'month', 'stepmode': 'backward'},
        {'count': 6, 'label': '6m', 'step': 'month', 'stepmode': 'backward'},
        {'step': 'all'},
    ]
    x_axis = {
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {},
        'type': 'date',
    }
    figure = {
        'data': [trace],
        'layout': {'title': prod_name, 'xaxis': x_axis},
    }

    off.plot(figure, filename=prod_name + '.html')


def getUrl(d1,d2,label,table, output):
    """Build the gresearcher URL for one table, label and time window.

    Parameters
    ----------
    d1, d2 : datetime
        Start and end of the window. Each is split into a ``%Y-%m-%d`` date
        part and a ``%H:%M:%S`` time part.
    label : str
        Sent verbatim as ``labelName``; no timestamp suffix is added here.
    table : str
        Sent as ``tableAsset``.
    output : str
        Sent as ``outputName``.

    Returns
    -------
    str
        ``http://localhost:4000/gresearcher/#?`` followed by the parameters
        in the order tableAsset, labelName, minDate, minTime, maxDate,
        maxTime, outputName. Values are inserted as-is (no URL-encoding).
    """
    # Assemble the query in a single pass. Substituting placeholders one
    # after another with str.replace would re-scan values that were already
    # inserted, so a value containing e.g. '$minDate' would be rewritten.
    params = [
        ('tableAsset', table),
        ('labelName', label),
        ('minDate', d1.strftime('%Y-%m-%d')),
        ('minTime', d1.strftime('%H:%M:%S')),
        ('maxDate', d2.strftime('%Y-%m-%d')),
        ('maxTime', d2.strftime('%H:%M:%S')),
        ('outputName', output),
    ]
    query = '&'.join([key + '=' + value for key, value in params])

    return 'http://localhost:4000/gresearcher/#?' + query

def printInformation(d1,d2,labelName,tableName,output):
    name = labelName + '_' + datetime.strftime(d1,'%Y%m%d_%H%M%S') + '_'+datetime.strftime(d2,'%Y%m%d_%H%M%S')
    print d1
    print d2
    print name
    print getUrl(d1,d2,name,tableName,output)
    
def prepare_analysis(d1,d2,labelName,dfx,tableName,output):
    """Select the TRADE rows of ``dfx`` inside [d1, d2], chart and report them.

    Rows are kept when ``event_type`` equals ``'TRADE'`` and the index lies
    between ``d1`` and ``d2`` (both bounds inclusive). Their ``event_price``
    column is charted through ``analyze`` under the name
    ``labelName_<d1>_<d2>`` (timestamps as ``%Y%m%d_%H%M%S``), then
    ``printInformation`` is called with the bare ``labelName``.

    Returns the filtered DataFrame.
    """
    stamp_format = '%Y%m%d_%H%M%S'
    chart_name = '_'.join((
        labelName,
        datetime.strftime(d1, stamp_format),
        datetime.strftime(d2, stamp_format),
    ))

    is_trade = dfx['event_type'] == 'TRADE'
    in_window = (dfx.index >= d1) & (dfx.index <= d2)
    trades = dfx[is_trade & in_window]

    analyze(chart_name, trades, 'event_price')
    printInformation(d1, d2, labelName, tableName, output)
    return trades

In [4]:
# Database table to read with get_frame; also sent as the tableAsset parameter of the generated URLs.
tableName = 'nymex_future_gc_201712'

In [8]:
## Load the data.
## df1 holds the rows as read from the database; df2 is the copy indexed by
## the combined date/time, which is what the analysis cells pass to prepare_analysis.
df1 = get_frame(tableName)
df2 = clean_data(df1)

Leidos: 3786696


## Exploración del set a alto nivel (alpha = 0)

In [5]:
# Label prefix; printInformation and prepare_analysis append the window start/end timestamps to it.
labelName = 'nymex_future_gc_201712_neg_rev_alpha0'
# Sent as the outputName parameter of the generated URLs.
output = 'neg_reversal'

In [6]:
##
## Period 1: 2017-10-31 08:12:00 to 2017-10-31 16:59:59.
## This cell only prints the window, the label and the URL; no chart is drawn here.
##

d1 = datetime(2017,10,31,8,12,0)
d2 = datetime(2017,10,31,16,59,59)

printInformation(d1,d2,labelName,tableName,output)


2017-10-31 08:12:00
2017-10-31 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959&minDate=2017-10-31&minTime=08:12:00&maxDate=2017-10-31&maxTime=16:59:59&outputName=neg_reversal


In [19]:
##
## Period 2: 2017-11-01 03:00:00 to 2017-11-01 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,1,3,0,0)
d2 = datetime(2017,11,1,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-11-01 03:00:00
2017-11-01 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171101_030000_20171101_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171101_030000_20171101_165959&minDate=2017-11-01&minTime=03:00:00&maxDate=2017-11-01&maxTime=16:59:59&outputName=neg_reversal


In [20]:
##
## Period 3: 2017-11-01 18:00:00 to 2017-11-02 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,1,18,0,0)
d2 = datetime(2017,11,2,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-11-01 18:00:00
2017-11-02 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171101_180000_20171102_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171101_180000_20171102_165959&minDate=2017-11-01&minTime=18:00:00&maxDate=2017-11-02&maxTime=16:59:59&outputName=neg_reversal


In [21]:
##
## Period 4: 2017-11-02 18:00:00 to 2017-11-03 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,2,18,0,0)
d2 = datetime(2017,11,3,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-11-02 18:00:00
2017-11-03 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171102_180000_20171103_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171102_180000_20171103_165959&minDate=2017-11-02&minTime=18:00:00&maxDate=2017-11-03&maxTime=16:59:59&outputName=neg_reversal


In [22]:
##
## Period 5: 2017-11-05 18:00:00 to 2017-11-06 08:00:00.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
## NOTE(review): this window ends at 08:00:00, while the other periods end at 16:59:59 - confirm this is intentional.
##

d1 = datetime(2017,11,5,18,0,0)
d2 = datetime(2017,11,6,8,0,0)

prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-11-05 18:00:00
2017-11-06 08:00:00
nymex_future_gc_201712_neg_rev_alpha0_20171105_180000_20171106_080000
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171105_180000_20171106_080000&minDate=2017-11-05&minTime=18:00:00&maxDate=2017-11-06&maxTime=08:00:00&outputName=neg_reversal


In [23]:
##
## Period 6: 2017-11-06 18:00:00 to 2017-11-07 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,6,18,0,0)
d2 = datetime(2017,11,7,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)

2017-11-06 18:00:00
2017-11-07 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171106_180000_20171107_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171106_180000_20171107_165959&minDate=2017-11-06&minTime=18:00:00&maxDate=2017-11-07&maxTime=16:59:59&outputName=neg_reversal


In [24]:
##
## Period 7: 2017-11-07 18:00:00 to 2017-11-08 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,7,18,0,0)
d2 = datetime(2017,11,8,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-11-07 18:00:00
2017-11-08 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171107_180000_20171108_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171107_180000_20171108_165959&minDate=2017-11-07&minTime=18:00:00&maxDate=2017-11-08&maxTime=16:59:59&outputName=neg_reversal


In [25]:
##
## Period 8: 2017-11-08 18:00:00 to 2017-11-09 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,8,18,0,0)
d2 = datetime(2017,11,9,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)

2017-11-08 18:00:00
2017-11-09 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171108_180000_20171109_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171108_180000_20171109_165959&minDate=2017-11-08&minTime=18:00:00&maxDate=2017-11-09&maxTime=16:59:59&outputName=neg_reversal


In [26]:
##
## Period 9: 2017-11-09 18:00:00 to 2017-11-10 16:59:59.
## Charts event_price of the TRADE rows in the window and prints the label and URL.
##

d1 = datetime(2017,11,9,18,0,0)
d2 = datetime(2017,11,10,16,59,59)

prepare_analysis(d1,d2,labelName,df2,tableName,output)

2017-11-09 18:00:00
2017-11-10 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171109_180000_20171110_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171109_180000_20171110_165959&minDate=2017-11-09&minTime=18:00:00&maxDate=2017-11-10&maxTime=16:59:59&outputName=neg_reversal


### Analizando los intervalos y los volúmenes

In [9]:

# Same window as period 1 (2017-10-31 08:12:00 to 16:59:59), this time run
# through prepare_analysis so the filtered TRADE rows are kept in f for the
# volume cells that follow.
d1 = datetime(2017,10,31,8,12,0)
d2 = datetime(2017,10,31,16,59,59)

f = prepare_analysis(d1,d2,labelName,df2,tableName,output)


2017-10-31 08:12:00
2017-10-31 16:59:59
nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959
http://localhost:4000/gresearcher/#?tableAsset=nymex_future_gc_201712&labelName=nymex_future_gc_201712_neg_rev_alpha0_20171031_081200_20171031_165959&minDate=2017-10-31&minTime=08:12:00&maxDate=2017-10-31&maxTime=16:59:59&outputName=neg_reversal


In [10]:
# Narrow f to the 10:03:30 - 10:05:00 interval using the string event_time column.
# NOTE(review): this is a lexicographic string comparison. clean_data formats
# event_time as '%H:%M:%S.%f', so every value within second 10:05:00
# (e.g. '10:05:00.000000') sorts after '10:05:00' and is excluded - confirm intended.
examination = f[(f['event_time'] >= '10:03:30') & (f['event_time'] <= '10:05:00')]
# f comes from prepare_analysis, which already keeps only TRADE rows, so this filter looks redundant.
examination = examination[examination['event_type'] == 'TRADE']


In [11]:
# Split the trades by which side of the quote the trade price equals:
# price == bid_price is labelled a sell, price == ask_price a buy.
# Rows matching neither end up in neither frame.
# NOTE(review): relies on exact equality between the price columns - confirm the stored values compare equal as expected.
examination_sells = examination[examination.event_price == examination.bid_price]
examination_buys = examination[examination.event_price == examination.ask_price]


In [12]:
# Chart event_size of the sell-side trades; analyze saves it as 'sells volume.html'.
analyze('sells volume' ,examination_sells, 'event_size')

In [13]:
# Chart event_size of the buy-side trades; analyze saves it as 'buys volume.html'.
analyze('buys volume' ,examination_buys, 'event_size')

In [51]:
# Add price * size per sell-side trade. assign() returns a new frame, so no
# column is written into a frame obtained by boolean indexing (the pattern
# that triggers pandas' SettingWithCopyWarning) and re-running the cell is safe.
examination_sells = examination_sells.assign(
    price_and_size=examination_sells.event_price * examination_sells.event_size
)

In [53]:
# Add price * size per buy-side trade. assign() returns a new frame, so no
# column is written into a frame obtained by boolean indexing (the pattern
# that triggers pandas' SettingWithCopyWarning) and re-running the cell is safe.
examination_buys = examination_buys.assign(
    price_and_size=examination_buys.event_price * examination_buys.event_size
)

In [54]:
# Chart price * size for each side.
# NOTE(review): the names 'sells volume' / 'buys volume' were already used for
# the event_size charts, and analyze derives the file name from the name, so
# those HTML files are overwritten here - confirm that is acceptable.
analyze('sells volume' ,examination_sells, 'price_and_size')
analyze('buys volume' ,examination_buys, 'price_and_size')

In [55]:
# Repeat the sell/buy split over the whole window held in f instead of the
# 10:03:30 - 10:05:00 slice. This rebinds examination, examination_sells and
# examination_buys.
examination = f
# f already contains only TRADE rows (filtered in prepare_analysis); kept as a safeguard.
examination = examination[examination['event_type'] == 'TRADE']
examination_sells = examination[examination.event_price == examination.bid_price]
examination_buys = examination[examination.event_price == examination.ask_price]

In [56]:
# Chart event_size for each side over the whole window.
analyze('sells volume 2' ,examination_sells, 'event_size')
analyze('buys volume 2' ,examination_buys, 'event_size')

In [57]:
# Keep only sell-side trades with event_size strictly greater than 100 and chart their sizes.
examination_sells_over_100 = examination_sells[examination_sells.event_size > 100]
analyze('sells volume 3' ,examination_sells_over_100, 'event_size')


In [59]:
# Keep only buy-side trades with event_size strictly greater than 100 and chart their sizes.
examination_buys_over_100 = examination_buys[examination_buys.event_size > 100]
# The chart is named after the buy side. It used to reuse 'sells volume 3',
# which mislabelled the chart and overwrote the sell-side HTML file, because
# analyze derives the file name from this name.
analyze('buys volume 3' ,examination_buys_over_100, 'event_size')
