In [26]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date
import dateutil.parser
from pandas import DataFrame

import warnings
warnings.filterwarnings('ignore')

import time

In [65]:
romero_path = "/home/nromerodiaz93/Documents/Universidad/JI/Carlos/Parameters/Yo/datos completos.csv"

In [66]:
# Importamos los datos
#data = pd.read_csv('datos completos.csv', parse_dates=[2], sep=',', na_values='NA',low_memory=False)
data = pd.read_csv(romero_path, parse_dates=[2], sep=',', na_values='NA',low_memory=False)

In [2]:
t = time.time()

print(time.time()-t)

2.6226043701171875e-05


# Manejo de datos

In [9]:
# Funcion que genera un DataFrame con todos los dias (sin repetir) de los cuales tenemos datos

def GetDate(stockdata):
    '''
    Parameters:
    ------
    stockdata:
    DataFrame - Data of various stocks
    
    Return:
    ------
    days: DataFrame - DataFrame of all days for which there is data
    '''
    
    days = pd.DatetimeIndex(stockdata.index).normalize()
    days = pd.DataFrame(days)
    days.index = stockdata.index
    days.columns = ['dia']
    
    return days.drop_duplicates(keep='first').dia

In [12]:
# Funcion que inicializa las columnas: 'nombre', 'date_time', 'tipo', 'precio', 'volumen',
#                                      'BID', 'ASK', 'Mid_Price', 'Quoted_Spread'

def StockPreprocessing(stockdata, stock_ticker):
    '''
    Parameters:
    ------
    stockdata:
    DataFrame - Data of various stocks
    
    stock_ticker:
    String - Ticker of the stock we are interested in
    
    
    Return:
    ------
    stockdata:
    DataFrame - Data of stocks with the folloeing initialized columns: 
    nombre', 'date_time', 'tipo', 'precio', 'volumen', 'BID', 'ASK', 'Mid_Price', 'Quoted_Spread'
    '''
    
    stockname = stock_ticker + " CB Equity"
    
    #Se cambian los nombres de las columnas y se elimina lo demas
    stockdata = stockdata[['name', 'times', 'type', 'value', 'size']]
    stockdata.columns=['nombre','date_time','tipo','precio','volumen']      
    stockdata = stockdata[['nombre','date_time','tipo','precio','volumen']]
    
    
    #Se seleccionan los datos segun la accion y el horario que nos interesan
    stockdata = stockdata.loc[(stockdata["nombre"] == stockname)]
    stockdata.index = stockdata.date_time
    stockdata = stockdata.between_time('9:30','15:55')
    stockdata['dia'] = pd.DatetimeIndex(stockdata.date_time).normalize() 
    
    days = GetDate(stockdata)#.drop_duplicates(keep='first').dia
    
    BA = []
    
    for i in days:
        stockdailydata = stockdata[stockdata.dia == str(i)]
        
        init_values = stockdailydata.precio.values
        d = {'BID': init_values, 'ASK': init_values}
        BA_df = pd.DataFrame(data=d)
        #BA_df.index = stockdata.index
        
        bid = stockdailydata['tipo'] == 'BID'
        ask = stockdailydata['tipo'] == 'ASK'
        BA_df.BID = np.multiply(bid.values, stockdailydata.precio.values)
        BA_df.ASK = np.multiply(ask.values, stockdailydata.precio.values)
        
        #BA_df['BID'].replace(to_replace = 0, method = 'ffill')
        #BA_df['ASK'].replace(to_replace = 0, method = 'ffill')
        BA_df['BID'] = BA_df['BID'].replace(to_replace = 0, method = 'ffill').values
        BA_df['ASK'] = BA_df['ASK'].replace(to_replace = 0, method = 'ffill').values
        
        BA_df = BA_df.where(BA_df.BID <= BA_df.ASK, np.nan) #np.nan
        
        BA_df['Mid_price']     = 0.5*(BA_df['BID'].values + BA_df['ASK'].values)
        BA_df['Quoted_Spread'] = (BA_df['ASK'].values - BA_df['BID'].values)/(BA_df.Mid_price.values)
            
        #BA_df.index = stockdailydata.index
        
        BA.append(BA_df)
    
    BA = pd.concat(BA, axis=0)
    BA.index = stockdata.index
    stockdata = pd.concat([stockdata, BA], axis=1)
        
    return stockdata

## Evaluación
---
Evaluamos la función `StockPreprocessing()` para la acción ECOPETL

In [4]:
# Importamos los datos
data = pd.read_csv('datos completos.csv', parse_dates=[2], sep=',', na_values='NA',low_memory=False)

In [13]:
# Aplicamos nuestra funcion
prep_data = StockPreprocessing(data, 'ECOPETL')

# Variables de profundidad

In [50]:
# Funcion que calcula: 'BID_Depth', 'ASK_Depth', 'Depth', 'log_depth'

def StockDepth(stockdata):
    '''
    Parameters:
    ------
    stockdata:
    DataFrame - Data of the stock
    
    Return:
    ------
    stockdata:
    DataFrame - Data of the stock with the following initialized columns:
    'BID_Depth', 'ASK_Depth', 'Depth', 'log_depth'
    '''
    
    # Creamos columnas para las variables de profundidad
    init_values = np.zeros( np.shape(stockdata)[0] )
    vol = stockdata.volumen
    stockdata = stockdata.assign(**{'BID_depth': vol, 'ASK_depth': vol,
                          'Depth': init_values, 'log_depth': init_values})
    
    # Parametros de fecha
    days = GetDate(stockdata)
    totaldays = len(days)
    result = []
    
    #for i in range(1): # Para realizar el calculo de un solo dia
    for i in range(totaldays):
        
        t_start = time.time()
        stockdailydata = stockdata.loc[(stockdata['dia'] == str(days.iloc[i]))]
        print('Se inicia viendo el día:', str(days.iloc[i]))
        
        for j in range(1, np.shape(stockdailydata)[0] ):
            
            #Tipo BID
            if(stockdailydata.tipo[j]=="BID"):
                stockdailydata.ASK_depth[j] = stockdailydata.ASK_depth[j-1]     
                if(stockdailydata.precio[j] == stockdailydata.BID[j]):     
                    if(stockdailydata.precio[j] == stockdailydata.BID[j-1]):
                        stockdailydata.BID_depth[j] = stockdailydata.BID_depth[j-1] + stockdailydata.volumen[j]
                    elif(stockdailydata.precio[j] != stockdailydata.BID[j-1]):
                        stockdailydata.BID_depth[j] = stockdailydata.volumen[j]
                elif(stockdailydata.precio[j] != stockdailydata.BID[j]):
                    stockdailydata.BID_depth[j] = stockdailydata.BID_depth[j-1]   
                    
            #Tipo ASK
            elif(stockdailydata.tipo[j]=="ASK"):
                stockdailydata.BID_depth[j] = stockdailydata.BID_depth[j-1]       
                if(stockdailydata.precio[j] == stockdailydata.ASK[j]):
                    if(stockdailydata.precio[j] == stockdailydata.ASK[j-1]):
                        stockdailydata.ASK_depth[j] = stockdailydata.ASK_depth[j-1] + stockdailydata.volumen[j]
                    elif(stockdailydata.precio[j] != stockdailydata.ASK[j-1]):
                        stockdailydata.ASK_depth[j] = stockdailydata.volumen[j]
                elif(stockdailydata.precio[j] != stockdailydata.ASK[j]):
                    stockdailydata.ASK_depth[j] = stockdailydata.ASK_depth[j-1]
                    
            #Tipo TRADE
            elif(stockdailydata.tipo[j]=="TRADE"):
                if(stockdailydata.precio[j] == stockdailydata.ASK[j]):
                    stockdailydata.BID_depth[j] = stockdailydata.BID_depth[j-1]
                    stockdailydata.ASK_depth[j] = stockdailydata.ASK_depth[j-1] - stockdailydata.volumen[j]
                elif(stockdailydata.precio[j] == stockdailydata.BID[j]):
                    stockdailydata.BID_depth[i] = stockdailydata.BID_depth[i-1] - stockdailydata.volumen[i]
                    stockdailydata.ASK_depth[i] = stockdailydata.ASK_depth[i-1]
                else:
                    stockdailydata.BID_depth[i] = stockdailydata.BID_depth[i-1]
                    stockdailydata.ASK_depth[i] = stockdailydata.ASK_depth[i-1]
                    
        print("Comienzan a revisarse las condiciones")
                    
        # Eliminamos los datos que no tienen sentido
        for k in range(np.shape(stockdailydata)[0]):
            if(stockdailydata.BID_depth[k] < 0):
                stockdailydata.BID_depth[k] = 0
                
            if(stockdailydata.ASK_depth[k] < 0):
                stockdailydata.ASK_depth[k] = 0
                
            # Se calcula la profundidad
            stockdailydata.Depth[k] = stockdailydata.BID_depth[k] + stockdailydata.ASK_depth[k]
            
            # Se calcula la log-profundidad
            if(stockdailydata.ASK_depth[k] != 0 and stockdailydata.BID_depth[k] != 0):
                stockdailydata.log_depth[k] = np.log(stockdailydata.BID_depth[k] * stockdailydata.ASK_depth[k])
                
        # Se quitan los NaN de los datos de profundidad
        for l in range(0, len(stockdailydata.tipo)):   
            if (np.isnan(stockdailydata.Quoted_Spread[l]) == True): 
                stockdailydata.BID_depth[l] = 0
                stockdailydata.ASK_depth[l] = 0
                stockdailydata.Depth[l] = 0
                stockdailydata.log_depth[l] = 0
                
        
        result.append(stockdailydata)
        print('Se terminó de calcular el día: %s, demoró %.4f segundos'%(str(days.iloc[i]), time.time()-t_start))
        print('Faltan %i ciclos \n------------------' %(totaldays-i-1))
        
    resultDf = pd.concat(result, axis=0)
    print("TIEMPO TOTAL: %.4f HORAS" %((time.time()-t_start)/3600))
        
    return resultDf

## Evaluación
---
Evaluamos la función `StockDepth()` para la acción ECOPETL

In [51]:
prep_data2 = StockDepth(prep_data)

Se inicia viendo el día: 2017-03-03 00:00:00
Comienzan a revisarse las condiciones
Se terminó de calcular el día: 2017-03-03 00:00:00, demoró 221.7712 segundos
Faltan 118 ciclos 
------------------
Se inicia viendo el día: 2017-03-06 00:00:00
Comienzan a revisarse las condiciones


KeyboardInterrupt: 

# Buy-Sell

In [None]:
#ME PARECE QUE PODEMOS VOLARNOS ESTO
'''def DeleteCloseQuotes(stockdata):
    stockdata = stockdata['date_time'].drop_duplicates(keep='last')'''

In [14]:
def InitiatingParty(stockdata):
    '''
    Parameters:
    ------
    stockdata:
    DataFrame - Data of the stock
    
    Return:
    ------
    x:
    DataFrame - DataFrame of TRADE quotes with the party that initiated the trade
    '''
    
    x = stockdata[stockdata.tipo == 'TRADE']
    
    # +1: transaccion iniciada por comprador
    buyer  = x.precio.values > x.Mid_price.values
    
    # -1: transaccion iniciada por vendedor
    seller = x.precio.values < x.Mid_price.values
    
    x['iniciado'] = buyer.astype(int) - seller.astype(int)
    x['iniciado'] = x['iniciado'].replace(to_replace = 0, method = 'ffill').values
    
    return x

## Evaluación
---
Evaluamos la función `InitiatingParty()` para la acción ECOPETL

In [54]:
tst = InitiatingParty(prep_data)
tst.tail(10)

Unnamed: 0_level_0,nombre,date_time,tipo,precio,volumen,dia,BID,ASK,Mid_price,Quoted_Spread,iniciado
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-08-31 15:48:57,ECOPETL CB Equity,2017-08-31 15:48:57,TRADE,1365.0,37061.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:49:35,ECOPETL CB Equity,2017-08-31 15:49:35,TRADE,1365.0,3794.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:49:35,ECOPETL CB Equity,2017-08-31 15:49:35,TRADE,1365.0,396206.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:49:52,ECOPETL CB Equity,2017-08-31 15:49:52,TRADE,1365.0,100000.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:50:22,ECOPETL CB Equity,2017-08-31 15:50:22,TRADE,1360.0,1000.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,-1
2017-08-31 15:51:50,ECOPETL CB Equity,2017-08-31 15:51:50,TRADE,1360.0,1214.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,-1
2017-08-31 15:54:15,ECOPETL CB Equity,2017-08-31 15:54:15,TRADE,1365.0,3794.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:54:15,ECOPETL CB Equity,2017-08-31 15:54:15,TRADE,1365.0,12278.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,1
2017-08-31 15:54:35,ECOPETL CB Equity,2017-08-31 15:54:35,TRADE,1360.0,1000.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,-1
2017-08-31 15:54:37,ECOPETL CB Equity,2017-08-31 15:54:37,TRADE,1360.0,3235.0,2017-08-31,1360.0,1365.0,1362.5,0.00367,-1


# Price impact

In [16]:
from datetime import timedelta
from sklearn import linear_model as lm
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

In [20]:
def ImpactParameters(stockdata):
    days = GetDate(stockdata)#.drop_duplicates(keep='first').dia
    res = []
    
    for i in days:
        stockdailydata = stockdata[stockdata.dia == str(i)]
        
        stockdailydata['delta_p']    = stockdailydata['precio'].diff()
        stockdailydata['order_flow'] = stockdailydata.volumen.values * stockdailydata.iniciado.values
        
        res.append(stockdailydata)
        
    res_df = pd.concat(res, axis=0)
    return res_df

In [21]:
def KyleImpactRegression(stockdata):
    
    days = GetDate(stockdata)#.drop_duplicates(keep='first').dia
    res = []
    
    for i in days:
        
        stockdailydata = stockdata[stockdata.dia == str(i)]
        
        x1 = stockdailydata.delta_p.values
        x1 = x1.reshape(-1, 1)
        
        x2 = stockdailydata.order_flow.values
        x2 = sm.add_constant(x2.reshape(-1, 1))
        
        result = sm.OLS(x1, x2, missing='drop').fit()
        
        coef = result.params[1]
        pvalue = result.pvalues[1]
        trades = len(stockdailydata)
        
        temp = [i, coef, pvalue, trades]
        res.append(temp)
        
    #res = pd.DataFrame(res, columns=['day', 'reg_coefficient', 'p_value', 'trades'])
    res = pd.DataFrame(res, columns=['dia', 'coef_regresion', 'p_value', 'trades'])
    res = res.set_index('dia')
    
    return res

## Evaluación
---
Evaluamos las funciones `ImpactParameters()` y `KyleImpactRegression()` para la acción ECOPETL

In [27]:
etet = ImpactParameters(tst)
etet.tail()

In [24]:
tretre = KyleImpactRegression(etet)
tretre.head()

# Resultados
---
Guardamos los resultados obtenidos al aplicar este código en el archivo csv `optim_results.csv`