In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

In [31]:
data_path = '../data/'
BSamt = pd.read_csv(data_path+'buyamt.txt',delim_whitespace=True)
BSamt.loc[:,'date'] = pd.to_datetime(BSamt['date'],format = '%Y-%m-%d',errors = 'coerce')
BSamt.head()

Unnamed: 0,date,MFD_BUYAMT_A_1,MFD_SELLAMT_A_1,MFD_BUYAMT_A_2,MFD_SELLAMT_A_2,MFD_BUYAMT_A_3,MFD_SELLAMT_A_3,MFD_BUYAMT_A_4,MFD_SELLAMT_A_4,code
0,2017-03-17,37679130.0,66446440.0,28845356.0,42765575.0,29105795.0,68467273.0,29105795.0,68467273.0,600000.SH
1,2017-03-20,17207390.0,54096160.0,27110699.0,34954565.0,31921862.0,26525834.0,31921862.0,26525834.0,600000.SH
2,2017-03-21,34142020.0,141744700.0,42645102.0,95691208.0,53642089.0,43922409.0,53642089.0,43922409.0,600000.SH
3,2017-03-22,44230730.0,170243000.0,88170741.0,101858270.0,57872201.0,129818879.0,57872201.0,129818879.0,600000.SH
4,2017-03-23,53226020.0,34041210.0,70730807.0,44377891.0,55058740.0,36724126.0,55058740.0,36724126.0,600000.SH


In [111]:
# transfer raw data into buy/sell quantity
BSamt['BUY'] =BSamt['MFD_BUYAMT_A_1']/1000000+BSamt['MFD_BUYAMT_A_2']/500000+BSamt['MFD_BUYAMT_A_3']/150000+BSamt['MFD_BUYAMT_A_4']/40000
BSamt['SELL'] =BSamt['MFD_SELLAMT_A_1']/1000000+BSamt['MFD_SELLAMT_A_2']/500000+BSamt['MFD_SELLAMT_A_3']/150000+BSamt['MFD_SELLAMT_A_4']/40000
BSamt.loc[:,['BUY','SELL']]= BSamt.loc[:,['BUY','SELL']].fillna(0)
BSamt.loc[:,'BUY'] = [int(t) for t in BSamt['BUY']]
BSamt.loc[:,'SELL'] = [int(t) for t in BSamt['SELL']]
PINamt = BSamt.loc[:,['date','BUY','SELL','code']]
PINamt.head()

Unnamed: 0,date,BUY,SELL,code
0,2017-03-17,1017,2320,600000.SH
1,2017-03-20,1082,963,600000.SH
2,2017-03-21,1818,1724,600000.SH
3,2017-03-22,2053,4484,600000.SH
4,2017-03-23,1938,1285,600000.SH


In [122]:
# initialize parameters for PIN estimations
def init_params(B,S):
    B = B/100
    S = S/100
    alph = 0.1
    delt = 0.3
    gamm = 0.5
    B_bar = np.average(B)
    epsiB = gamm*B_bar
    miu = (B_bar-epsiB)/(alph*(1-delt))
    epsiS = np.average(S)-alph*delt*miu
    return [alph,delt,miu,epsiB,epsiS]

In [137]:
def pin_likelihood_EHO(params,B,S): 
    
    '''
    estimate joint likelihood function using factorization from Easley, Hvidkjaer, and O’Hara (2010) 
    
    :param params: tuple, \alpha, \delta \mu, \epsilon_B, \epsilon_S 
    :param B: list, list of buy flow in quantity
    :param S: list, list of sell flow in quantity
    :return: float, joint likelihood function, the likelihood of the params given the buy and sell flow
 
    '''
    #initialize parameter values
    alph,delt,miu,epsiB,epsiS = params
    
    #initialize likelihood
    likel = 0.0
    
    #assign number of trading days
    trad_days  = min(len(B),len(S))
    B = B/100
    S = S/100
    
    for j in range(1,trad_days):
        #number of buy- and sell-trads for the trading day
        buy_s = B[j]
        sell_s =S[j]
        
        #compute values of interest for the log-likelihood function
        M  = min(buy_s,sell_s)+max(buy_s,sell_s)/2
        Xs = epsiS/(miu+epsiS)
        Xb = epsiB/(miu+epsiB)
        
        a1 = np.exp(-miu)
        a2 = Xs**(sell_s-M)
        a3 = Xb**(buy_s-M)
        a4 = Xs**(-M)
        a5 = Xb**(-M)
        
        part1 = -epsiB-epsiS+M*(np.log(Xb)+np.log(Xs))+buy_s*np.log(miu+epsiB)+sell_s*np.log(miu+epsiS)
        part2 = np.log(alph*(1-delt)*a1*a2*a5+alph*delt*a1*a3*a4+(1-alph)*a2*a3)
        
        likel = likel+part1+part2

    likel *= -1
    return likel
            

In [141]:
def pin_likelihood_LK(params,B,S): 
    '''
    estimate joint likelihood function using factorization from Lin and Ke(2011)
    
    :param params: tuple, \alpha, \delta \mu, \epsilon_B, \epsilon_S 
    :param B: list, list of buy flow in quantity
    :param S: list, list of sell flow in quantity
    :return: float, joint likelihood function, the likelihood of the params given the buy and sell flow
 
    '''
    #initialize parameter values
    alph,delt,mu,epsiB,epsiS = params
    
    #initialize likelihood
    likel = 0.0
    B = B/100 # scale adjustment
    S = S/100
    #assign number of trading days
    trad_days  = min(len(B),len(S))
    
    for j in range(1,trad_days):
        #number of buy- and sell-trads for the trading day
        buy_s = B[j]
        sell_s =S[j]
        
        #compute values of interest for the log-likelihood function
        e1 = -mu-sell_s*np.log(1+mu/epsiS)
        e2 = -mu-buy_s*np.log(1+mu/epsiB)
        e3 = -buy_s*np.log(1+mu/epsiB)-sell_s*np.log(1+mu/epsiS)
        e_m = max(e1,e2,e3)
        
        part1 = -epsiB-epsiS+buy_s*np.log(mu+epsiB)+sell_s*np.log(mu+epsiS)+e_m
        part2 = np.log(alph*(1-delt)*np.exp(e1-e_m)+alph*delt*np.exp(e2-e_m)+(1-alph)*np.exp(e3-e_m))
        
        likel = likel+part1+part2
    likel *= -1
    
    return likel

In [143]:
# an example for estimating PIN value use both methods on a 30 day window
code = '600000.SH'
trades = PINamt[PINamt.code==code]
period = 30
trades  = trades.reset_index(drop = True)
i = 31
B = trades.loc[i-period:i,'BUY']
S = trades.loc[i-period:i,'SELL']
params = EHO_init_params(B,S)
pin_likelihood_EHO(params,B,S)
opt_params = minimize(pin_likelihood_EHO,params,args = (B,S),method = 'Nelder-Mead').x
alph,delt,mu,epsiB,epsiS = opt_params
PIN_EHO= (alph*mu)/(alph*mu+epsiB+epsiS)
print('PIN_EHO {}'.format(PIN_EHO))
params = EHO_init_params(B,S)
pin_likelihood_LK(params,B,S)
bnds = ((0, 1), (0, 1),(0,None),(0,None),(0,None))
opt_params2 = minimize(pin_likelihood_LK,params,args = (B,S),method = 'Nelder-Mead',bounds = bnds).x
alph,delt,mu,epsiB,epsiS = opt_params2
PIN_LK= (alph*mu)/(alph*mu+epsiB+epsiS)
print('PIN_LK {}'.format(PIN_LK))

-1909.132078973779

PIN_EHO 0.11729866760493875


-1909.1320789737792

PIN_LK 0.11729866760493875


In [None]:
# functionize the process of estimating PIN value for every 30 trade days
def PIN_by_code(params,df,code):
    # assign calculation period (30 day basis)
    trades = df[df.code==code]
    period = 30
    trades.reset_index(drop = True)
    for i in range(peiod,len(trades)):
        B = trades.loc[i-period:i,'BUY']
        S = trades.loc[i-period:i,'SELL']
        params = init_params(B,S)
        from scipy.optimize import minimize
        opt_params = minimize(pin_likelihood_LK,params,args = (B,S),method = 'Nelder-Mead').x
        alph,delt,miu,epsiB,epsiS = opt_params
        trades.loc[i,'PIN_EHO'] = (alph*miu)/(alph_epsiB+epsiS)
    return trades.loc[:,'PIN_EHO']