In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [9]:
def likelihood(x, bid, ask, T): #x = [alpha, delta, eps, mu]
    """
    likelihood function for the model
    args:
        x: parameters of the model
        bid: observation of the bid side
        ask: observation of the ask side
        T: time bins
    """
    #compute likelihood with Ealsy's (15) notation
    from scipy.stats import poisson
    likelihood = (1-x[0])*poisson.pmf(k=bid,mu=x[2]*T)*poisson.pmf(k=ask,mu=x[2]*T)+\
                +x[0]*x[1]*poisson.pmf(k=bid,mu=x[2]*T)*poisson.pmf(k=ask,mu=(x[2]+x[3])*T)+\
                +x[0]*(1-x[1])*poisson.pmf(k=bid,mu=(x[2]+x[3])*T)*poisson.pmf(k=ask,mu=x[2]*T)
    return likelihood

def loss (x, bid, ask, T):
    """
    loss function for the model
    args:
        x: parameters of the model (to train)
        bid: list of observations of the bid side
        ask: list of observations of the ask side
        T: time bin width (assumed the same for each bin)
    """
    prod=[]
    #restricting the loss function to values which do not kill the output
    for b, a in zip(bid, ask):
        l=likelihood(x, b, a, T)
        if l>0: prod.append(l)
        else: continue
    return -np.prod(prod)

In [10]:
from scipy.optimize import minimize
from tqdm import tqdm
from datetime import timedelta
time_delta = timedelta(minutes=1)

occurrences = pd.read_csv("../data_cleaned/occurrences.csv")
methods = ['Nelder-Mead', 'Powell', 'CG', 'BFGS','L-BFGS-B', 'TNC', 'SLSQP']
params = []
for m in tqdm(methods, desc='Optimizing with different methods'):
        r=minimize(loss, x0=np.random.uniform(size=4), 
                args=(occurrences['bid_observations'], occurrences['ask_observations'], time_delta.total_seconds()),
                method=m, bounds=[(0, 1) for i in range(4)])
        params.append(r.x)
optimization_result = pd.DataFrame(params, columns=['alpha', 'delta', 'eps', 'mu'])
optimization_result['method'] = methods

  warn('Method %s cannot handle constraints nor bounds.' % method,
  warn('Method %s cannot handle constraints nor bounds.' % method,
  warn('Method %s cannot handle constraints nor bounds.' % method,
Optimizing with different methods: 100%|██████████| 7/7 [00:45<00:00,  6.48s/it]


In [11]:
optimization_result['PIN'] = optimization_result.apply(lambda x: x['alpha']*x['mu']/(x['alpha']*x['mu']+2*x['eps']), axis=1)

In [12]:
import os 
if os.path.isdir('../results')==False:
    os.mkdir('../results')
optimization_result.to_csv('../results/optimization_result.csv', index=False)
optimization_result

Unnamed: 0,alpha,delta,eps,mu,method,PIN
0,0.663389,0.629576,0.81647,0.489969,Nelder-Mead,0.166008
1,0.999934,0.999934,0.999934,0.999934,Powell,0.333319
2,0.183489,0.774928,0.481974,0.238476,CG,0.043423
3,0.096949,0.28628,0.743172,0.842129,BFGS,0.052069
4,0.882825,0.644999,0.142026,0.468985,L-BFGS-B,0.593097
5,0.354467,0.2451,0.068829,0.530597,TNC,0.577395
6,0.600453,0.857963,0.560459,0.635793,SLSQP,0.254055
