In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def likelihood(x, bid, ask, T): #x = [alpha, delta, eps, mu]
    """
    likelihood function for the model
    args:
        x: parameters of the model
        bid: observation of the bid side
        ask: observation of the ask side
        T: time bins
    """
    #compute likelihood with Ealsy's (15) notation
    from scipy.stats import poisson
    likelihood = (1-x[0])*poisson.pmf(k=bid,mu=x[2]*T)*poisson.pmf(k=ask,mu=x[2]*T)+\
                +x[0]*x[1]*poisson.pmf(k=bid,mu=x[2]*T)*poisson.pmf(k=ask,mu=(x[2]+x[3])*T)+\
                +x[0]*(1-x[1])*poisson.pmf(k=bid,mu=(x[2]+x[3])*T)*poisson.pmf(k=ask,mu=x[2]*T)
    return likelihood

def loss (x, bid, ask, T):
    """
    loss function for the model
    args:
        x: parameters of the model (to train)
        bid: list of observations of the bid side
        ask: list of observations of the ask side
        T: time bin width (assumed the same for each bin)
    """
    prod=[]
    #restricting the loss function to values which do not kill the output
    for b, a in zip(bid, ask):
        l=likelihood(x, b, a, T)
        if l>0: prod.append(l)
        else: continue
    return -np.prod(prod)

In [3]:
from scipy.optimize import minimize
from tqdm import tqdm
from datetime import timedelta
time_delta = timedelta(minutes=1)

occurrences = pd.read_csv("../data_cleaned/occurrences.csv")
methods = ['Nelder-Mead', 'Powell', 'CG', 'BFGS','L-BFGS-B', 'TNC', 'COBYLA', 'SLSQP']
params = []
for m in tqdm(methods, desc='Optimizing with different methods'):
        r=minimize(loss, x0=np.random.uniform(size=4), 
                args=(occurrences['bid_observations'], occurrences['ask_observations'], time_delta.total_seconds()),
                method=m, bounds=[(0, 1) for i in range(4)])
        params.append(r.x)
optimization_result = pd.DataFrame(params, columns=['alpha', 'delta', 'eps', 'mu'])
optimization_result['method'] = methods

  warn('Method %s cannot handle constraints nor bounds.' % method,
  warn('Method %s cannot handle constraints nor bounds.' % method,
  warn('Method %s cannot handle constraints nor bounds.' % method,
  warn('Method %s cannot handle bounds.' % method,
Optimizing with different methods: 100%|██████████| 8/8 [00:53<00:00,  6.66s/it]


In [4]:
optimization_result['PIN'] = optimization_result.apply(lambda x: x['alpha']*x['mu']/(x['alpha']*x['mu']+2*x['eps']), axis=1)

In [5]:
import os 
if os.path.isdir('../results')==False:
    os.mkdir('../results')
optimization_result.to_csv('../results/optimization_result.csv', index=False)
optimization_result

Unnamed: 0,alpha,delta,eps,mu,method,PIN
0,0.104522,0.391659,0.028658,0.407333,Nelder-Mead,0.42622
1,0.999934,0.999934,0.999934,0.999934,Powell,0.333319
2,0.461466,0.021205,0.586166,0.006902,CG,0.00271
3,0.198458,0.256669,0.242464,0.885388,BFGS,0.265973
4,0.802294,0.337093,0.756844,0.549495,L-BFGS-B,0.225555
5,0.565937,0.445428,0.872513,0.218627,TNC,0.066209
6,1.759679,1.024803,1.729266,0.778409,COBYLA,0.283693
7,0.625156,0.313984,0.508281,0.786013,SLSQP,0.325862
