In [1]:
###Import des packages

import numpy as np
import time
import scipy

In [2]:
def generation(n, alpha=1.7, beta=0.9, gamma=10, delta=10):
    # Initialize samples array with zeros
    sample = np.zeros(n)
    
    # Constants that do not depend on the sample index and thus can be computed once
    if alpha != 1:
        S_alpha_beta = (1 + beta ** 2 * np.tan(np.pi * alpha / 2) ** 2) ** (1 / (2 * alpha))
        B_alpha_beta = (1 / alpha) * np.arctan(beta * np.tan(np.pi * alpha / 2))

    for i in range(n):
        U = np.random.uniform(-np.pi/2, np.pi/2)
        W = -np.log(1 - np.random.uniform(0,1))
        
        # Handle the case alpha = 1 separately
        if alpha != 1:
            part1 = np.sin(alpha * (U + B_alpha_beta)) / (np.cos(U) ** (1 / alpha))
            part2 = (np.cos(U - alpha * (U + B_alpha_beta)) / W) ** ((1 - alpha) / alpha)
            sample[i] = S_alpha_beta * part1 * part2
        else:
            sample[i] = (2 / np.pi) * ((np.pi / 2 + beta * U) * np.tan(U) - beta * np.log((np.pi / 2 * W * np.cos(U))/(np.pi+beta*U)))

    # Apply scaling and location shifting
    sample = gamma * sample + delta
    return sample

In [3]:
def char_fun_zolo(t, nu, etha, tau):
    """characteristic function to use from S2 to S5

    Parameters
    ----------------------------
    t : float
    the "instant" we want to check

    nu : float
    parameter of the alpha-stable distribution, >= 1/4.

    etha :float
    parameter of the alpha-stable distribution, np.sign(etha) <= min(1, 2*np.sqrt(nu)-1)

    tau : float
    parameter of the alpha-stable distribution, absolute value < inf
    ------------------------------
    """

    y = -np.exp((nu**(-1/2))*(np.log(np.abs(t)) + tau - 1j*(np.pi/2)*etha*np.sign(t)) + np.e*((nu**(-1/2))-1))
    return y
    

In [4]:
def emp_char_fun(sample, t):
    """function to evaluate the empirical characteristic of a function at an instant t

    Parameters
    -------------------------
    sample : array-like
    sample on which we want to evaluate the characteristic function

    t : int, float
    moment to estimate/generate
    --------------------------
    """
    expo_transfo = np.exp(-sample*1j*t)
    empirical = np.mean(expo_transfo)
    return empirical
    

$\theta = (\alpha, \beta, \gamma, \delta)$ 

N = 1000

$x^1,...,x^p$ ~ $\pi(x|\theta)$

### So, we generate X and observe Y (that we also generated but we know the parameters)

$\pi_{LF}(\theta|y)$ = $\pi(\theta)\mathbb{E}_{\pi(x|\theta)}[K_{\epsilon} (y-x)]$ $\leftarrow$ we do monte carlo on this

$K_{\epsilon} (y-x)$ def by $S(y)$ ~ $N(S(x), \epsilon^2 \hat{\Sigma})$ $\leftarrow$ that would be our gaussian kernel, so we just draw $x_1,..., x_p$ ~ $\pi(x|\hat{\theta})$ then compute S(x), after that, estimate the cov(S(x)|$\hat{\theta}$) and then S(y) according to the previous distrib

$\hat{\Sigma}$ is an estimate of $Cov(S(x)\hat{\theta})$

$\Lambda$ = diag(0.25, 0.25, 1, 1)

Mutation kernel : $M_t(\theta_t) = \sum_{i=1}^{N} W_{t-1}^{(i)}(\theta_{t-1}^{(i)}) \phi(\theta_t ; \theta_{t-1}^{(i)})$

Gaussian kernel : $L(x,t) = \sum_{n = -M}^{M}f(x-n)G(n,t)$

with G(n,t) = $\frac{1}{\sqrt{2\pi t}}e^{-\frac{n^2}{2t}}$


As t increases, $\epsilon_t$ decreases

$\hat{c_t}$ is the 90th quantile of the weights

# For the rest we need to use : 

# $\alpha = 1.7$
# $\beta = 0.9$
# $\gamma = 10$
# $\delta = 10$

In [5]:
{}

{}

# Seed : 256652

In [6]:
np.random.seed(256652)

#creation des prior des parametres cf section 3.1

prior_alpha = np.random.uniform(1.1, 2., size=1000)
prior_beta = np.random.uniform(-1., 1, size=1000)
prior_gamma = np.random.uniform(0., 300., size=1000)
prior_delta = np.random.uniform(-300., 300., size=1000)

#scale parameters

#epsilon_t

mille = np.linspace(start=100, stop=1000, endpoint=True, num=10)[::-1]
cent = np.linspace(start=10, stop=100, endpoint=False, num=90)[::-1]
dix = np.linspace(start=5, stop=10, endpoint=False, num=10)[::-1]
cinq = np.linspace(start=3, stop=5, endpoint=False, num=40)[::-1]
trois = np.linspace(start=0, stop=3, endpoint=False, num=300)[::-1]

scale_param = np.concatenate((mille, cent, dix, cinq, trois))


N = 1000
varcov_lambda = np.array([0.25,0.,0.,0.,0.,0.25, 0., 0.,0.,0.,1.,0.,0.,0.,0.,1]).reshape([4,4])

In [None]:
def pi_lf(sample, summary_statistic):
    """function 
    

In [7]:
def gaussian_ker(u=0, y=0):
    """gaussian kernel for weights
    
    Parameters
    -------------------
    y : float
    the point we have, the output

    u : float
    the point from which we want to calculate a weight
    ----------
    """

    w = (1/np.sqrt(2*np.pi))*np.exp(-(u-y)**2/2)
    return w

In [8]:
def mc_culloch_q(data, gamma=1):
    """fonction pour calculer les quantiles de mc_culloch utilises dans le papier

    Parameters
    --------------
    data : array
    les donnes pour lesquelles on veut calculer les quantiles

    gamma : float
    valeur 1 par defaut, d'apres le papier, on utilise le gamma utilise pour faire les simulations
    ----------------
    """
    data_sorted = np.sort(data)
    useful_quantiles = np.quantile(a=data_sorted, q=[0.95, 0.75, 0.5, 0.25, 0.05])
    q_95 = useful_quantiles[0]
    q_75 = useful_quantiles[1]
    q_50 = useful_quantiles[2]
    q_25 = useful_quantiles[3]
    q_05 = useful_quantiles[4]
    alpha_hat = (q_95-q_05)/(q_75-q_25)
    beta_hat = (q_95+q_05+2*q_50)/(q_95-q_05)
    gamma_hat = (q_75-q_25)/gamma
    #on a le gamma au denominateur parce que c'est comme ca qu on a genere nos donnees
    #d'apres le papier, ca va de prendre le gamma qu'on a utililse pour generer
    delta_hat = np.mean(data_simulation)
    S_1 = np.transpose(np.array((alpha_hat, beta_hat, gamma_hat, delta_hat)))
    return S_1

In [9]:
def zolotarev_transfo(sample, xi):
    """function to use for the estimation based on the zolotarev transformation

    Parameters
    --------------------------
    Sample : array-like
    Sample to do the transformation on

    xi : int, float
    The constant used in the transformation
    --------------------------
    """
    if xi<=0 or xi>1/2 :
        raise ValueError('Xi must be between 0 and 1/2')
    taille = len(sample)
    Z = []
    for i in range(int(taille/3)):
        transfo = sample[3*i-2] - xi*sample[3*i-1] - (1 - xi)*sample[3*i]
        Z.append(transfo)
    V = []
    U = []
    for i in range(len(Z)):
        V.append(np.log(np.abs(Z[i])))
        U.append(np.sign(sample[i]))
    V = np.array(V)
    U = np.array(U)
    S_U_squared = (np.std(U))**2
    S_V_squared = (np.std(V))**2
    nu_tilde = (6/(np.pi)**2)*S_V_squared - (3/2)*S_U_squared + 1
    etha_hat = np.mean(U)
    tau_hat = np.mean(V)
    nu_hat = 0
    if nu_tilde > ((1+np.abs(etha_hat))**2)/4:
        nu_hat = nu_tilde
    else:
        nu_hat = ((1+np.abs(etha_hat))**2)/4
    delta_hat = np.mean(sample)
    S_2 = np.array((nu_hat, etha_hat, tau_hat, delta_hat))
    return S_2

In [10]:
def u_hat(x, t):
    """function to use to calculate presss moments

    Parameters
    ---------------
    x : array-like

    the data

    t : int, float
    the real number we want to know the image by the function
    ---------------
    """

    y = np.arctan(np.sum(np.cos(t*x))/np.sum(np.sin(t*x)))
    return y

In [11]:
test_u = np.array((np.pi, np.pi, np.pi, np.pi))

In [12]:
u_hat(x=test_u,t=1/4)

0.7853981633974484

In [13]:
u_hat(x=test_u, t=1/2)

6.123233995736766e-17

In [14]:
#derniere ligne confirme que c est bien 0, on tombe bien sur arctan(0) (aux approximations pres)

In [15]:
#fonction u_hat marche bien, on retombe sur arctan de 1

In [16]:
def presss_moments(sample, t_1=0.2, t_2=0.8, t_3=0.1, t_4=0.4):
    """function to calculate press's moments

    Parameters
    ----------------

    sample : array-like

    the data

    t_1 : int, float
    t_2 : int, float
    t_3 : int, float
    t_4 : int, float
    moments we want to evaluate, the default parameters are given by Koutrouvelis (1980)
    ---------------
    """
    
    #estimation of characteristic function at different moments
    transfo_1 = emp_char_fun(sample, t=t_1)
    transfo_2 = emp_char_fun(sample, t=t_2)
    transfo_3 = emp_char_fun(sample, t=t_3)
    transfo_4 = emp_char_fun(sample, t=t_4)
    log_gamma_hat_top = (np.log(np.abs(t_1))*np.log(-np.log(np.abs(transfo_2)))-np.log(np.abs(t_2))*np.log(-np.log(np.abs(transfo_1))))
    log_gamma_hat = log_gamma_hat_top/np.log(np.abs(t_1/t_2))
    alpha_hat = np.log(np.log(np.abs(transfo_1))/np.log(np.abs(transfo_2)))/np.log(np.abs(t_1/t_2))
    beta_hat_top = ((u_hat(x=sample, t=t_4)/t_4)-(u_hat(x=sample, t=t_3)/t_3))
    beta_hat_bot = ((np.abs(t_4)**(alpha_hat-1))-(np.abs(t_3)**(alpha_hat-1)))*((np.exp(log_gamma_hat))**alpha_hat)*np.tan((alpha_hat*np.pi)/2)
    beta_hat = beta_hat_top/beta_hat_bot
    delta_hat_top = (((np.abs(t_4))**(alpha_hat-1))*u_hat(x=sample, t=t_3)/t_3)-((((np.abs(t_3))**(alpha_hat-1))*u_hat(x=sample,t=t_4))/t_4)
    delta_hat_bot = (np.abs(t_4)**(alpha_hat-1)) - (np.abs(t_3)**(alpha_hat-1))
    delta_hat = delta_hat_top/delta_hat_bot
    y = np.array((alpha_hat, beta_hat, np.exp(log_gamma_hat), delta_hat))
    return y


    

In [17]:
test = generation(n=1000000)
S_hat = zolotarev_transfo(test, xi=0.15)
print(S_hat)

[ 0.57499934  0.38802339  2.41271775 10.05564935]


In [18]:
#ca converge tres tres lentement

In [19]:
presss_moments(sample=test)

array([ 0.56604276,  1.84795426,  8.1157908 , -9.96881507])

In [20]:
initial = np.zeros(4)
for i in range(250):
    test_2 = generation(n=1000, alpha=1.7, beta=0.9, gamma=10, delta=10)
    initial = np.vstack((initial, presss_moments(sample=test_2)))
    

resultat = np.zeros(4)
resultat[0] = np.mean(initial[:,0])
resultat[1] = np.mean(initial[:,1])
resultat[2] = np.mean(initial[:,2])
resultat[3] = np.mean(initial[:,3])
    

In [21]:
print(resultat)

[ 0.10902846  2.14843924  3.81087938 -4.70566537]


# Press
$\hat{\alpha}$ et $\hat{\delta}$ assez bons

Assez bof : $\hat{\gamma}$

Catastrophe : $\hat{\beta}$

# Le résultat précédent c'est quand on prenait un truc un peu au pif
# Quand on utilise leurs valeurs de $\alpha, \beta, \gamma, \delta$ en fait c'est pas mal
En fait c'est pas foufou...

In [22]:
np.zeros(4)


array([0., 0., 0., 0.])

In [23]:
%%capture
#data generation and definition of moments

data_simulation = generation(n=1000, alpha=2, beta=0.5, gamma=3, delta=4)

In [24]:
%%capture
#sort data to be sure the quantiles are well computed bc you never know
data_simulation = np.sort(data_simulation)

In [25]:
useful_quantiles = np.quantile(a=data_simulation, q=[0.95, 0.75, 0.5, 0.25, 0.05])

In [26]:
useful_quantiles

array([10.9038422 ,  6.76197407,  4.07913098,  1.04915232, -3.26879304])

In [27]:
q_95 = useful_quantiles[0]
q_75 = useful_quantiles[1]
q_50 = useful_quantiles[2]
q_25 = useful_quantiles[3]
q_05 = useful_quantiles[4]

### Mc Culloch's quantiles

In [28]:
alpha_hat = (q_95-q_05)/(q_75-q_25)
beta_hat = (q_95+q_05+2*q_50)/(q_95-q_05)
gamma_hat = (q_75-q_25)/3
#on a le 3 au denominateur parce que c'est comme ca qu on a genere nos donnees
#d'apres le papier, ca va de prendre le gamma qu'on a utililse pour generer
delta_hat = np.mean(data_simulation)

In [29]:
S_1 = np.transpose(np.array((alpha_hat, beta_hat, gamma_hat, delta_hat)))

In [30]:
S_1

array([2.48084675, 1.11435247, 1.90427392, 3.99770943])

In [31]:
delta_hat

3.997709434703393

Test runned with one million simulations, the estimators seemed to be biased.

def prior_lf_mc():
    "estimation monte carlo de la vraisemblance
    

def smc_prc_abc(N=1000, epsilon_t):
    """Algorithme ABC de l'appendix A de l'article.


    Parameters
    ---------------------------
    N : int
    Nombre d'echantillons crees pour les parametres

    epsilon_t : array
    Les marges d'acceptation pour l'algorithme
    """
    #les prior sont des uniformes cf partie 3.1
    prior_alpha = np.random.uniform(1.1, 2., size=N)
    prior_beta = np.random.uniform(-1., 1, size=N)
    prior_gamma = np.random.uniform(0., 300., size=N)
    prior_delta = np.random.uniform(-300., 300., size=N)
    prior_gen = np.vstack((prior_alpha,prior_beta,prior_delta, prior_gamma))
    
    #the weights
    

    

    

In [32]:
a = np.array((1,2,3,4,5))

In [33]:
a

array([1, 2, 3, 4, 5])

In [34]:
b = np.array((5,4,3,2,1))

In [35]:
a/b

array([0.2, 0.5, 1. , 2. , 5. ])

In [36]:
c = np.array((5,1,3,2,4))

In [37]:
test = np.vstack((a,b,c))

In [38]:
test

array([[1, 2, 3, 4, 5],
       [5, 4, 3, 2, 1],
       [5, 1, 3, 2, 4]])

In [39]:
test = np.hstack((a,b,c))

In [40]:
test

array([1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 5, 1, 3, 2, 4])

In [41]:
np.linalg.norm(a - b)

6.324555320336759

In [42]:
test[0,:]

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [46]:
zebulon = generation(n=1000)

In [55]:
def emp_cdf(sample,t):
    """a function made to calculate the empirical cumulative distribution function

    Parameters
    ------------------
    sample : array-like
    data, generated or observed

    t : int, float
    the argument in which we want to evaluate the empirical cdf
    ------------------
    """
    result = 0
    sorted_sample = np.sort(sample)
    for i in sorted_sample:
        if i<=t:
            result += 1/len(sample)
    return result

In [61]:
def kolmogorov_smirnov(sample,proposal,q_1=0.25,q_2=0.75):
    """a function to get the S_5 statistic from the article

    Parameters
    ---------------
    sample : array-like
    the observed data about which we want to see if we are close or not 

    proposal : array-like
    the proposal X, simulated from parameters, to see if it fits

    q_1,q_2 : float
    must be between 0 and 1, values to give to have a certain quantile in the summary statistics
    ---------------
    """
    if (0<q_1<1 and 0<q_2<1) == False :
        raise ValueError('q_1 and q_2 must be between 0 and 1')
    KS = 0
    z = np.linspace(start=-400, stop=400)
    for i in z:
        interm = np.abs(emp_cdf(sample,i)-emp_cdf(proposal,i))
        if interm>KS:
            KS=interm
    quantiles = np.quantile(a=proposal, q=[q_1, q_2])
    mean_sample = np.mean(proposal)
    return np.array((mean_sample, quantiles[0], quantiles[1], KS))
    

In [64]:
true_one = generation(n=10000)
proposition_1 = generation(n=10000, alpha=2, beta=1, delta=8, gamma=5)
print(kolmogorov_smirnov(sample=true_one, proposal=proposition_1), '\n',
      kolmogorov_smirnov(sample=true_one, proposal=true_one))

[ 7.93045112  3.16116078 12.63352597  0.1332    ] 
 [ 9.25981505 -2.4349357  17.12690742  0.        ]


In [None]:
def accept_reject_abc(N=1000, epsilon_t=0.5, alpha=2, beta=0, gamma=1, delta=0):
    """algorithme d'acceptation rejet 'basique', vu en cours

    Parameters
    ---------------------------------
    N : int
    nombre de simulations faites

    epsilon_t : array
    scale parameters, marge d'acceptation

    alpha, beta, gamma, delta :
    parametres pour les simulations
    -------------------------------
    """
    data = generation(n=N, alpha=alpha, beta=beta, gamma=gamma, delta=delta)
    #priors
    prior_alpha = np.random.uniform(1.1, 2., size=5*N)
    prior_beta = np.random.uniform(-1., 1, size=5*N)
    prior_gamma = np.random.uniform(0., 300., size=5*N)
    prior_delta = np.random.uniform(-300., 300., size=5*N)
    prior_gen = np.vstack((prior_alpha,prior_beta,prior_delta, prior_gamma))
    true_data_estim = zolotarev_transfo(sample=data, xi=0.15)
    bon_param = np.array(())
    alpha_test = 0
    beta_test = 0
    gamma_test = 0
    delta_test = 0
    for i in range(5*N):
        proposed_data = generation(n=N, alpha=prior_gen[:,i][0],
                                   beta=prior_gen[:,i][1],
                                   delta=prior_gen[:,i][2],
                                   gamma=prior_gen[:,i][3])
        alpha_test = prior_gen[:,i][0]
        beta_test = prior_gen[:,i][1]
        gamma_test = prior_gen[:,i][2]
        delta_test = prior_gen[:,i][3]
        estimated = mc_culloch_q(proposed_data, gamma=prior_gen[:,i][3])
        if np.linalg.norm(estimated-true_data_estim) < epsilon_t:
            return alpha_test, beta_test, gamma_test, delta_test
        else :
            continue

In [None]:
test_1 = accept_reject_abc(N=200, epsilon_t=0.3)

In [None]:
start = time.time()
test_1
end = time.time()
print(test_1, end - start)

In [None]:
#avec une si petite distance ca ne marchait pas

In [None]:
start = time.time()
liste_parametres = []
for i in range(len(scale_param)):
    pouet = accept_reject_abc(N=200, epsilon_t=scale_param[-i])
    intermediaire = [scale_param[-i], pouet]
    liste_parametres.append(intermediaire)

end = time.time()

print(end - start)

In [None]:
#must change the parameters, N must be 200, too long otherwise

In [None]:
%%capture
liste_parametres

In [None]:
#not working very well, new distance needed

In [None]:
#Works probably, pb : wrong use of mc culloch quantiles, need to invert --> pb , how

In [None]:
liste_parametres

In [None]:
type(None)

In [None]:
total = np.zeros(4)
for i in range(len(liste_parametres)):
    if type(liste_parametres[i][1]) != None:
        interm = np.array(liste_parametres[i][1])
        print(interm)
        total = np.vstack((total, interm))

In [None]:
if 1 == 2:
    print('no')
else:
    print('yes')

In [None]:
liste_parametres[3][1]

In [None]:
for i in range(len(liste_parametres)):
    if liste_parametres[i][0] == 4.85:
        print(np.array(liste_parametres[i][1]))

In [None]:
#faudra changer aussi le format du test, pas liste mais passer direct en array

In [43]:
a

array([1, 2, 3, 4, 5])

In [44]:
b

array([5, 4, 3, 2, 1])

In [45]:
np.linalg.norm(a-b)

6.324555320336759

In [46]:
np.sqrt(40)

6.324555320336759