In [1]:
import numpy as np 
from scipy.stats import norm
from scipy.linalg import cholesky

$$
\mathbb{P}\left(Y \in \cdot \mid Y \in \mathrm{S}_{\mu^{(t)}, \mathbf{i}}\right) \text { et } \mathbb{P}\left(Y \in \cdot \mid\left[\mu^{(t)}\right]^T Y=s\right)
$$
Pour le premier cas : on définit la strate, on tire un point au hasard dans cette strate, puis on échantillonne conditionnellement à ce point.
On le réécrit : 
$$
X=v^{\top} \xi \sim N\left(0, v^{\top} \Sigma v\right)=N(0,1)
$$
On a la distribution conditionnelle : 

$$
(\xi \mid X=x) \sim N\left(\frac{\Sigma v}{v^{\top} \Sigma v} x, \Sigma-\frac{\Sigma v v^{\top} \Sigma}{v^{\top} \Sigma v}\right)=N\left(\Sigma v x, \Sigma-\Sigma v v^{\top} \Sigma\right)
$$
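On utilise l'astuce ci-dessous pour pouvoir simuler : en notant $A$ une matrice telle que $A A^{\top}=\Sigma$ (par exemple le facteur de Cholesky), la matrice $A-\Sigma v v^{\top} A$ est une racine carrée de la covariance conditionnelle.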

$$
\begin{aligned}
& \left(A-\Sigma v v^{\top} A\right)\left(A-\Sigma v v^{\top} A\right)^{\top} \\
& \quad=A A^{\top}-A A^{\top} v v^{\top} \Sigma-\Sigma v v^{\top} A A^{\top}+\Sigma v v^{\top} \Sigma v v^{\top} \Sigma \\
& \quad=\Sigma-\Sigma v v^{\top} \Sigma
\end{aligned}
$$

Selon que l'on veut échantillonner avec ou sans condition sur la valeur de $X$, on le précise via l'argument `x` de la fonction ci-dessous.

$$
\begin{aligned}
& \text { for } i=1, \ldots, K \\
& \quad \text { generate } U \sim \operatorname{Unif}[0,1] \\
& \quad V \leftarrow(i-1+U) / K \\
& \quad X \leftarrow \Phi^{-1}(V) \\
& \quad \text { generate } Z \sim N(0, I) \text { in } \Re^d \\
& \quad \xi \leftarrow \Sigma v X+\left(A-\Sigma v v^{\top} A\right) Z
\end{aligned}
$$

In [19]:
def stratified_sampling_linear_projection(mu, Sigma, v, K, x=None):
    """
    Draws K samples from N(mu, Sigma), stratified (or conditioned) along the direction v.

    Write Y = mu + xi with xi ~ N(0, Sigma) and rescale v so that
    v^T Sigma v = 1. Then X = v^T xi ~ N(0, 1) and
    (xi | X = x) ~ N(Sigma v x, Sigma - Sigma v v^T Sigma).
    Each sample is built as  Sigma v X + (A - Sigma v v^T A) Z  where
    A A^T = Sigma (Cholesky factor) and Z ~ N(0, I).

    Args:
    - mu (array-like, shape (d,)): The mean vector of the distribution.
    - Sigma (array-like, shape (d, d)): The covariance matrix of the distribution
      (it is Cholesky-factorised, so it must be symmetric positive definite).
    - v (array-like, d entries): The vector along which stratification is done.
      It is rescaled internally; the caller's array is not modified.
    - K (int): The number of samples to generate.
    - x (optional, scalar or array-like of length K): Value(s) imposed on the
      normalised projection v^T (Y - mu). When given, sample k is drawn
      conditionally on that projection being x[k] (a scalar is shared by all
      samples). When None, the projection of sample k is drawn inside the k-th
      of K equiprobable strata of N(0, 1).

    Returns:
    - samples (2D array, shape (K, d)): One sample per row.

    Raises:
    - ValueError: if v^T Sigma v is not strictly positive, or if x is neither
      a scalar nor a 1-D sequence of length 1 or K.
    """
    mu = np.asarray(mu)
    Sigma = np.asarray(Sigma, dtype=float)
    v = np.asarray(v, dtype=float).ravel()
    d = len(mu)  # Dimension of the normal distribution

    # Normalize v so that v^T Sigma v = 1 (makes the projection standard normal)
    scale = v @ Sigma @ v
    if not scale > 0:
        raise ValueError("v must satisfy v^T Sigma v > 0")
    v = v / np.sqrt(scale)

    # The uniforms are drawn even when `x` is supplied so that the global
    # random stream is consumed in the same order in both modes.
    U = np.random.uniform(0, 1, K)
    if x is None:
        # k-th value falls in the k-th equiprobable stratum of N(0, 1)
        projections = norm.ppf((np.arange(K) + U) / K)
    else:
        projections = np.atleast_1d(np.asarray(x, dtype=float))
        if projections.ndim != 1 or projections.shape[0] not in (1, K):
            raise ValueError("x must be a scalar or a 1-D sequence of length K")

    # Square root of the conditional covariance:
    # (A - Sigma v v^T A)(A - Sigma v v^T A)^T = Sigma - Sigma v v^T Sigma
    A = cholesky(Sigma, lower=True)
    Sigma_v = Sigma @ v
    residual_factor = A - np.outer(Sigma_v, v @ A)

    Z = np.random.randn(K, d)  # Z ~ N(0, I) in d dimensions, one row per sample
    xi_samples = projections[:, None] * Sigma_v + Z @ residual_factor.T

    return xi_samples + mu  # Shift the centred samples by the mean

In [36]:
# Demo: ten strata along the direction v = (1, 1, 1) in three dimensions.
mu = np.array([1, 2, 3])    # mean vector (d = 3)
Sigma = np.array([          # covariance matrix
    [1, 0.5, 0.1],
    [0.5, 2, 0.3],
    [0.1, 0.3, 1],
])
v = np.array([1, 1, 1])     # stratification direction
K = 10                      # number of strata; one 3-coordinate sample per stratum

# One row per stratum, one column per coordinate; all K rows are displayed.
samples = stratified_sampling_linear_projection(mu=mu, Sigma=Sigma, v=v, K=K)
samples

array([[ 1.27757921, -0.74874424,  1.94753117],
       [ 1.0554582 ,  0.61522568,  1.30962405],
       [ 1.25004951, -0.83786194,  4.10606524],
       [ 1.42657562,  0.39995164,  3.19738725],
       [ 1.81445556,  1.77010282,  2.33419586],
       [ 1.8065481 ,  3.17279559,  1.56092353],
       [ 1.39634554,  1.78821379,  3.83550115],
       [ 2.03526287,  2.43684937,  3.38135984],
       [ 1.19407248,  3.23867634,  3.78964664],
       [ 2.71891094,  3.94450754,  3.49833881]])

# Calcul de la strate $\mathrm{S}_{\mathbf{i}}$

$$
\mathrm{S}_{\mathbf{i}} \stackrel{\text { def }}{=} \prod_{k=1}^m\left(G_k^{-1}\left(\frac{i_k-1}{I}\right), G_k^{-1}\left(\frac{i_k}{I}\right)\right]
$$
On calcule $\mathrm{S}_{\mathbf{i}}$, puis on tire un point dedans. 

In [37]:

def strats_Si(I):
    """
    Return the bounds of the I equiprobable strata of the standard normal law.

    Stratum i_k (for i_k = 1, ..., I) is delimited by the standard normal
    quantiles of (i_k - 1) / I and i_k / I, so the first lower bound is -inf
    and the last upper bound is +inf.

    Args:
    - I (int): Number of strata.

    Returns:
    - list of (lower, upper) tuples, ordered from the lowest stratum to the
      highest; empty when I <= 0.
    """
    grid = range(I + 1)  # raises TypeError for a non-integer I
    if len(grid) < 2:
        return []
    # Consecutive strata share an edge, so the I + 1 distinct edges are
    # computed in a single vectorised quantile call instead of 2 * I scalar ones.
    edges = norm.ppf(np.array(grid, dtype=float) / I)
    return list(zip(edges[:-1], edges[1:]))

# Sanity check: look at the five lowest and the five highest strata
results = strats_Si(I=10000)
(results[:5], results[-5:])

# NOTE(review): scratch note kept from the original cell, meaning unclear
# (possibly about the range of the first stratum) - confirm or delete:
#-inf*mu -3$mu


def p_i_mu(I):
    """Return 1 / I, the weight given to each stratum when there are I of them."""
    stratum_weight = 1 / I
    return stratum_weight

In [28]:
def sample_from_hypercube(i_vector: list[int],
                          I: int) -> np.ndarray:
    """
    Sample a point from the hypercube defined by the indices in i_vector,
    with respect to the standard normal distribution.

    Along coordinate k the hypercube is the interval between the standard
    normal quantiles of (i_k - 1) / I and i_k / I. Each coordinate is drawn by
    inverse-CDF sampling, V <- (i_k - 1 + U) / I then X <- Phi^{-1}(V) with
    U ~ Unif[0, 1], i.e. from N(0, 1) restricted to that interval. Sampling in
    probability space also handles the unbounded first and last strata
    (i_k = 1 or i_k = I), whose x-space bounds are -inf / +inf.

    Args:
    i_vector (list of int): Indices defining the hypercube, each in 1..I.
    I (int): The total number of divisions along each dimension.

    Returns:
    numpy.ndarray: The sampled point, one coordinate per entry of i_vector.

    Raises:
    ValueError: if I < 1 or if an index lies outside 1..I.
    """
    indices = np.asarray(i_vector)
    if I < 1:
        raise ValueError("I must be a positive integer")
    if np.any(indices < 1) or np.any(indices > I):
        raise ValueError("every index in i_vector must lie between 1 and I")

    # One uniform per coordinate, mapped into that coordinate's probability slice
    U = np.random.uniform(0, 1, size=indices.shape)
    V = (indices - 1 + U) / I

    # Quantile transform back to x-space
    sample = norm.ppf(V)

    return sample


On veut échantillonner un point suivant cette distribution : 
$$\mathbb{P}\left(Y \in \cdot \mid Y \in \mathrm{S}_{\mu^{(t)}, \mathbf{i}}\right)$$
La méthode : on tire d'abord un point $s$ dans $\mathrm{S}_{\mathbf{i}}$, puis on tire suivant $\mathbb{P}\left(Y \in \cdot \mid\left[\mu^{(t)}\right]^T Y=s\right)$.

In [30]:
# Demo: draw one point from a 3-dimensional hypercube of the stratification grid.
i_vector = [6, 7, 8]    # stratum index chosen along each of the three axes
I = 11                  # number of divisions along each axis

# Draw the point and display it.
sampled_point = sample_from_hypercube(i_vector=i_vector, I=I)
sampled_point

array([-0.0567083 ,  0.13940325,  0.49443709])