# Causality course 01 - Foundations in probability and statistics

Semester: Fall 2025

Author: Marcell Stippinger

Affiliation: HUN-REN Wigner RCP



In [None]:
# import common libraries
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import norm, uniform
from scipy.optimize import curve_fit

## Explore distributions

In [None]:
def pdf_and_cdf_plot(distribution):
    """Show the PDF and the CDF of a distribution in one figure.

    The x range runs from the 1st to the 99th percentile of the distribution,
    widened on each side by 10 % of that span. The PDF is drawn in blue
    against the left y axis, the CDF in red against a twin right y axis.

    Parameters
    ----------
    distribution
        Object providing ``ppf``, ``pdf``, ``cdf`` and ``dist.name``, e.g. a
        frozen ``scipy.stats`` distribution such as ``norm(loc=0, scale=1)``.
    """
    def draw_curve(axis, values, label, colour):
        # Label, curve and tick labels of one y axis all share a colour so
        # the reader can tell which scale belongs to which curve.
        axis.set_ylabel(label, color=colour)
        axis.plot(grid, values, color=colour)
        axis.tick_params(axis='y', labelcolor=colour)

    # Plotting window: central 98 % of the mass plus a 10 % margin.
    q_lo, q_hi = distribution.ppf(0.01), distribution.ppf(0.99)
    margin = (q_hi - q_lo) * 0.1
    grid = np.linspace(q_lo - margin, q_hi + margin, 1000)
    pdf_values = distribution.pdf(grid)
    cdf_values = distribution.cdf(grid)

    _, left_axis = plt.subplots()
    left_axis.set_xlabel('x')
    draw_curve(left_axis, pdf_values, 'PDF', 'tab:blue')

    # Second y axis sharing the same x axis.
    right_axis = left_axis.twinx()
    draw_curve(right_axis, cdf_values, 'CDF', 'tab:red')

    plt.title(f'PDF and CDF of {distribution.dist.name} Distribution')
    plt.grid()
    plt.show()

In [None]:
# Programmatically list all continuous distribution names from scipy.stats:
# keep every public attribute of the module that is an rv_continuous instance.
distributions = [
    name for name in dir(stats)
    if isinstance(getattr(stats, name), stats.rv_continuous)
]
# Unpack the list: `sep` only separates *multiple* positional arguments, so
# printing the list object itself would ignore it and show the list repr.
print(*distributions, sep=', ')

In [None]:
# Get help for a given distribution
# NOTE(review): binom is a discrete distribution, so it is not part of the
# continuous-distribution list built above - confirm it is the intended example.
help(stats.binom)

In [None]:
# Normal or Gaussian distribution
# (standard normal: loc is the mean, scale is the standard deviation)
pdf_and_cdf_plot(norm(loc=0, scale=1))

In [None]:
# Uniform distribution
# (scipy parameterizes it on [loc, loc + scale], i.e. [2, 5] here)
pdf_and_cdf_plot(uniform(loc=2, scale=3))

**Activity**: try out some of the distributions from the list

## Structural asymmetry demo

* $X=U_X$ and $Y=X+U_Y$
* $X\to Y$ can be recovered if
* at least one of $U_X$ and $U_Y$ is not normally distributed

In [None]:
def make_data(x_distro, noise_distro, trf = lambda x: x, n = 1000):
    """Sample data from the structural model ``y = trf(x) + noise``.

    Parameters
    ----------
    x_distro
        Distribution of the cause ``x``; must provide ``rvs(size=...)``
        (frozen or non-frozen ``scipy.stats`` distributions both work).
    noise_distro
        Distribution of the additive noise, drawn independently of ``x``.
    trf
        Function applied element-wise to ``x`` before the noise is added.
        Defaults to the identity.
    n
        Number of samples to draw. Defaults to 1000.

    Returns
    -------
    x, y
        Two arrays of length ``n``.
    """
    # Pass the sample count by keyword: on a non-frozen scipy distribution a
    # positional argument is interpreted as a shape/loc/scale parameter, not
    # as the size, which would yield a single scalar sample.
    x = x_distro.rvs(size=n)
    noise = noise_distro.rvs(size=n)
    y = trf(x) + noise
    return x, y

def do_best_fit(x, y, trf = lambda x, a, b: a*x + b):
    """Least-squares fit of ``y ~ trf(x, *params)`` using ``curve_fit``.

    Parameters
    ----------
    x, y
        Independent and dependent data.
    trf
        Model function ``trf(x, *params)``; defaults to the line ``a*x + b``.

    Returns
    -------
    params
        Optimal parameter values found by ``curve_fit``.
    fitted
        Model evaluated at ``x`` with the optimal parameters.
    residual
        Mean absolute deviation between ``y`` and ``fitted``.
    """
    # The parameter covariance estimate is not needed here.
    best_params, _ = curve_fit(trf, x, y)
    prediction = trf(x, *best_params)
    mean_abs_error = np.mean(np.abs(y - prediction))
    return best_params, prediction, mean_abs_error

def plot_data_and_fit(ax, x, y, x_fitted, y_fitted, title = ""):
    """Draw the samples and a fitted curve on the given axes.

    ``(x, y)`` is shown as a semi-transparent scatter labelled 'Data';
    ``(x_fitted, y_fitted)`` is drawn on top as a red line labelled
    'Best Fit'. The axes get the given title, 'x'/'y' axis labels, a legend
    and a grid.
    """
    # Artists: observations first, then the fit on top of them.
    ax.scatter(x, y, alpha=0.5, label='Data')
    ax.plot(x_fitted, y_fitted, linewidth=2, color='red', label='Best Fit')

    # Text decorations.
    for setter, text in (
        (ax.set_title, title),
        (ax.set_xlabel, 'x'),
        (ax.set_ylabel, 'y'),
    ):
        setter(text)

    ax.legend()
    ax.grid()

def plot_residuals(ax, x, y, x_fitted, y_fitted, title = ""):
    """Scatter the data after subtracting the fitted values.

    The plotted points are ``(x - x_fitted, y - y_fitted)``; pass ``0`` for
    the coordinate that should be left untouched. The axes get the given
    title, 'x'/'y' axis labels, a legend and a grid.
    """
    horizontal = x - x_fitted
    vertical = y - y_fitted
    ax.scatter(horizontal, vertical, alpha=0.5, label='Data')

    # Text decorations.
    for setter, text in (
        (ax.set_title, title),
        (ax.set_xlabel, 'x'),
        (ax.set_ylabel, 'y'),
    ):
        setter(text)

    ax.legend()
    ax.grid()

def structural_asymmetry_demo(x_distro=uniform, noise_distro=norm, trf=lambda x: x):
    """Fit a line in both directions and show the remaining residuals.

    Data are generated with ``make_data(x_distro, noise_distro, trf=trf)``.
    A 2x2 figure is then filled as follows:

    * top row: fit ``y = a*x + b``; left panel shows data and fit, right
      panel shows ``x`` against ``y - fit``;
    * bottom row: fit ``x = a*y + b``; left panel shows data and fit, right
      panel shows ``x - fit`` against ``y``.

    Parameters
    ----------
    x_distro, noise_distro
        Distributions handed to ``make_data`` for the cause and the noise.
    trf
        Transformation handed to ``make_data``.
    """
    def affine(t, a, b):
        # Straight-line model used for both regression directions.
        return a*t + b

    x, y = make_data(x_distro, noise_distro, trf=trf)
    _, panels = plt.subplots(2, 2, figsize=(10, 10))
    (fwd_fit_ax, fwd_res_ax), (bwd_fit_ax, bwd_res_ax) = panels

    # Direction x -> y: regress y on x.
    coeffs, y_hat, err = do_best_fit(x, y, affine)
    fwd_title = f'Linear Fit: y = {coeffs[0]:.2f}x + {coeffs[1]:.2f}, Residual: {err:.2f}'
    plot_data_and_fit(fwd_fit_ax, x, y, x, y_hat, title=fwd_title)
    plot_residuals(fwd_res_ax, x, y, 0, y_hat, title='After transform')

    # Direction y -> x: regress x on y.
    coeffs, x_hat, err = do_best_fit(y, x, affine)
    bwd_title = f'Linear Fit: x = {coeffs[0]:.2f}y + {coeffs[1]:.2f}, Residual: {err:.2f}'
    plot_data_and_fit(bwd_fit_ax, x, y, x_hat, y, title=bwd_title)
    plot_residuals(bwd_res_ax, x, y, x_hat, 0, title='After transform')


In [None]:
# uniform and normal:
# x is uniform on [0, 10], the noise is normal with mean 0 and standard
# deviation 2, and the generating relationship is y = 2*x + 3 + noise
structural_asymmetry_demo(x_distro=uniform(loc=0, scale=10), noise_distro=norm(loc=0, scale=2), trf=lambda x: 2*x + 3)

In [None]:
# uniform and uniform:
# x and the noise are both uniform on [0, 2]; y = 1.5*x + 7 + noise.
# The noise is not zero-mean (its mean is 1), so expect the fitted
# intercept of y on x to be close to 8 rather than 7.
structural_asymmetry_demo(x_distro=uniform(loc=0, scale=2), noise_distro=uniform(loc=0, scale=2), trf=lambda x: 1.5*x + 7)

### Gaussian random variable with Gaussian noise

When two *normal distributions* are involved, the asymmetry disappears because they always form a joint multivariate normal distribution.

Some observations:
* Let $X\sim\mathcal{N}(\mu, \sigma^2)$ then, $Y=(aX+b)\sim\mathcal{N}(a\mu+b, a^2\sigma^2)$.
* Let $X_1\sim\mathcal{N}(\mu_1, \sigma_1^2)$ and $X_2\sim\mathcal{N}(\mu_2, \sigma_2^2)$, then $Y=(X_1+X_2)\sim\mathcal{N}(\mu_1+\mu_2, \sigma_1^2+\sigma_2^2)$.
* A $d$-dimensional multivariate normal random vector $(X_1,\dots,X_d)$ is fully determined by its means and covariances; its density is
  $$ \phi(\mathbf{x})=(2\pi )^{-d/2}\det({\boldsymbol {\Sigma }})^{-1/2}\,\exp \left(-{\frac {1}{2}}(\mathbf {x} -{\boldsymbol {\mu }})^{\mathrm {T} }{\boldsymbol {\Sigma }}^{-1}(\mathbf {x} -{\boldsymbol {\mu }})\right)$$
  where $\mathbf{x}\in\mathbb{R}^d$, $\mu_i=\mathrm{E}[X_i]$ and $\Sigma_{ij} = \mathrm{Cov}(X_i, X_j)$.
* All of its projections (and thus marginals) are normal distributions: $Y=(\sum_{i=1}^d a_i X_i)\sim\mathcal{N}(\mu_a, \sigma_a^2)$ with some $\mu_a$ and $\sigma_a$.
* All of its slices (i.e., conditionals) are also normal distributions.

Formally, for $X=U_X$ and $Y=X+U_Y$ with independent $U_X\sim\mathcal{N}(\mu_a, \sigma_a^2)$ and $U_Y\sim\mathcal{N}(\mu_b, \sigma_b^2)$:
* $X\sim\mathcal{N}(\mu_X = \mu_a, \sigma_X^2 = \sigma_a^2)$ and $Y\sim\mathcal{N}(\mu_Y = \mu_a+\mu_b, \sigma_Y^2 = \sigma_a^2+\sigma_b^2)$
* $\mathbf{Z}=(X, Y) \sim \mathcal{N}({\boldsymbol {\mu }}, {\boldsymbol {\Sigma }})$ with ${\boldsymbol {\mu }}=(\mu_a, \mu_a+\mu_b)$ and
  $$ {\boldsymbol {\Sigma }} = \left(\begin{array}{cc} \sigma_a^2 & \sigma_a^2 \\ \sigma_a^2 & \sigma_a^2 + \sigma_b^2\end{array}\right)$$
* because $\mathrm{Cov}(X, Y) = \mathrm{E}[(X-\mu_X)(Y-\mu_Y)] = \mathrm{E}[(X-\mu_X)(X-\mu_X)] +\mathrm{E}[(X-\mu_X)(U_Y-\mu_b)] = \sigma_X^2 + 0 = \sigma_a^2 $ due to the independence of $U_X$ and $U_Y$
* Moreover, their correlation is $$\rho_{XY}=\frac{\mathrm{Cov}(X,Y)}{\sqrt{\mathrm{Var}(X)\mathrm{Var}(Y)}}=\frac{\sigma_a^2}{\sigma_a\sqrt{\sigma_a^2+\sigma_b^2}}=\frac{\sigma_a}{\sqrt{\sigma_a^2+\sigma_b^2}},$$
* and the conditional distribution $(Y\mid X=x)\sim\mathcal{N}(x+\mu_b, \sigma_b^2)$.

After removing the fitted line, the residual is Gaussian and uncorrelated with (hence, by joint normality, independent of) the regressor in *both* regression directions, i.e., the asymmetry cannot be found. Both $X$ and $Y$ are projections of a joint multivariate normal distribution $Z$.

For more information, see, e.g., C. Glymour, K. Zhang, and P. Spirtes. 2019. Review of causal discovery methods based on graphical models. Frontiers in Genetics 10 (2019).

In [None]:
# normal and normal:
# x and the noise are both standard normal; y = 1.5*x + 7 + noise.
# This is the fully Gaussian case in which no asymmetry is expected.
structural_asymmetry_demo(x_distro=norm(loc=0, scale=1), noise_distro=norm(loc=0, scale=1), trf=lambda x: 1.5*x + 7)