In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import expon, rayleigh, gumbel_l, uniform
import ipywidgets as widgets
from ipywidgets import BoundedIntText
from IPython.display import display, clear_output
import scienceplots

# Exercise 1: **Speckle**

Visualize the concept of the Central Limit Theorem by using self-generated samples of random variables drawn from one or more arbitrary probability distributions (e.g. Exponential, Gumbel,
Rayleigh, Uniform, Poisson, ...)


## 1.0 What is speckle

-   In an observed pixel, multiple elementary scatterers exhibit the same amplitude but random phase in response to an incident electromagnetic wave
-   The sensor combines these individual responses per pixel, giving rise to the phenomenon known as speckle = noisy looks


## 1.1 What is the main finding of the Central Limit Theorem?

-   The distribution of the means of a large number of samples that were independently and randomly selected from a population approaches a normal (Gaussian) distribution as the sample size grows
-   This holds regardless of the original distribution describing the population, provided that distribution has a finite variance


## 1.2 Why is it important for the interpretation of speckle?

-   As numerous scatterers contribute to the observed signal, the CLT suggests that the distribution of the average signal tends to be normal
-   Helps to characterize the variability of speckle patterns
-   Can be used to filter out speckle (statistical filter)


## 1.3 What constraints must the considered variables obey if their behavior is described within the concept of the Central Limit Theorem?

-   Variables need to be independent of each other (e.g. changes in amplitude $a_k$ and phase $\phi_k$ of each phasor)
    -   Phase changes are most likely in the interval $\phi_k \in [-\pi, \pi]$
-   Variables should be sampled randomly
-   Each scatterer should be describable using the same probability distribution
    -   variables should be identically distributed


## 1.4 How does the validity of the Central Limit Theorem depend on the sample size?

-   The larger the sample size, the more closely the distribution of the sample means approximates a normal distribution
-   The more often mean-samples are taken, the more precise the normal distribution becomes


In [2]:
def exp_distribution(N):
    """Draw ``N`` random variates from ``scipy.stats.expon``.

    The distribution is used with its default parameters; the result is
    whatever ``expon.rvs`` returns for ``size=N``.
    """
    draws = expon.rvs(size=N)
    return draws


def ray_distribution(N):
    """Draw ``N`` random variates from ``scipy.stats.rayleigh``.

    The distribution is used with its default parameters; the result is
    whatever ``rayleigh.rvs`` returns for ``size=N``.
    """
    draws = rayleigh.rvs(size=N)
    return draws


def gum_distribution(N):
    """Draw ``N`` random variates from ``scipy.stats.gumbel_l``.

    The distribution is used with its default parameters; the result is
    whatever ``gumbel_l.rvs`` returns for ``size=N``.
    """
    draws = gumbel_l.rvs(size=N)
    return draws


def uni_distribution(N):
    """Draw ``N`` random variates from ``scipy.stats.uniform``.

    The distribution is used with its default parameters; the result is
    whatever ``uniform.rvs`` returns for ``size=N``.
    """
    draws = uniform.rvs(size=N)
    return draws


def clt(N, mean, std_dev, iterations=100):
    """Return the means of ``iterations`` normally distributed samples.

    In every round, ``N`` values are drawn with
    ``np.random.normal(mean, std_dev, N)`` and their arithmetic mean is
    recorded.

    Parameters
    ----------
    N : int
        Number of values drawn per round.
    mean, std_dev : float
        Location and scale passed to ``np.random.normal``.
    iterations : int, optional
        Number of rounds, i.e. number of means returned (default 100).

    Returns
    -------
    list
        One sample mean per round, in the order they were drawn.
    """
    sample_means = []
    for _ in range(iterations):
        draw = np.random.normal(mean, std_dev, N)
        sample_means.append(np.mean(draw))
    return sample_means


def distribution_samples(distribution_func, N):
    """Call ``distribution_func`` with ``N`` and hand back its result.

    ``distribution_func`` is any callable that accepts a single positional
    argument (the number of values to generate).
    """
    samples = distribution_func(N)
    return samples


def plot_histogram(ax, data, title):
    """Draw ``data`` as a density-normalised, filled histogram on ``ax``.

    The bin count is left to the ``'auto'`` rule, the y-axis ticks are
    removed, and ``title`` is set as the axes title.
    """
    # Fixed look shared by every panel of the figure.
    hist_style = {
        'density': True,
        'bins': 'auto',
        'histtype': 'stepfilled',
        'alpha': 0.8,
        'color': 'red',
        'edgecolor': 'white',
    }
    ax.hist(data, **hist_style)

    ax.set_yticks([])
    ax.set_title(title)


def update_data(sample_size, number_samples, N):
    """Redraw the Central Limit Theorem figure for the given settings.

    For each of the four distributions (exponential, Rayleigh, Gumbel,
    uniform) a population of ``N`` variates is generated and shown in the
    left column. The right column shows the histogram of ``number_samples``
    sample means, each computed from ``sample_size`` values drawn at random
    (with replacement) from that population.

    Parameters
    ----------
    sample_size : int
        Number of values averaged per sample mean.
    number_samples : int
        Number of sample means collected per distribution.
    N : int
        Size of the population generated per distribution.

    If any argument is smaller than 1 a short message is printed and no
    figure is drawn.
    """
    clear_output(wait=True)

    # An empty population or sample has no mean (NaN), which makes the
    # histogram range computation fail, so reject such input up front.
    if min(sample_size, number_samples, N) < 1:
        print('Sample size, number of samples and distribution size '
              'must all be at least 1.')
        return

    distributions = [
        exp_distribution, ray_distribution, gum_distribution, uni_distribution
    ]
    fig, axs = plt.subplots(len(distributions),
                            2,
                            figsize=(12, 8),
                            gridspec_kw={'hspace': 0.5})

    # One row per distribution: population on the left, sample means on the right.
    for (population_ax, means_ax), dist_func in zip(axs, distributions):
        population = distribution_samples(dist_func, N)
        plot_histogram(population_ax, population,
                       f'{dist_func.__name__} distribution function')

        # Resample from the population itself. Averaging draws from a normal
        # distribution would look Gaussian by construction and would not show
        # the CLT at work on the non-normal population.
        sample_means = [
            np.mean(np.random.choice(population, size=sample_size, replace=True))
            for _ in range(number_samples)
        ]
        plot_histogram(means_ax, sample_means,
                       'Approximated Normal Distribution')

    fig.suptitle(
        f'Distribution function with {N} continuous random variables and corresponding \n'
        f'distribution function with {number_samples} samples of size {sample_size}'
    )
    plt.show()

In [3]:
# The descriptions are longer than the default label column; let the label
# grow to fit its text instead of being cut off.
LABEL_STYLE = {'description_width': 'initial'}

# Every control starts at 1: a zero-sized population or sample has no mean,
# so 0 is not a meaningful input for the plots.
num_samp = widgets.BoundedIntText(min=1,
                                  max=1_000_000,
                                  step=100,
                                  description="Number of samples",
                                  value=100,
                                  style=LABEL_STYLE)
siz_samp = widgets.BoundedIntText(min=1,
                                  max=1_000,
                                  step=10,
                                  description="Sample size",
                                  value=500,
                                  style=LABEL_STYLE)
num_dist = widgets.BoundedIntText(min=1,
                                  max=1_000_000,
                                  step=100,
                                  description="Distribution size",
                                  value=10_000,
                                  style=LABEL_STYLE)

ui = widgets.HBox([widgets.VBox([num_samp, siz_samp, num_dist])])

# Re-run update_data whenever one of the three inputs changes.
out = widgets.interactive_output(update_data, {
    'sample_size': siz_samp,
    'number_samples': num_samp,
    'N': num_dist
})

display(out, ui)


Output()

HBox(children=(VBox(children=(BoundedIntText(value=100, description='Number of samples', max=1000000, step=100…