# HLMA 408: Lois de probabilité et visualisation interactive

***
> __Auteur__: Joseph Salmon <joseph.salmon@umontpellier.fr>

On va illustrer des lois discrètes classiques en donnant leur fonction de masse, fonction de répartition, et des tirages aléatoires de ces lois.
On observera ainsi l'impact du centrage (en: *centering*) et de la mise à l'échelle (en: *scaling*) en partant d'un $X$ centré-réduit, et en investiguant $Y=\frac{X-\mu}{\sigma}$ pour les variables continues.

**Rem**: Pour la visualisation un paramètre de frémissement (en: *jitter*) permet de limiter les chevauchements des points qui représentent les tirages aléatoires.

La liste exhaustive des lois disponibles sous `scipy` se trouve ici: https://docs.scipy.org/doc/scipy/reference/stats.html

Ce notebook est inspiré des posts suivants:
- https://medium.com/kapernikov/ipywidgets-with-matplotlib-93646718eb84
- https://medium.com/@jdchipox/how-to-interact-with-jupyter-33a98686f24e

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats 
import ipywidgets  # ipywidgets>=7.5

In [2]:
%matplotlib widget

## Visualisation, cas discret: fonctions de masse, de répartition et tirages aléatoires

In [3]:
def make_box_layout():
    """Return a fresh ``ipywidgets.Layout`` drawing a thin black framed box."""
    box_style = {
        'border': 'solid 1px black',
        'margin': '0px 10px 10px 0px',
        'padding': '5px 5px 5px 5px',
    }
    return ipywidgets.Layout(**box_style)

In [4]:
def keep_no_param_distribution_disc():
    """Map the names of selected discrete scipy distributions to their objects.

    Every name listed in ``stats._discrete_distns._distn_names`` is looked up
    on ``stats``; the distribution is kept when its ``shapes`` attribute is
    falsy or has length 1 or 2.

    NOTE(review): ``shapes`` appears to be a comma-separated string of
    parameter names in scipy, so ``len`` would count characters rather than
    parameters -- confirm that this is the intended filter.

    Returns
    -------
    dict
        ``{name: getattr(stats.distributions, name)}`` for each kept name.
    """
    def _is_kept(name):
        shapes = getattr(stats, name).shapes
        return not shapes or len(shapes) in (1, 2)

    kept_names = [
        name for name in stats._discrete_distns._distn_names if _is_kept(name)
    ]
    return {name: getattr(stats.distributions, name) for name in kept_names}

# inspired by https://stackoverflow.com/questions/30453097/getting-the-parameter-names-of-scipy-stats-distributions

In [5]:
# Build the name -> distribution lookup once; RandomWidgetDiscrete reads it
# for its default distribution and for the dropdown options.
distributions_0_dict_disc = keep_no_param_distribution_disc()

In [6]:
class RandomWidgetDiscrete(ipywidgets.HBox):
    """Interactive panel illustrating a one-parameter discrete distribution.

    Three stacked axes sharing the x axis show the probability mass
    function, the cumulative distribution function and a jittered scatter
    of random draws.  A column of controls (sample size, parameter,
    distribution, colour, zoom, jitter, resample) drives the display.

    The distributions are read from the module-level
    ``distributions_0_dict_disc`` mapping.
    """

    # (min, max, step) applied to the parameter slider when a distribution is
    # selected.  Names that are not listed leave the slider untouched.
    _MU_SLIDER_BOUNDS = {
        'geom': (0.001, 0.999, 0.001),
        'bernoulli': (0.001, 0.999, 0.001),
        'logser': (0.001, 0.999, 0.001),
        'zipf': (0, 15, 0.1),
        'dlaplace': (0, 15, 0.1),
        'poisson': (0.1, 15, 0.1),
    }

    def __init__(self):
        super().__init__()
        output = ipywidgets.Output()
        self.n_samples = 30
        self.xranges = (-20, 20)  # observation bounds on the x axis
        self.yranges = (0, 0.5)  # observation bounds on the mass-function axis
        self.x = np.arange(self.xranges[0], self.xranges[1])

        self.mu = 1
        self.distribution = distributions_0_dict_disc['poisson']
        self._draw_new_samples()

        self.size = 5
        self.initial_color = '#1a60e1'
        self.jitter = 0.10
        self.params = dict(
            color=self.initial_color, alpha=0.50, linewidth=0.2, edgecolor="black"
        )

        # Create the figure inside the Output widget so it is captured there.
        with output:
            self.fig, self.ax = plt.subplots(3, 1, sharex=True,
                                             num='Fonction de masse et tirages aléatoires',
                                             constrained_layout=True, figsize=(6, 4))

        # Top axis: mass function drawn as markers (line1) on stems (line2).
        pmf = self.distribution.pmf(self.x, self.mu)
        self.line1, = self.ax[0].plot(self.x, pmf, 'bo', ms=8)
        self.line2 = self.ax[0].vlines(self.x, 0, pmf,
                                       colors='k', linestyles='-', lw=1)
        plt.show()

        self.ax[0].set_xlabel(r'Fonction de masse')
        self.ax[0].set_xlim(self.xranges)
        self.ax[0].set_ylim(self.yranges)

        # Middle axis: cumulative distribution function.
        self.cdf, = self.ax[1].plot(self.x,
                                    self.distribution.cdf(self.x, self.mu),
                                    self.initial_color)
        self.ax[1].set_xlabel(r'Fonction de répartition')
        self.ax[1].set_xlim(self.xranges)
        self.ax[1].set_ylim((-0.1, 1.1))

        # Bottom axis: random draws, spread vertically by the jitter.
        self.ax[2].set_xlabel(r'Tirages')
        self.ax[2].set_xlim(self.xranges)
        self.ax[2].axes.get_yaxis().set_visible(False)
        self.ax[2].set_ylim(-2, 2)

        self._draw_new_jitter()
        self.points = self.ax[2].scatter(self.samples, self.y, **self.params)
        self.fig.canvas.toolbar_position = 'bottom'

        # Define the control widgets.
        style = {'description_width': '100px'}
        layout = {'width': '300px'}

        n_samples_slider = ipywidgets.IntSlider(
            value=self.n_samples, min=1, max=500, step=1,
            description="$n$", style=style, layout=layout)
        # Raw string: '\m' is not a valid escape sequence in a normal literal.
        self.mu_slider = ipywidgets.FloatSlider(
            value=self.mu, min=0, max=15, step=0.1,
            description=r'$\mu$', style=style, layout=layout)
        color_picker = ipywidgets.ColorPicker(
            value=self.initial_color,
            description='Couleur', style=style, layout=layout)
        jitter_slider = ipywidgets.FloatSlider(
            value=self.jitter, min=0.005, max=0.3, step=0.005,
            description='Frémissement', style=style, layout=layout)
        text_distribution = ipywidgets.Dropdown(
            options=list(distributions_0_dict_disc),
            value='poisson',
            description='Distribution', style=style, layout=layout)
        int_xrange_slider = ipywidgets.FloatRangeSlider(
            value=self.xranges,
            min=-20, max=20, step=0.1,
            description="Zoom en x", style=style, layout=layout)
        int_yrange_slider = ipywidgets.FloatRangeSlider(
            value=(self.yranges),
            min=-1, max=3, step=0.1,
            description="Zoom en y", style=style, layout=layout)
        resample_button = ipywidgets.Button(
            description="Nouveau tirage", style=style, layout=layout)

        controls = ipywidgets.VBox([
            n_samples_slider,
            self.mu_slider,
            text_distribution,
            color_picker,
            int_xrange_slider,
            int_yrange_slider,
            jitter_slider,
            resample_button,
        ])

        controls.layout = make_box_layout()
        output.layout = make_box_layout()

        # Connect every control to its handler.
        n_samples_slider.observe(self.update_n_samples, 'value')
        self.mu_slider.observe(self.update_mu, 'value')
        color_picker.observe(self.line_color, 'value')
        text_distribution.observe(self.update_text_distribution, 'value')
        int_xrange_slider.observe(self.update_xrange_slider, 'value')
        int_yrange_slider.observe(self.update_yrange_slider, 'value')
        jitter_slider.observe(self.update_jitter_slider, 'value')
        resample_button.on_click(self.update_resample_button)

        self.children = [controls, output]

    def _draw_new_samples(self):
        """Draw ``n_samples`` fresh values from the current distribution."""
        self.samples = self.distribution.rvs(self.mu, size=self.n_samples)

    def _draw_new_jitter(self):
        """Draw a fresh Gaussian vertical offset for each sample."""
        self.y = self.jitter * np.random.randn(self.n_samples,)

    def _push_points(self):
        """Send the current (sample, jitter) pairs to the scatter artist."""
        self.points.set_offsets(np.column_stack((self.samples, self.y)))

    def _update_curves(self):
        """Recompute mass function markers, stems and the cdf line."""
        pmf = self.distribution.pmf(self.x, self.mu)
        self.line1.set_ydata(pmf)
        self.line2.set_segments(
            [np.array([[x, 0], [x, p]]) for x, p in zip(self.x, pmf)])
        self.cdf.set_ydata(self.distribution.cdf(self.x, self.mu))

    def update_mu(self, change):
        """Evolution with the mu parameter."""
        self.mu = change.new
        self._draw_new_samples()
        self._update_curves()
        self._draw_new_jitter()
        self._push_points()
        self.fig.canvas.draw()

    def update_n_samples(self, change):
        """Evolution with the n_samples parameter."""
        self.n_samples = change.new
        self._draw_new_samples()
        self._draw_new_jitter()
        self._push_points()
        self.fig.canvas.draw()

    def update_jitter_slider(self, change):
        """Evolution with the jitter parameter."""
        jitter_old = self.jitter
        self.jitter = change.new
        # Rescale the existing offsets so the points keep their relative
        # vertical positions instead of being reshuffled.
        self.y *= self.jitter / jitter_old
        self._push_points()
        self.fig.canvas.draw()

    def line_color(self, change):
        """Apply the picked colour to the mass markers, the cdf and the draws."""
        self.initial_color = change.new
        # The mass-function markers are stored as ``line1``; there is no
        # ``line`` attribute on this widget.
        self.line1.set_color(self.initial_color)
        self.params['color'] = self.initial_color
        self.cdf.set_color(self.initial_color)

        self._draw_new_jitter()
        self._push_points()
        self.points.set_color(self.initial_color)
        self.fig.canvas.draw()

    def update_xrange_slider(self, change):
        """Zoom horizontally on the displayed axes."""
        self.xranges = change.new
        # The evaluation grid ``self.x`` is fixed, so the curves do not need
        # to be recomputed; only the visible window changes.
        self.ax[0].set_xlim(self.xranges)
        self.ax[2].set_xlim(self.xranges)
        self.fig.canvas.draw()

    def update_yrange_slider(self, change):
        """Zoom vertically on the mass-function axis."""
        self.yranges = change.new
        self.ax[0].set_ylim(self.yranges)
        self.fig.canvas.draw()

    def update_text_distribution(self, change):
        """Switch to the distribution selected in the dropdown."""
        self.distribution = distributions_0_dict_disc[change.new]

        # Adapt the parameter slider first: if the current value falls
        # outside the new bounds the slider clamps it and ``update_mu`` runs,
        # so the curves below are evaluated with the adjusted parameter.
        bounds = self._MU_SLIDER_BOUNDS.get(self.distribution.name)
        if bounds is not None:
            self.mu_slider.min, self.mu_slider.max, self.mu_slider.step = bounds

        self._update_curves()
        self._draw_new_samples()
        self._draw_new_jitter()
        self._push_points()

        self.points.set_color(self.initial_color)
        self.fig.canvas.draw()

    def update_resample_button(self, change):
        """Draw a new sample (and new jitter) with unchanged settings."""
        self._draw_new_samples()
        self._draw_new_jitter()
        self._push_points()
        # Redraw explicitly, like every other handler.
        self.fig.canvas.draw()

In [7]:
# Build the interactive widget; it is the last expression of the cell.
RandomWidgetDiscrete()

RandomWidgetDiscrete(children=(VBox(children=(IntSlider(value=30, description='$n$', layout=Layout(width='300p…