In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Set Plotly's default figure template to "plotly_white".
pio.templates.default = "plotly_white"


In [2]:
def cross_entropy(p, q, base=2):
    """
    Perform some checks, then compute the cross-entropy between `p` and `q`.

    Parameters
    ----------
    p : sequence of float
        Probabilities of the reference distribution.
    q : sequence of float
        Probabilities of the distribution whose logarithm is taken.
    base : float, default 2
        Base of the logarithm (2 gives a result in bits).

    Returns
    -------
    float
        ``-sum(p_i * log(q_i)) / log(base)``. Terms with ``p_i == 0``
        contribute nothing (convention ``0 * log(0) = 0``). If some outcome
        has ``p_i > 0`` but ``q_i == 0`` the result is ``inf``.

    Raises
    ------
    AssertionError
        If either input does not sum to 1, has an entry outside ``[0, 1]``,
        or if the two inputs have different lengths.
    """
    for distr in [p, q]:
        # check probabilities sum up to 1
        err_mess = f"the probabilities of {distr} should sum up to 1"
        assert np.isclose(sum(distr), 1), err_mess

        # check non-negative probabilities
        # (`np.all` replaces `np.alltrue`, which was removed in NumPy 2.0)
        err_mess = f"all probabilities of {distr} should be non-negative"
        assert np.all([val >= 0 for val in distr]), err_mess

        # check probabilities at most 1
        err_mess = f"all probabilities of {distr} should be at most 1"
        assert np.all([val <= 1 for val in distr]), err_mess

    # check same array lengths
    err_mess = "both arrays should have the same length"
    assert len(p) == len(q), err_mess

    log_base = np.log(base)  # loop-invariant, computed once
    cross_ent = 0
    for p_i, q_i in zip(p, q):
        if p_i == 0:
            # 0 * log(q_i) is taken to be 0, whatever q_i is
            continue
        if q_i == 0:
            # `q` gives zero mass to an outcome `p` can produce:
            # the cross-entropy is infinite
            return np.inf
        cross_ent -= p_i * np.log(q_i) / log_base

    return cross_ent


def entropy(p, base=2):
    """
    Compute the entropy of the distribution `p` in the given log `base`.

    This is the cross-entropy of `p` measured against itself.
    """
    return cross_entropy(p, p, base)


def KL_div(distr_from, distr_to, base=2):
    """
    Return the Kullback-Leibler divergence from `distr_from` to `distr_to`.

    It is the cross-entropy of `distr_from` against `distr_to` minus the
    entropy of `distr_from`, both taken in the logarithm `base`.
    """
    return cross_entropy(distr_from, distr_to, base) - entropy(distr_from, base)


def fit_poly(X, y):
    """
    Fit a linear model with polynomial features to data `X` and target `y`.

    `X` is used as the design matrix exactly as given; no feature columns
    are generated here.

    Parameters
    ----------
    X : np.ndarray
        Design matrix, one row per observation.
    y : np.ndarray
        Targets, either 1-D or a column vector with one row per observation.

    Returns
    -------
    w_ml : np.ndarray
        Solution of the normal equations ``(X^T X) w = X^T y``; it has the
        same trailing shape as ``X.T.dot(y)``.
    sigma2_ml : float
        Mean of the squared residuals ``y - X w_ml``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    w_ml = np.linalg.solve(X.T.dot(X), X.T.dot(y))
    # No transpose on `w_ml`: it is a no-op for 1-D weights and breaks the
    # product for column-vector weights.
    sigma2_ml = np.mean((y - X.dot(w_ml)) ** 2)
    return w_ml, sigma2_ml


def gaussian_loglik(X, y, w_ml, sigma2_ml):
    """
    Compute the Gaussian log-likelihood of parameters `w_ml`, `sigma2_ml` with data `X` and target `y`.

    Parameters
    ----------
    X : np.ndarray
        Design matrix, one row per observation.
    y : np.ndarray
        Targets, 1-D or a column vector, matching the shape of ``X.dot(w_ml)``.
    w_ml : np.ndarray
        Weights, 1-D or a column vector.
    sigma2_ml : float
        Noise variance.

    Returns
    -------
    float
        ``-N/2 * log(2 * pi * sigma2_ml) - sum((y - X w_ml)^2) / (2 * sigma2_ml)``
        with ``N = len(y)``.
    """
    N = len(y)
    # No transpose on `w_ml`: it is a no-op for 1-D weights and breaks the
    # product for column-vector weights.
    residuals = y - X.dot(w_ml)
    loglik = -N / 2 * np.log(2 * np.pi * sigma2_ml)  # term 1: normalisation
    loglik -= 1 / (2 * sigma2_ml) * np.sum(residuals ** 2)  # term 2: data fit
    return loglik


def deviance(loglik):
    """
    Return the deviance for a log-likelihood, i.e. minus twice its value.

    `loglik` may be a scalar or array-like; it is passed through `np.array`
    before being scaled.
    """
    loglik_arr = np.array(loglik)
    return -2 * loglik_arr


def AIC(training_deviance, n_params):
    """
    Return the Akaike Information Criterion of a model.

    It is the training deviance plus a penalty of two per parameter.
    """
    complexity_penalty = 2 * n_params
    return training_deviance + complexity_penalty


def AICc(training_deviance, n_params, n_obs):
    """
    Return the corrected Akaike Information Criterion of a model.

    This is the plain AIC plus the correction term
    ``2 * n_params * (n_params + 1) / (n_obs - n_params - 1)``.
    Usually used if `n_obs / n_params < 40`.
    """
    numerator = 2 * n_params * (n_params + 1)
    denominator = n_obs - n_params - 1
    return AIC(training_deviance, n_params) + numerator / denominator


def BIC(training_deviance, n_params, n_obs):
    """
    Compute the Bayesian Information Criterion of a model.

    Parameters
    ----------
    training_deviance : float or np.ndarray
        Deviance (minus twice the log-likelihood) on the training data.
    n_params : int
        Number of free parameters of the model.
    n_obs : int
        Number of observations the model was fitted on.

    Returns
    -------
    float or np.ndarray
        ``training_deviance + n_params * log(n_obs)``.
    """
    # The BIC penalty is k * ln(n); the factor 2 belongs only to the
    # deviance (-2 * log-likelihood), not to the penalty.
    return training_deviance + n_params * np.log(n_obs)
