In [None]:
# Notebook bootstrap: fetch the course repository and install its dependencies.
# Clone the course repo; both stdout and stderr are discarded.
!git clone https://github.com/rramosp/ppdl.git > /dev/null 2> /dev/null
# Move `init.py` and `local` next to the notebook; `-n` never overwrites an
# existing file, and error output is discarded.
!mv -n ppdl/content/init.py ppdl/content/local . 2> /dev/null
# Install the course requirements; only stdout is discarded, errors still show.
!pip install -r ppdl/content/requirements.txt > /dev/null

# Lab 02.03.3: Multiclass Bayesian Logistic Regression

In this lab, we will implement a multiclass Bayesian logistic regression model using `tensorflow_probability`'s `JointDistribution` API.

In [None]:
import inspect
from rlxmoocapi import submit, session
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.datasets import make_blobs
from sklearn.neighbors import KernelDensity
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
plt.style.use("ggplot")

tfd = tfp.distributions

In [None]:
# Identifiers for the grading backend: the course, the service endpoint URL,
# and this lab's id. They are passed to the login sequence in the next cell.
course_id = "ppdl.v1"
endpoint = "https://m5knaekxo6.execute-api.us-west-2.amazonaws.com/dev-v0001/rlxmooc"
lab = "L02.03.03"

In [None]:
# Start the login sequence for this course and lab.
# NOTE(review): `varname="student"` presumably names the global variable the
# session object is bound to, since later cells call `student.submit_task`
# — confirm against rlxmoocapi.
session.LoginSequence(
    endpoint=endpoint,
    course_id=course_id,
    lab_id=lab,
    varname="student"
    );

## Data Loading

In [None]:
# Synthetic dataset: 500 points in `n_features` dimensions, drawn around
# `n_classes` blob centers. The fixed random_state keeps the data reproducible.
n_features, n_classes = 2, 3
X, y = make_blobs(n_samples=500, n_features=n_features,
                  centers=n_classes, random_state=42)

We can visualize the data:

In [None]:
# Scatter plot of the two features, with each point coloured by its class label.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap="viridis", alpha=0.5)
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")
# The trailing `;` keeps the `Text(...)` repr of the last call out of the output.
ax.set_title("Synthetic blobs coloured by class");

## Task 1: Implement a Logistic Regression Model

In this task, you must define a logistic regression model using the `JointDistributionNamed` API. The model to implement is:

$$
w \sim \mathcal{N}([0, 0], [1, 1])\\
b \sim \mathcal{N}(0, 1)\\
\text{logits} = \mathbf{w}\cdot\mathbf{x} + b\\
y \sim \text{categorical}(\text{logits})
$$

The model must have the following distributions: `w`, `b`, and `y`:

In [None]:
def prob_log_regression(x, n_features, n_classes):
    """Build the Bayesian multiclass logistic regression joint distribution.

    This is the Task 1 exercise stub; the body is left for the student.

    Args:
        x: feature matrix; the grader passes an array of shape
            (n_samples, n_features).
        n_features: number of input features.
        n_classes: number of target classes.

    Returns:
        A `tfd.JointDistribution` with components named `w`, `b` and `y`.
        The grader expects `w` to be a `Normal` with batch shape
        [n_features, n_classes], `b` a `Normal` with batch shape [n_classes],
        and `y` an `Independent` distribution with event shape [n_samples].
    """
    # YOUR CODE HERE
    model = ...
    return model

You can use the model to generate samples:

In [None]:
# Build the model on the blob features and draw one joint sample from it.
model = prob_log_regression(X, n_features, n_classes)
samples = model.sample(1)
# Show the shape of the sampled labels `y`.
print(samples["y"].shape)

In [None]:
def grader1(functions, variables, caller_userid):
    """Grade Task 1: validate the structure of the submitted joint distribution.

    Every source string in `functions` is executed, the resulting
    `prob_log_regression` is called on a fixed blob dataset, and the returned
    model is checked for the expected component names, distribution types,
    batch shapes and event shapes.

    Args:
        functions: mapping whose values are source-code strings; each one is
            `exec`-ed and, between them, they must define
            `prob_log_regression`.
        variables: not used by this grader.
        caller_userid: not used by this grader.

    Returns:
        A `(score, message)` tuple: `(5, ...)` when every check passes and
        `(0, ...)` at the first failing check. The message contains HTML
        markup.
    """
    import tensorflow as tf
    import tensorflow_probability as tfp
    # Imported locally, as the other graders do: this function is registered
    # from its source text, so it cannot rely on the notebook's top-level
    # imports being available where it runs.
    from sklearn.datasets import make_blobs
    tfd = tfp.distributions

    # The submitted code is executed with this function's current locals
    # (the imports above and the arguments) as its global namespace.
    # NOTE(review): this runs arbitrary submitted code via `exec`; make sure
    # the grading environment is sandboxed.
    namespace = locals()
    for f in functions.values():
        exec(f, namespace)

    n_features = 2
    n_classes = 3
    n_samples = 500
    X, y = make_blobs(
            n_samples=n_samples,
            n_features=n_features,
            centers=n_classes,
            random_state=42
            )

    prob_log_regression = namespace["prob_log_regression"]
    model_student = prob_log_regression(X, n_features, n_classes)
    msg = "Validating probabilistic model...</br>"
    if not isinstance(model_student, tfd.JointDistribution):
        msg += "<b>Your model is not a joint distribution.</b></br>"
        return 0, msg

    # `sample_distributions` returns (distributions, samples); only the
    # component distributions are inspected here.
    dists = model_student.sample_distributions()[0]
    expected_dists = {
            "w": {
                "type": tfd.Normal,
                "batch_shape": [n_features, n_classes],
                "event_shape": []
                },
            "b": {
                "type": tfd.Normal,
                "batch_shape": [n_classes],
                "event_shape": []
                },
            "y": {
                "type": tfd.Independent,
                "batch_shape": [],
                "event_shape": [n_samples],
                }
            }
    for param, values in expected_dists.items():
        if param not in dists:
            msg += f"<b>Your model doesn't contain the distribution '{param}'</b></br>"
            return 0, msg
        if not isinstance(dists[param], values["type"]):
            msg += f"<b>The distribution '{param}' is incorrect.</b>"
            return 0, msg

        if list(dists[param].batch_shape) != values["batch_shape"]:
            msg += f"<b>The distribution '{param}' has a wrong batch_shape.</b>"
            return 0, msg

        if list(dists[param].event_shape) != values["event_shape"]:
            msg += f"<b>The distribution '{param}' has a wrong event_shape.</b>"
            return 0, msg

    return 5, msg + "<b>Success!</b>"

Use the following cell to grade your code:

In [None]:
# Passes the source text of `grader1` to `teacher.set_grader` for task "T1"
# of this lab (presumably registering it as that task's grader).
# NOTE(review): `teacher`, `source_functions` and `source_variables` are not
# defined in any visible cell; this looks like an instructor-only cell —
# confirm whether it should ship in the student notebook.
teacher.set_grader(
        teacher.course_id, lab, "T1",
        inspect.getsource(grader1), "grader1",
        source_functions, source_variables
        )

In [None]:
# Submit Task 1, passing the notebook's globals as the namespace.
# The trailing `;` suppresses the cell's output repr.
student.submit_task(namespace=globals(), task_id="T1");

## Task 2: Markov Chain Monte Carlo

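Implement the `mcmc` function to train the model. You must use a Markov chain Monte Carlo (MCMC) strategy; we recommend the `NoUTurnSampler`, but feel free to experiment with other samplers. The function is called as `mcmc(model=..., y=..., n_features=..., n_classes=..., num_samples=..., burning_steps=...)` and must return `(samples, log_probs)`, where `samples[0]` holds the draws of `w` and `samples[1]` the draws of `b`.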

In [None]:
# Run the sampler on the Task 1 model with the blob labels `y`, keeping
# 10 samples after 10 `burning_steps` (presumably burn-in iterations).
# NOTE(review): `mcmc` is not defined in any visible cell; it must be
# implemented (Task 2) before this cell can run.
samples, log_probs = mcmc(
        model=model,
        y=y,
        n_features=n_features,
        n_classes=n_classes,
        num_samples=10,
        burning_steps=10,
        )

In [None]:
def grader2(functions, variables, caller_userid):
    """Grade Task 2: check that the submitted `mcmc` function fits a known model.

    Every source string in `functions` is executed, the resulting `mcmc` is
    run on two small boolean datasets ("and" / "or"), and the returned
    samples are checked for their shapes and for the accuracy obtained with
    the mean of the sampled parameters.

    Args:
        functions: mapping whose values are source-code strings; each one is
            `exec`-ed and, between them, they must define `mcmc`.
        variables: not used by this grader.
        caller_userid: not used by this grader.

    Returns:
        A `(score, message)` tuple: `(5, ...)` when every check passes and
        `(0, ...)` at the first failing check. The message contains HTML
        markup.
    """
    import tensorflow as tf
    import numpy as np
    import tensorflow_probability as tfp

    # Fix TensorFlow's global seed before any sampling takes place.
    tf.random.set_seed(0)
    tfd = tfp.distributions

    # The submitted code is executed with this function's current locals
    # (tf, np, tfp, tfd and the arguments) as its global namespace.
    namespace = locals()
    for f in functions.values():
        exec(f, namespace)

    # Run the submitted sampler on a fixed reference model and judge the
    # result by the predictions of the averaged parameters.

    n_features = 2
    n_classes = 2
    n_gen_samples = 100
    burnout = 100

    # The four binary input combinations, repeated 10 times (40 rows).
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
        ] * 10, dtype="float32")
    # Labels for the two test cases, aligned row by row with X.
    y = {
            "and": np.array([0, 0, 0, 1] * 10),
            "or": np.array([0, 1, 1, 1] * 10),
            }

    mcmc = namespace["mcmc"]
    msg = "Validating mcmc procedure...</br>"

    for case, y_i in y.items():

        # Reference model: Normal(0, 1) priors on w and b, categorical
        # likelihood with logits X @ w + b.
        model = tfd.JointDistributionNamedAutoBatched({
            "w": tfd.Normal(loc=tf.zeros(shape=(n_features, n_classes)), scale=1.),
            "b": tfd.Normal(loc=tf.zeros(shape=(n_classes, )), scale=1.),
            "y": lambda b, w: tfd.Independent(
                tfd.Categorical(logits=X @ w + b),
                reinterpreted_batch_ndims=1
                )
            })
        samples, log_probs = mcmc(
                model=model,
                y=y_i,
                n_features=n_features,
                n_classes=n_classes,
                num_samples=n_gen_samples,
                burning_steps=burnout,
                )

        # samples[0] must hold the draws of w, one per generated sample.
        if samples[0].shape != tf.TensorShape([n_gen_samples, n_features, n_classes]):
            msg += "<b>Your function returns samples with wrong w shapes.</b></br>"
            return 0, msg

        # samples[1] must hold the draws of b, one per generated sample.
        if samples[1].shape != tf.TensorShape([n_gen_samples, n_classes]):
            msg += "<b>Your function returns samples with wrong b shapes.</b></br>"
            return 0, msg

        # One log-probability per generated sample.
        if log_probs.shape != tf.TensorShape([n_gen_samples]):
            msg += "<b>Your function returns log_probs with wrong shape.</b></br>"
            return 0, msg

        # Average the draws over the sample axis to get point estimates.
        w = samples[0].numpy().mean(axis=0)
        b = samples[1].numpy().mean(axis=0)

        y_pred = np.argmax(X @ w + b, axis=1)

        # Require at least 90% accuracy on the training points; on failure
        # the predictions and labels are appended to the message.
        if (y_pred == y_i).sum() / y_i.size < 0.9:
            msg += "<b>The mcmc function does not optimize the specified model.</b></br>"
            msg += str(y_pred)
            msg += str(y_i)
            return 0, msg

    return 5, msg + "<b>Success!</b>"

In [None]:
# Submit Task 2, passing the notebook's globals as the namespace.
# The trailing `;` suppresses the cell's output repr.
student.submit_task(namespace=globals(), task_id="T2");

## Task 3: MAP Predictions

Compute the maximum a posteriori (MAP) estimates of the parameters, and the resulting predictions, as follows:

$$
w_{map} = \text{argmax}(\text{kde}(w_{samples}))\\
b_{map} = \text{argmax}(\text{kde}(b_{samples}))\\
\hat{y} = \text{argmax}(\text{softmax}(X w_{map} + b_{map}))
$$

You must use `KernelDensity` for the estimation of the MAP parameters.

In [None]:
def kde(w_samples, b_samples, x, kernel="gaussian", bandwidth=0.1):
    """Predict class labels from the MAP estimates of `w` and `b`.

    This is the Task 3 exercise stub; the body is left for the student.

    Args:
        w_samples: sampled weights; the grader passes a NumPy array of shape
            (n_gen_samples, num_features, num_classes).
        b_samples: sampled biases; the grader passes a NumPy array of shape
            (n_gen_samples, num_classes).
        x: feature matrix of shape (n_samples, num_features).
        kernel: kernel name for `KernelDensity`.
        bandwidth: bandwidth for `KernelDensity`.

    Returns:
        The predicted class index for every row of `x`. The grader compares
        this with its reference predictions using `np.allclose`.
    """
    # YOUR CODE HERE
    preds = ...
    return preds

Let's generate some predictions:

In [None]:
# `samples` is the value returned by the `mcmc` call above; samples[0] and
# samples[1] are expected to hold the draws of `w` and `b` respectively.
preds = kde(samples[0].numpy(), samples[1].numpy(), X)

In [None]:
def grader3(functions, variables, caller_userid):
    """Grade Task 3: compare the submitted `kde` with a reference implementation.

    Every source string in `functions` is executed, then the submitted `kde`
    and the reference one are run on 10 randomly generated problems; the
    predictions must agree on every trial.

    Args:
        functions: mapping whose values are source-code strings; each one is
            `exec`-ed and, between them, they must define `kde`.
        variables: not used by this grader.
        caller_userid: not used by this grader.

    Returns:
        A `(score, message)` tuple: `(5, ...)` when all trials match and
        `(0, ...)` at the first mismatch.
    """
    import numpy as np
    import tensorflow as tf
    from sklearn.datasets import make_blobs
    from sklearn.neighbors import KernelDensity

    # The submitted code is executed with this function's current locals
    # (the imports above and the arguments) as its global namespace.
    namespace = locals()
    for f in functions.values():
        exec(f, namespace)

    kde_student = namespace["kde"]

    def kde(w_samples, b_samples, x, kernel="gaussian", bandwidth=0.1):
        """Reference solution: MAP parameters via KDE, then argmax of softmax."""

        def densest(draws):
            # Flatten each draw to a vector, score every draw under a KDE
            # fitted on all draws, and return the highest-density draw.
            flat = draws.reshape((-1, np.prod(draws.shape[1:])))
            estimator = KernelDensity(kernel=kernel, bandwidth=bandwidth)
            estimator.fit(flat)
            return draws[np.argmax(estimator.score_samples(flat))]

        w_map = densest(w_samples)
        b_map = densest(b_samples)

        logits = x @ w_map + b_map
        return np.argmax(tf.nn.softmax(logits), axis=1)

    msg = "Testing your kde function with 10 random trials</br>"

    for _trial in range(10):
        # Random problem sizes; the draw order is kept fixed on purpose.
        n_samples = np.random.randint(1, 100)
        n_gen_samples = np.random.randint(1, 10)
        num_features = np.random.randint(2, 10)
        num_classes = np.random.randint(2, 10)

        samples_w = np.random.normal(size=(n_gen_samples, num_features, num_classes))
        samples_b = np.random.normal(size=(n_gen_samples, num_classes))
        # Only the features are needed; the blob labels are discarded.
        x = make_blobs(n_samples, num_features, centers=num_classes)[0]

        preds_student = kde_student(samples_w, samples_b, x)
        preds_teacher = kde(samples_w, samples_b, x)

        if not np.allclose(preds_student, preds_teacher):
            msg += "The MAP predictions do not match the expected results."
            return 0, msg

    return 5, msg + "<b>Success!</b>"

In [None]:
# Submit Task 3, passing the notebook's globals as the namespace.
# The trailing `;` suppresses the cell's output repr.
student.submit_task(namespace=globals(), task_id="T3");

## Model Evaluation

Let's evaluate the MAP model

In [None]:
# Classification metrics of the MAP predictions `preds` against the blob labels `y`.
print(classification_report(y, preds))