In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so that edits to source files on disk are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from torch import nn
import torch

In [None]:
# Smoke test of torch's Gaussian negative log-likelihood loss on a 5x2 batch:
# random predictions and targets, with a per-element variance of one.
loss = nn.GaussianNLLLoss()
input = torch.randn((5, 2), requires_grad=True)
target = torch.randn((5, 2))
# One variance entry per prediction element (heteroscedastic form).
var = torch.ones((5, 2), requires_grad=True)
output = loss(input=input, target=target, var=var)
# Back-propagate so that `input.grad` and `var.grad` get populated.
output.backward()

In [None]:
import scipy.stats
import numpy as np
import random
import pandas as pd
import pandas.api.types
import scipy.stats
import matplotlib.pyplot as plt

In [None]:
# Evenly spaced grid on [0, 1] plus one uniform [0, 1) draw per grid point.
n_samples = 100
x = np.linspace(0, 1, num=n_samples)
rands = [random.random() for _ in range(n_samples)]

In [None]:
n_wavelengths_input = 5
n_samples = 10


def get_pdfs(
    n_wavelengths_input=n_wavelengths_input,
    n_samples=n_samples,
    sigma_scale=0.1,
    solution_fraction=0.9,
):
    """Score a synthetic submission with a Gaussian log-likelihood metric.

    Random predictions are drawn with ``random.random()`` and paired with a
    constant predicted sigma; the "solution" is those predictions multiplied
    by ``solution_fraction``. Three Gaussian log-densities of the solution are
    then evaluated element-wise: under the prediction, under an ideal model
    centred on the solution itself, and under a naive constant model.

    Array shapes and the normalised score are printed as a side effect.

    Args:
        n_wavelengths_input: Number of columns in the synthetic prediction.
            The default is captured from the module-level value when the
            function is defined.
        n_samples: Number of rows in the synthetic prediction. The default is
            captured the same way.
        sigma_scale: Constant value used for every predicted sigma.
        solution_fraction: Factor applied to the predictions to obtain the
            solution.

    Returns:
        Tuple ``(mean_pdf, pred_pdf, true_pdf)`` of arrays shaped
        ``(n_samples, n_wavelengths_input)``. Despite the names, the entries
        are log-density values (``scipy.stats.norm.logpdf``).
    """
    log_density = scipy.stats.norm.logpdf

    # Synthetic predictions, laid out row by row.
    predicted = np.array(
        [random.random() for _ in range(n_samples * n_wavelengths_input)]
    ).reshape(n_samples, n_wavelengths_input)
    predicted_sigma = sigma_scale * np.ones_like(predicted)
    # Submission layout: prediction columns first, then the sigma columns.
    submission = np.concatenate([predicted, predicted_sigma], axis=1)

    # Location and spread of the naive constant reference model.
    baseline_loc = predicted.mean()
    baseline_scale = predicted.std()

    solution = solution_fraction * predicted
    ideal_scale = solution.std()

    print(f"{solution.shape=}")
    print(f"{submission.shape=}")

    n_cols = solution.shape[1]
    y_true = solution
    y_pred = submission[:, :n_cols]
    # Keep the predicted sigma strictly positive to avoid division by zero.
    sigma_pred = np.clip(submission[:, n_cols:], a_min=10**-15, a_max=None)
    print(f"{y_true.shape=}")
    print(f"{y_pred.shape=}")
    print(f"{sigma_pred.shape=}")

    pred_pdf = log_density(y_true, loc=y_pred, scale=sigma_pred)
    true_pdf = log_density(
        y_true, loc=y_true, scale=np.full_like(y_true, ideal_scale)
    )
    mean_pdf = log_density(
        y_true,
        loc=np.full_like(y_true, baseline_loc),
        scale=np.full_like(y_true, baseline_scale),
    )

    total_pred = pred_pdf.sum()
    total_true = true_pdf.sum()
    total_mean = mean_pdf.sum()

    # Position of the prediction between the naive (0) and ideal (1) models.
    submit_score = (total_pred - total_mean) / (total_true - total_mean)
    print(f"{submit_score =: .4f} ({float(np.clip(submit_score, 0.0, 1.0))})")
    return mean_pdf, pred_pdf, true_pdf

In [None]:
# Compute the three log-density arrays in this cell before inspecting them, so
# the cell does not depend on variables that are only assigned further down
# the notebook (which raises NameError on a fresh kernel).
mean_pdf, pred_pdf, true_pdf = get_pdfs()
mean_pdf.shape, pred_pdf.shape, true_pdf.shape

In [None]:
# NOTE: this rebinds the notebook-level `x`; any cell executed afterwards that
# reads `x` sees this [-1, 1] grid.
x = np.linspace(-1, 1, 100)
rv = scipy.stats.norm()  # not referenced again in this cell
# One figure per solution fraction, showing the per-sample log-density curves
# returned by get_pdfs for the naive, predicted and ideal models.
for solution_fraction in [1, 0.9, 0.8]:
    # Forward the loop value; a hard-coded fraction would make every figure
    # show the same experiment under a different label.
    mean_pdf, pred_pdf, true_pdf = get_pdfs(
        n_wavelengths_input=n_wavelengths_input,
        n_samples=n_samples,
        sigma_scale=0.1,
        solution_fraction=solution_fraction,
    )
    plt.figure()
    for i_sample in range(n_samples):
        # Only the first sample contributes legend entries; the remaining
        # curves reuse the colours and are hidden from the legend.
        show_label = i_sample == 0
        plt.plot(
            range(n_wavelengths_input),
            mean_pdf[i_sample],
            "b-",
            lw=2,
            label=f"mean_pdf ({solution_fraction})" if show_label else "_nolegend_",
        )
        plt.plot(
            range(n_wavelengths_input),
            pred_pdf[i_sample],
            "g-",
            lw=2,
            label=f"pred_pdf ({solution_fraction})" if show_label else "_nolegend_",
        )
        plt.plot(
            range(n_wavelengths_input),
            true_pdf[i_sample],
            "r-",
            lw=2,
            label=f"true_pdf ({solution_fraction})" if show_label else "_nolegend_",
        )
    plt.title(f"solution_fraction = {solution_fraction}")
    plt.xlabel("wavelength index")
    plt.ylabel("log-density")
    # Draw the legend once per figure, after all curves are added.
    plt.legend()

In [None]:
# Build the [0, 1] grid locally instead of reading the notebook-level `x`:
# an earlier cell rebinds `x` to a grid symmetric around zero, whose mean is
# approximately 0 and therefore not a usable Gaussian scale.
x_grid = np.linspace(0, 1, len(rands))
# Density of each random draw under a Gaussian centred on the matching grid
# point, with the grid mean as the common scale.
gaussian_pred = scipy.stats.norm.pdf(
    rands, loc=x_grid, scale=np.mean(x_grid)
)  # * 0.2
plt.figure()
plt.plot(x_grid, rands, label="rands")
plt.plot(x_grid, gaussian_pred, label="gaussian")
plt.legend()
plt.show()