In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Scoring landscape of the crosslinking negative log likelihood version 2.0 from Riccardo Pellarin

* The likelihood is given by $$P(O_n|X) = \frac{\alpha}{\beta}P(XL_n|X) + \frac{(1-\alpha)}{1-\beta}P(\bar{XL_n}|X)$$
* $\alpha = \frac{N^{obs, T}_{XL}}{N^{obs}_{XL}}$ and $\beta = \frac{N_{XL}}{N_{LP}}$. In addition, $\alpha$ and $\beta$ must satisfy the constraints $$\frac{\alpha}{\beta}\leq\frac{N_{LP}}{N^{obs}_{XL}};\quad \frac{1-\alpha}{1-\beta}\leq\frac{N_{LP}}{N^{obs}_{XL}}$$

* Example system
1) Consider 100 lysines, which give $\binom{100}{2} = 4950$ lysine pairs, and 100 observed crosslinks

In [None]:
# System parameters for the 2D plots that analyze the effect of alpha and beta
# on the scoring function.
n_lysines = 100        # lysines in the example system
n_xls_observed = 100   # crosslinks observed

# Unordered lysine pairs, C(n, 2); the triplet count C(n, 3) is derived from it
# as C(n, 2) * (n - 2) / 3, which is always an exact integer division.
n_lysine_pairs = (n_lysines - 1) * n_lysines // 2
n_lysine_triplets = n_lysine_pairs * (n_lysines - 2) // 3

def likelihood(fm, alpha, beta):
    """Return the negative log of the two-component mixture likelihood.

    The mixture evaluated is
    ``(alpha / beta) * fm + ((1 - alpha) / (1 - beta)) * (1 - fm)``.

    Parameters
    ----------
    fm
        Forward-model value; it weights the first term and ``1 - fm`` weights
        the second.
    alpha, beta
        Mixing parameters. ``beta`` is used as a divisor, as is ``1 - beta``,
        so it must be neither 0 nor 1.

    Returns
    -------
    ``-log(mixture + 1e-300)``. Scalars and numpy arrays are both accepted
    because only elementwise arithmetic and ``np.log`` are used.
    """
    weight_true = alpha / beta
    weight_false = (1 - alpha) / (1 - beta)
    mixture = weight_true * fm + weight_false * (1 - fm)
    # The tiny offset keeps the log finite when the mixture is exactly zero.
    return -np.log(mixture + 1e-300)

# We do not know the true positives here, so check the limits instead:
# fix a forward-model value fm between 0 and 1 and map -log(likelihood)
# over the (alpha, beta) plane.
fmod = 0.5
alpha_vals = np.linspace(1e-10, 1.0 - 1e-10, 200)
beta_vals  = np.linspace(1.0 - 1e-10, 1e-10, 200)  # descending: row 0 is beta ~ 1

# constraints: alpha/beta <= (n_lysine_pairs / n_xls_observed) and (1-alpha)/(1-beta) <= same bound
K = (n_lysine_pairs / n_xls_observed)
eps = 1e-12  # numerical guard to avoid division by zero

# Broadcast alpha along columns and beta along rows so the whole
# (len(beta_vals), len(alpha_vals)) grid is evaluated in one shot instead of
# a 200 x 200 Python loop. likelihood() is plain numpy arithmetic, so it
# accepts the broadcast arrays directly.
alpha_grid = alpha_vals[np.newaxis, :]
beta_grid = beta_vals[:, np.newaxis]
allowed_mask = (
    ((alpha_grid / (beta_grid + eps)) <= K)
    & (((1 - alpha_grid) / ((1 - beta_grid) + eps)) <= K)
)

# Build surface; NaN marks forbidden regions (to color them distinctly)
lhood_surface = np.where(allowed_mask, likelihood(fmod, alpha_grid, beta_grid), np.nan)

# Locate the minimum of the surface; forbidden cells are NaN and are ignored.
valid_mask = np.isfinite(lhood_surface)
if not valid_mask.any():
    print("No valid regions found.")
    minima_idx = np.empty((0, 2), dtype=int)
else:
    global_min = np.nanmin(lhood_surface[valid_mask])

    # Every valid cell that np.isclose() considers equal to the global minimum
    # counts as a minimum. Both tolerances are 1e-11, i.e. effectively
    # "numerically identical" rather than a percent-level band.
    minima_rtol = 1e-11
    minima_atol = 1e-11
    near_global_min = np.isclose(lhood_surface, global_min, rtol=minima_rtol, atol=minima_atol)
    minima_mask = valid_mask & near_global_min
    minima_idx = np.argwhere(minima_mask)  # rows of (beta index, alpha index)

    # A single representative argmin, reported for reference
    flat_min = np.nanargmin(lhood_surface)
    min_ij = np.unravel_index(flat_min, lhood_surface.shape)
    min_value = lhood_surface[min_ij]
    min_beta  = beta_vals[min_ij[0]]   # axis 0 of the surface is beta
    min_alpha = alpha_vals[min_ij[1]]  # axis 1 of the surface is alpha

    print(f"Global minimum: {min_value:.6f} at alpha={min_alpha:.6f}, beta={min_beta:.6f}")
    print(f"Found {len(minima_idx)} minima within rtol={minima_rtol}, atol={minima_atol}")

# Plotting the likelihood heatmap with contours and forbidden regions
import matplotlib.patches as mpatches
from matplotlib.colors import Normalize, TwoSlopeNorm

fig, ax = plt.subplots(figsize=(10, 6))

# Blue (low) -> White (mid) -> Red (high); NaNs (forbidden) shown in yellow
Z = np.ma.masked_invalid(lhood_surface)
cmap = plt.cm.bwr.copy()
cmap.set_bad('yellow')  # forbidden regions in yellow

# Z.size counts masked cells as well, so it cannot detect an all-forbidden
# surface; count() returns the number of unmasked (valid) cells.
has_valid = Z.count() > 0

# Center white at the median of valid values
vmin = float(Z.min()) if has_valid else 0.0
vmax = float(Z.max()) if has_valid else 1.0
vcenter = float(np.ma.median(Z)) if has_valid else 0.5

# TwoSlopeNorm raises unless vmin < vcenter < vmax strictly. That fails when
# the median coincides with an extreme or the surface is flat, so fall back
# to a plain linear normalization in those cases.
if vmin < vcenter < vmax:
    norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
else:
    norm = Normalize(vmin=vmin, vmax=vmax)

mesh = ax.pcolormesh(alpha_vals, beta_vals, Z, cmap=cmap, norm=norm, shading='auto')

# Add contours over valid regions (contour levels must be strictly increasing,
# so a flat surface gets none)
if has_valid and vmax > vmin:
    levels = np.linspace(vmin, vmax, 10)
    cs = ax.contour(alpha_vals, beta_vals, Z.filled(np.nan), levels=levels, colors='white', linewidths=0.6)
    ax.clabel(cs, inline=True, fontsize=8, fmt='%.2f')

# Mark all minima
min_handle = None
if minima_idx.size > 0:
    min_alphas = alpha_vals[minima_idx[:, 1]]
    min_betas  = beta_vals[minima_idx[:, 0]]
    # 'g' formatting keeps very tight tolerances readable; '.1f' would print
    # them as "0.0%".
    min_handle = ax.scatter(min_alphas, min_betas, marker='*', color='black', s=160,
                            edgecolors='white', linewidth=0.8,
                            label=f"Minima (<= {minima_rtol*100:g}% rtol)")

ax.set_ylabel('Beta')
ax.set_xlabel('Alpha')
ax.set_title(f'Negative Log-Likelihood Heatmap (fm={fmod:.2f})\nForbidden regions in yellow')
fig.colorbar(mesh, ax=ax, label='-log(Likelihood)')

# Legend entries
forbidden_patch = mpatches.Patch(color='yellow', label='Forbidden region')
handles = [forbidden_patch]
if min_handle is not None:
    handles.insert(0, min_handle)
ax.legend(handles=handles, loc='best')

fig.tight_layout()
plt.show()