In [6]:
import numpy as np
import lightgbm as lgb
from scipy.optimize import minimize_scalar
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


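Three candidate values for the initial constant raw score $F_0$ in binary classification:

1. LightGBM's default `init_score`, which the library chooses implicitly when none is supplied.
2. Empirical (closed-form) formula: the log-odds of the mean label, $F_0=\log \left(\frac{\bar{y}}{1-\bar{y}}\right)$.
3. Numerical minimizer: solve for the constant $\zeta$ that minimizes the total log-loss, where $\sigma(\zeta)=1/(1+e^{-\zeta})$ is the sigmoid: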

$$
\min _\zeta \sum_{i=1}^n\left[-y_i \log \sigma(\zeta)-\left(1-y_i\right) \log (1-\sigma(\zeta))\right]
$$


In [7]:
# Synthetic binary-classification sample with a fixed seed for reproducibility.
# Per the scikit-learn docs, a single entry in `weights` sets class 0's share
# (30% here) and the remainder goes to class 1, so the classes are imbalanced.
X, y = make_classification(
    n_samples=500,
    weights=[0.3],
    random_state=42,
)

In [8]:
# Fraction of positive labels in the sample (the mean of the label vector)
p = np.mean(y)

# Closed-form initial score: log-odds of the positive-class proportion
F0_formula = np.log(p / (1 - p))

In [9]:
# Numerical minimization of log-loss for a constant prediction
def constant_logloss(z, labels=None):
    """Total binary log-loss when every sample receives the same raw score.

    Parameters
    ----------
    z : float
        Constant raw (pre-sigmoid) score assigned to all samples.
    labels : array-like of 0/1, optional
        Targets to score against. When omitted, the notebook-level ``y``
        is used, so ``minimize_scalar(constant_logloss)`` keeps working.

    Returns
    -------
    float
        ``sum_i [-y_i * log(sigmoid(z)) - (1 - y_i) * log(1 - sigmoid(z))]``.
    """
    targets = y if labels is None else np.asarray(labels)
    # -log(sigmoid(z))     = log(1 + exp(-z)) = logaddexp(0, -z)
    # -log(1 - sigmoid(z)) = log(1 + exp(z))  = logaddexp(0,  z)
    # logaddexp evaluates these without overflowing exp() or taking log(0),
    # so the loss stays finite (no inf/nan) even for very large |z|.
    loss_if_positive = np.logaddexp(0.0, -z)
    loss_if_negative = np.logaddexp(0.0, z)
    return np.sum(targets * loss_if_positive + (1 - targets) * loss_if_negative)

# Minimize the constant-score log-loss with SciPy's default scalar solver
# and keep the minimizing raw score.
opt_result = minimize_scalar(constant_logloss)
F0_numeric = opt_result.x

In [10]:
# Show the closed-form (log-odds) initial score
F0_formula

np.float64(0.8188868585544236)

In [11]:
# Show the numerically optimized initial score for comparison with F0_formula
F0_numeric

np.float64(0.8188868871945704)