In [1]:
import warnings
import os
# Cap the OpenMP thread count for this kernel. The assignment sits in the first
# cell, ahead of the numerical imports — presumably so the limit is already in
# place when those libraries start their thread pools (TODO confirm).
os.environ["OMP_NUM_THREADS"] = "6"
# Optional warning filters, currently disabled; enabling them needs the
# `warnings` module imported in this cell.
# warnings.filterwarnings("ignore", message="KMeans is known to have a memory leak*")
# warnings.filterwarnings("ignore", message="Solution may be inaccurate*")

In [2]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

import stratifreg
from stratifreg.two_groups import Joint2Regressor
from stratifreg.k_groups import JointKRegressor
from stratifreg.gmm_groups import Joint2GMMRegressor
from stratifreg.utils import JointUtils

In [3]:
def polynomial_tramsform(X, degree=2, include_bias=False):
    """
    Expand X (n, p) into every polynomial / interaction column up to `degree`.

    Example: degree=2 -> x1, x2, ..., x1^2, x1*x2, x2^2, ...
    A leading constant column "1" is only produced when include_bias=True.

    Parameters
    ----------
    X : pandas.DataFrame or array-like of shape (n, p)
        Input features. When X exposes a `columns` attribute those labels are
        used to build the output names; otherwise scikit-learn generates
        "x0", "x1", ... names.
    degree : int, default 2
        Maximum total degree of the generated terms.
    include_bias : bool, default False
        Forwarded to PolynomialFeatures; adds the constant column when True.

    Returns
    -------
    X_poly
        The expanded feature matrix returned by PolynomialFeatures.fit_transform.
    colname_poly
        Names of the expanded columns, from get_feature_names_out.
    """
    poly = PolynomialFeatures(degree=degree, include_bias=include_bias)
    X_poly = poly.fit_transform(X)
    # Plain arrays carry no `.columns`; passing None lets scikit-learn fall back
    # to its own generated feature names instead of raising AttributeError.
    input_names = getattr(X, "columns", None)
    colname_poly = poly.get_feature_names_out(input_names)
    return X_poly, colname_poly

def get_data(path_X,path_y,scale=False,degree=0):
    """
    Load features and target from CSV, standardise, split in two groups and
    build the intercept-augmented design matrices.

    Parameters
    ----------
    path_X, path_y : str
        Paths of the feature CSV and the target CSV (read with pandas.read_csv).
    scale : bool, default False
        When True the full matrix X is passed through the scaler a second time
        after the split (X then becomes whatever fit_transform returns instead
        of a DataFrame).
        NOTE(review): the features are standardised unconditionally before the
        split, so this flag does not switch standardisation on or off.
    degree : int, default 0
        When > 1 the features are first expanded with polynomial_tramsform.

    Returns
    -------
    tuple
        (X, Xc, y, X1, X1c, y1, X2, X2c, y2, y_med, columnsXpoly) where the
        `c`-suffixed matrices come from JointUtils.add_intercept, the 1/2
        groups come from JointUtils.split_by_median(group_mode='median'),
        y_med is np.median(y) and columnsXpoly holds the feature names.
    """
    features = pd.read_csv(path_X)
    target = pd.read_csv(path_y)

    # Column names follow the polynomial expansion when one is requested.
    columnsXpoly = features.columns
    if degree > 1:
        features, columnsXpoly = polynomial_tramsform(features, degree, False)

    features = JointUtils._as_numpy(features)
    y = JointUtils._as_numpy(target)
    y_med = np.median(y)

    # Standardisation happens here for every call, independent of `scale`.
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(features), columns=columnsXpoly)

    # The two groups are taken from the standardised frame, before any re-scaling.
    X1, X2, y1, y2 = JointUtils.split_by_median(X, y, group_mode='median')

    if scale:
        X = scaler.fit_transform(X)

    Xc, X1c, X2c = (JointUtils.add_intercept(block) for block in (X, X1, X2))
    return X, Xc, y, X1, X1c, y1, X2, X2c, y2, y_med, columnsXpoly

# Load the Xf_all_prediabet / yf_all_prediabet CSV files with get_data's
# default options, then report the shapes of the design matrices and targets.
(X, Xc, y,
 X1, X1c, y1,
 X2, X2c, y2,
 y_med, varnames) = get_data(
    './datasets/Xf_all_prediabet.csv',
    './datasets/yf_all_prediabet.csv',
)
print(Xc.shape, X1c.shape, X2c.shape, y.shape, y1.shape, y2.shape)

(3059, 5) (1558, 5) (1501, 5) (3059,) (1558,) (1501,)


In [4]:
# Two-group OLS fit via Joint2Regressor.fit_ols_groups (sigma_mode='two'),
# followed by the coefficient table labelled "beta".
reg = Joint2Regressor()
[beta1, beta2], var_beta, sigma2s = reg.fit_ols_groups(
    X1c, X2c, y1, y2,
    sigma_mode='two',
)
print(Joint2Regressor.display(reg, varnames, "beta"))

            beta_G1  beta_G2
intercept   53.3515  73.9497
Sex          0.8170  -0.7908
IMD_Decile   0.8992   0.1527
BMI         -0.7765  -2.4729
HbA1C        0.2593   0.2144


In [5]:
# Same two groups through JointKRegressor: quadratic loss, no joint points,
# zero l1/l2 penalties and no weights. The table is printed with display's
# default label.
kreg = JointKRegressor()
kreg.fit(
    [(X1c, y1), (X2c, y2)],
    joint_X_list=None,
    loss='quadratic',
    tau=0.0,
    l1=0.,
    l2=0.,
    weights_list=None,
)
print(JointKRegressor.display(kreg, varnames))

            model_G1  model_G2
intercept    53.3515   73.9497
Sex           0.8170   -0.7908
IMD_Decile    0.8992    0.1527
BMI          -0.7765   -2.4729
HbA1C         0.2593    0.2144


In [None]:
# Fit the two-group GMM regressor without a joint point (x0=None), one
# component per group (m1=1, m2=1), capped at 10 iterations.
gmmreg = Joint2GMMRegressor()
gmmreg.fit(X1c, X2c, y1, y2, x0=None, m1=1, m2=1, max_iter=10)
# Display the model fitted in this cell (gmmreg) through Joint2GMMRegressor.display,
# not the Joint2Regressor instance `reg` left over from an earlier cell.
print(Joint2GMMRegressor.display(gmmreg, varnames, "beta"))

In [None]:
# JointKRegressor on the two groups with the quantile loss at tau=0.5,
# no joint points, zero l1/l2 penalties and no weights.
kreg = JointKRegressor()
kreg.fit(
    [(X1c, y1), (X2c, y2)],
    joint_X_list=None,
    loss='quantile',
    tau=0.5,
    l1=0.,
    l2=0.,
    weights_list=None,
)
print(JointKRegressor.display(kreg, varnames, "beta"))

In [None]:
# Two candidate joint points computed from the full design matrix and y_med:
# one from JointUtils.find_x0, one from JointUtils.find_x0_LL with L=1.
x0_c = JointUtils.find_x0(Xc, y, y_med)
x0_LL = JointUtils.find_x0_LL(Xc, y, y_med, L=1)

# Joined OLS fit (cas='a', sigma_mode='one', y0=None) at the find_x0 point.
regctr_x0c = Joint2Regressor()
resu_x0c = regctr_x0c.fit_ols_jointure_a_b(
    X1c, X2c, y1, y2, x0_c,
    y0=None, sigma_mode='one', cas='a',
)

# Same fit at the find_x0_LL point, on a fresh regressor.
regctr_x0LL = Joint2Regressor()
resu_x0LL = regctr_x0LL.fit_ols_jointure_a_b(
    X1c, X2c, y1, y2, x0_LL,
    y0=None, sigma_mode='one', cas='a',
)

print(Joint2Regressor.display(regctr_x0c, varnames, "beta_x0_c"))
print(Joint2Regressor.display(regctr_x0LL, varnames, "beta_x0_LL"))

In [None]:
# Two-group GMM regressor joined at x0_c, with m1=1 and m2=1 and at most
# 10 iterations.
gmmreg = Joint2GMMRegressor()
gmmreg.fit(
    X1c, X2c, y1, y2,
    x0=x0_c,
    m1=1,
    m2=1,
    max_iter=10,
)
print(Joint2GMMRegressor.display(gmmreg, varnames, "beta"))

In [None]:
# Two-group GMM regressor joined at x0_c, this time with m1=2 and m2=2
# (max_iter stays at 10).
gmmreg = Joint2GMMRegressor()
gmmreg.fit(
    X1c, X2c, y1, y2,
    x0=x0_c,
    m1=2,
    m2=2,
    max_iter=10,
)
print(Joint2GMMRegressor.display(gmmreg, varnames, "beta"))

In [None]:
# JointKRegressor with quadratic loss on the two groups, no joint points;
# tau=0.5 is passed along even though the loss is 'quadratic'.
kreg = JointKRegressor()
group_list = [(X1c, y1), (X2c, y2)]
kreg.fit(
    group_list,
    joint_X_list=None,
    loss='quadratic',
    tau=0.5,
    l1=0.0,
    l2=0.0,
    weights_list=None,
)
print(JointKRegressor.display(kreg, varnames, "beta"))

In [None]:
# Quadratic-loss JointKRegressor on the two groups, now with x0_c supplied
# as the single entry of joint_X_list; table printed with the default label.
kreg = JointKRegressor()
kreg.fit(
    [(X1c, y1), (X2c, y2)],
    joint_X_list=[x0_c],
    loss='quadratic',
    tau=0.5,
    l1=0.0,
    l2=0.0,
    weights_list=None,
)
print(JointKRegressor.display(kreg, varnames))

In [None]:
# Smoothed joined OLS fit at x0_c (fit_ols_jointure_smoothed, lc=10.0).
# Rebinds `reg`, `var_beta` and `sigma2s` from the earlier OLS cell.
reg = Joint2Regressor()
betas, var_beta, sigma2s = reg.fit_ols_jointure_smoothed(
    X1c, X2c, y1, y2, x0_c,
    lc=10.0,
)
print(Joint2Regressor.display(reg, varnames, "beta_S_x0_"))

In [None]:
# Three-group fit: group 1 is kept whole, group 2 is split again with
# split_by_median. Two joint points are supplied: x0_c and a find_x0 point
# computed on group 2 at np.median(y2).
kreg = JointKRegressor()
#X1c1,X1c2,y11,y12  = JointUtils.split_by_median(X1c,y1, group_mode='median')
X2c1, X2c2, y21, y22 = JointUtils.split_by_median(X2c, y2, group_mode='median')
jl = [
    x0_c,
    JointUtils.find_x0(X2c, y2, np.median(y2)),
]
# Quantile loss at tau=0.5 with l1=0.9 and l2=0.37 penalties.
betas = kreg.fit(
    [(X1c, y1), (X2c1, y21), (X2c2, y22)],
    jl,
    loss='quantile',
    tau=0.5,
    l1=0.9,
    l2=0.37,
)
print(JointKRegressor.display(kreg, varnames, "beta"))