In [None]:
import warnings
import os
# Set the OpenMP thread count through the environment. This cell runs before
# the cell that imports numpy / scikit-learn / stratifreg.
# NOTE(review): presumably the variable has to be set before those libraries
# are loaded in order to take effect — confirm.
os.environ["OMP_NUM_THREADS"] = "6"
# Optional warning filters, currently disabled (this is the only use of `warnings`):
# warnings.filterwarnings("ignore", message="KMeans is known to have a memory leak*")
# warnings.filterwarnings("ignore", message="Solution may be inaccurate*")

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

import stratifreg
from stratifreg.two_groups import Joint2Regressor
from stratifreg.k_groups import JointKRegressor
from stratifreg.gmm_groups import Joint2GMMRegressor
from stratifreg.utils import JointUtils

In [None]:
def polynomial_tramsform(X, degree=2, include_bias=False):
    """
    Expand the columns of ``X`` (shape (n, p)) into polynomial features.

    Example: with ``degree=2`` the output columns are x1, x2, ..., x1^2,
    x1*x2, x2^2, ...; a leading constant column "1" is added only when
    ``include_bias=True``.

    Parameters
    ----------
    X : pandas.DataFrame or numpy.ndarray
        Input features. For a DataFrame, its column labels are used to name
        the generated features and its index is kept on the result. For an
        array (no ``columns`` / ``index`` attributes), scikit-learn's default
        feature names are used and the result gets a default index.
    degree : int, default 2
        Degree passed to ``PolynomialFeatures``.
    include_bias : bool, default False
        Passed to ``PolynomialFeatures``.

    Returns
    -------
    X_poly_df : pandas.DataFrame
        The expanded features.
    colname_poly : array of str
        Names of the generated features, as returned by
        ``PolynomialFeatures.get_feature_names_out``.
    """
    poly = PolynomialFeatures(degree=degree, include_bias=include_bias)
    X_poly = poly.fit_transform(X)
    # Arrays have neither .columns nor .index; fall back to None so that
    # scikit-learn and pandas generate their defaults instead of raising.
    input_names = getattr(X, "columns", None)
    input_index = getattr(X, "index", None)
    colname_poly = poly.get_feature_names_out(input_names)
    X_poly_df = pd.DataFrame(X_poly, columns=colname_poly, index=input_index)
    return X_poly_df, colname_poly

def get_data(path_X,path_y,scale=False,degree=0):
    """
    Load features and target from CSV, prepare them and split them in two groups.

    Steps, in order:
      1. read ``path_X`` and ``path_y`` with ``pd.read_csv``;
      2. if ``degree > 1``, expand X with ``polynomial_tramsform``;
      3. standardize X with ``StandardScaler`` (always, see note below);
      4. split into two groups with ``JointUtils.split_at_y0(X, y)``;
      5. if ``scale`` is true, standardize X, X1 and X2 again, each on its own;
      6. build intercept-augmented versions with ``JointUtils.add_intercept``.

    Parameters
    ----------
    path_X, path_y : str or path-like
        CSV files holding the features and the target.
    scale : bool, default False
        When true, X, X1 and X2 are each re-standardized separately after the
        split. DataFrame inputs keep their column labels and index.
    degree : int, default 0
        Polynomial degree; values <= 1 leave the features unexpanded.

    Returns
    -------
    tuple
        ``(X, Xc, y, X1, X1c, y1, X2, X2c, y2, columnsXpoly)`` where the
        ``*c`` entries are the outputs of ``JointUtils.add_intercept`` and
        ``columnsXpoly`` holds the feature names of X.
    """
    def _standardize(data):
        """Standardize ``data`` column-wise, keeping DataFrame labels if any."""
        scaled = StandardScaler().fit_transform(data)
        if isinstance(data, pd.DataFrame):
            # fit_transform returns a bare array; restore labels so the
            # scale=True path returns the same types as scale=False.
            return pd.DataFrame(scaled, columns=data.columns, index=data.index)
        return scaled

    X = pd.read_csv(path_X)
    y = pd.read_csv(path_y)
    columnsXpoly = X.columns
    if degree > 1:
        X, columnsXpoly = polynomial_tramsform(X, degree, False)
    # NOTE(review): X is standardized here regardless of `scale`. Kept as-is so
    # the default call yields the same numbers; confirm whether scale=False
    # was meant to skip this step.
    X = pd.DataFrame(StandardScaler().fit_transform(X), columns=columnsXpoly)
    X1, X2, y1, y2 = JointUtils.split_at_y0(X, y)
    if scale:
        X = _standardize(X)
        X1 = _standardize(X1)
        X2 = _standardize(X2)
    Xc = JointUtils.add_intercept(X)
    X1c = JointUtils.add_intercept(X1)
    X2c = JointUtils.add_intercept(X2)
    return X,Xc,y,X1,X1c,y1,X2,X2c,y2,columnsXpoly

# Load the survey dataset with get_data's default options (scale=False, degree=0).
(
    X, Xc, y,
    X1, X1c, y1,
    X2, X2c, y2,
    varnames,
) = get_data(
    path_X='./datasets/Xf_all_datasurvey.csv',
    path_y='./datasets/yf_all_datasurvey.csv',
)
# Shapes of the intercept-augmented designs followed by their targets.
print(Xc.shape, X1c.shape, X2c.shape, y.shape, y1.shape, y2.shape)

In [None]:
# Quick look at the loaded data: shape of X (without intercept) next to the
# intercept-augmented group designs, then the first rows of X.
print(X.shape, X1c.shape,X2c.shape)
print(X.head())

In [None]:
# Fit on the full sample (Xc, y) with Joint2Regressor.fit_ols_single.
# NOTE(review): presumably a single pooled OLS used as baseline — confirm
# against the stratifreg documentation.
reg1 = Joint2Regressor()
# The first returned element is unpacked as a one-item sequence.
[beta], var_beta, sigma2s = reg1.fit_ols_single(Xc, y)
print(JointKRegressor.display(reg1, "beta"))
# Fit statistics stored under the "lgk", "bic" and "aic" keys, rounded to 2 decimals.
# presumably log-likelihood / BIC / AIC — TODO confirm the meaning of "lgk".
print(np.round([reg1.variables_["lgk"],reg1.variables_["bic"],reg1.variables_["aic"]],2))

In [None]:
# Fit the two groups (X1c, y1) and (X2c, y2) with fit_ols_groups, sigma_mode='two'.
# NOTE(review): presumably separate per-group OLS with one variance per group — confirm.
reg = Joint2Regressor()
[beta1, beta2], sigma2s = reg.fit_ols_groups(X1c, X2c, y1, y2, sigma_mode='two')
print(JointKRegressor.display(reg, "beta"))
# Per-group fit statistics read from the *1_ and *2_ attributes, rounded to 2 decimals.
print(np.round([reg.lgk1_,reg.bic1_,reg.aic1_],2))
print(np.round([reg.lgk2_,reg.bic2_,reg.aic2_],2))

In [None]:
# JointKRegressor on the same two groups: quadratic loss, no joint points
# (joint_X_list=None), zero l1/l2 penalties, no weights.
kreg = JointKRegressor()
rr = kreg.fit([(X1c,y1),(X2c,y2)], joint_X_list=None, loss='quadratic', 
         tau=0.0, l1=0., l2=0., weights_list=None)
# Sanity check: column names, their count, and the length of the first returned item.
print(kreg.X_columns_,len(kreg.X_columns_),len(rr[0]))
print(JointKRegressor.display(kreg, "beta"))

In [None]:
# Joint2GMMRegressor on the two groups without a joint point (x0=None),
# m1=1, m2=1, at most 10 iterations.
# NOTE(review): m1/m2 presumably are the number of mixture components per group — confirm.
gmmreg = Joint2GMMRegressor()
gmmreg.fit(X1c, X2c, y1, y2, x0=None, m1=1, m2=1, max_iter=10)
print(Joint2GMMRegressor.display(gmmreg, "beta"))
# print(Joint2GMMRegressor.predict(gmmreg, Xc,"beta")[:5])

In [None]:
# JointKRegressor with quantile loss at tau=0.5, no joint points, no penalties.
# NOTE(review): tau=0.5 presumably corresponds to median regression — confirm.
kreg = JointKRegressor()
kreg.fit([(X1c,y1),(X2c,y2)], joint_X_list=None, loss='quantile', 
                   tau=0.5, l1=0., l2=0., weights_list=None)
print(JointKRegressor.display(kreg, "beta"))

In [None]:
# Two candidate joint points computed from the full sample. Later cells reuse x0_c.
# NOTE(review): the difference between find_x0 and find_x0_LL (and the role of L)
# is not visible here — see the stratifreg documentation.
x0_c  = JointUtils.find_x0(Xc, y)
x0_LL = JointUtils.find_x0_LL(Xc, y, L=1)
# Same fit_ols_jointure_a_b call (cas='a', sigma_mode='one', y0=None) at each point.
regctr_x0c  = Joint2Regressor()
resu_x0c    = regctr_x0c.fit_ols_jointure_a_b(X1c, X2c, y1, y2, x0_c, 
                                  y0=None, sigma_mode='one', cas='a')
regctr_x0LL = Joint2Regressor()
resu_x0LL   = regctr_x0LL.fit_ols_jointure_a_b(X1c, X2c, y1, y2, x0_LL, 
                                  y0=None, sigma_mode='one', cas='a')

print(regctr_x0c.X_columns_, regctr_x0LL.X_columns_)
# Display each fit, then run the joint-constraint check on its first returned
# element at the corresponding point.
print(JointKRegressor.display(regctr_x0c, "beta_x0_c"))
JointUtils.check_jointure_constraint(resu_x0c[0],[x0_c])
print(JointKRegressor.display(regctr_x0LL, "beta_x0_LL"))
JointUtils.check_jointure_constraint(resu_x0LL[0],[x0_LL])
# Joint2Regressor.predict(regctr_x0c,Xc,1) [:5]

In [None]:
# Joint2GMMRegressor with the joint point x0_c (defined in an earlier cell),
# m1=1, m2=1, at most 10 iterations. The fit result is unpacked into three values.
gmmreg = Joint2GMMRegressor()
beta_mat, sigma2_1, sigma2_2 = gmmreg.fit(X1c, X2c, y1, y2, x0=x0_c, m1=1, m2=1,max_iter=10)
print(Joint2GMMRegressor.display(gmmreg,"beta"))

In [None]:
# Same GMM fit at x0_c (defined in an earlier cell), now with m1=2 and m2=2.
gmmreg = Joint2GMMRegressor()
gmmreg.fit(X1c, X2c, y1, y2, x0=x0_c, m1=2, m2=2,max_iter=10)
print(Joint2GMMRegressor.display(gmmreg,"beta"))

In [None]:
# JointKRegressor, quadratic loss, no joint points, no penalties, no weights.
# NOTE(review): tau=0.5 is passed together with loss='quadratic'; presumably
# tau only matters for the quantile loss — confirm.
kreg = JointKRegressor()
group_list = [(X1c,y1),(X2c,y2)]
kreg.fit(group_list, joint_X_list=None, loss='quadratic', 
         tau=0.5, l1=0.0, l2=0.0, weights_list=None)
print(JointKRegressor.display(kreg,"beta"))

In [None]:
# JointKRegressor with quadratic loss and one joint point, x0_c (defined in an
# earlier cell), passed through joint_X_list.
kreg = JointKRegressor()
betas = kreg.fit([(X1c,y1),(X2c,y2)], joint_X_list=[x0_c], loss='quadratic', 
         tau=0.5, l1=0.0, l2=0.0, weights_list=None) 
print(JointKRegressor.display(kreg,"beta"))
# Run the joint-constraint check on the fitted coefficients at x0_c with tol=1e-3.
JointUtils.check_jointure_constraint(betas,[x0_c],tol=1e-3)

In [None]:
# Joint2Regressor.fit_ols_jointure_soft at x0_c (defined in an earlier cell) with lc=15000000.0.
# NOTE(review): lc presumably is the penalty weight of the soft joint constraint — confirm.
reg = Joint2Regressor()
betas, sigma2s = reg.fit_ols_jointure_soft(X1c, X2c, y1, y2, x0_c, lc=15000000.0)
print(JointKRegressor.display(reg, "beta_S_x0_"))
# Same constraint check as for the other joint fits, tol=1e-3.
JointUtils.check_jointure_constraint(betas,[x0_c],tol=1e-3)

In [None]:
# Three-group fit: the second group (X2c, y2) is split again with split_at_y0.
kreg = JointKRegressor()
X2c1,X2c2,y21,y22  = JointUtils.split_at_y0(X2c,y2)
# Two joint points: x0_c (defined in an earlier cell) and one computed from the second group.
jl = [x0_c,JointUtils.find_x0(X2c, y2)]

# Quantile loss at tau=0.5 with l1=0.9 and l2=0.3.
betas = kreg.fit([(X1c,y1),(X2c1,y21),(X2c2,y22)], jl, loss='quantile', 
                 tau=0.5, l1=0.9, l2=0.3)
print(JointKRegressor.display(kreg,"beta"))
# JointKRegressor.predict(kreg,Xc,1) [:5]

In [None]:
import statsmodels.api as sm

# Reference OLS fits from statsmodels: the full sample first, then each group.
model = sm.OLS(y, Xc.values).fit()
model1 = sm.OLS(y1, X1c.values).fit()
model2 = sm.OLS(y2, X2c.values).fit()

# One output line per fit: [llf, bic, aic] rounded to 2 decimals.
print(
    *(np.round([fit.llf, fit.bic, fit.aic], 2) for fit in (model, model1, model2)),
    sep="\n",
)