In [3]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [4]:
## JEDHA course
# SST, SSE and SSR are calculated manually in this cell.
# Naming used here: SSE = explained sum of squares, SSR = residual sum of squares.

# Example data: the two rows below become the two feature columns once transposed
X = np.array([
    [1, 3, 5, 6, 7],
    [4.6, 3.7, 3.4, 3.0, 3.1],
]).T
Y = np.array([2.1, 3.5, 4.4, 5.6, 5.9])

from sklearn.linear_model import LinearRegression

# Create an instance of the model, then fit it on the example data
model = LinearRegression()
model.fit(X, Y)

# Evaluation metrics: the mean of Y and the fitted values are each computed once
Y_mean = Y.mean()
Y_pred = model.predict(X)

SST = np.square(Y - Y_mean).sum()
print(f"Sum of Square Total {SST}")

SSE = np.square(Y_pred - Y_mean).sum()
print(f"Sum of Square Explained {SSE}")

SSR = np.square(Y - Y_pred).sum()
print(f"Sum of Square Residual {SSR}")
print("\n")

# R square, first by hand and then through the model's own score() method
R_2 = 1 - SSR / SST
print(f"R square {R_2}")
R_2_alt = model.score(X, Y)
print(f"R square {R_2_alt}")

# Adjusted R square, with n = number of rows of X and p = number of columns of X
n, p = X.shape
R_2_adj = 1 - (n - 1) / (n - p - 1) * (1 - R_2)
print(f"R square adjusted {R_2_adj}")

Sum of Square Total 9.74
Sum of Square Explained 9.612353658536586
Sum of Square Residual 0.12764634146341436


R square 0.9868946261331196
R square 0.9868946261331196
R square adjusted 0.9737892522662392


In [5]:
# ChatGPT version of the code
import numpy as np
from sklearn.linear_model import LinearRegression

# Example data: the two rows below become the two feature columns once transposed
X = np.array([
    [1, 3, 5, 6, 7],
    [4.6, 3.7, 3.4, 3.0, 3.1],
]).T
Y = np.array([2.1, 3.5, 4.4, 5.6, 5.9])

# Create and fit the model (fit() hands back the fitted estimator)
model = LinearRegression().fit(X, Y)

# Manual computation of SST, SSE (explained) and SSR (residual)
Y_bar = np.mean(Y)
Y_hat = model.predict(X)

total_dev = Y - Y_bar          # observed values vs. their mean
explained_dev = Y_hat - Y_bar  # fitted values vs. the mean of the observations
residual_dev = Y - Y_hat       # observed values vs. fitted values

SST = np.sum(np.square(total_dev))
SSE = np.sum(np.square(explained_dev))
SSR = np.sum(np.square(residual_dev))

# Report the three sums of squares
print(f"Sum of Squares Total (SST): {SST}")
print(f"Sum of Squares Explained (SSE): {SSE}")
print(f"Sum of Squares Residual (SSR): {SSR}\n")

# R square (by hand and via scikit-learn's score()) and adjusted R square
R_2 = 1 - SSR / SST
R_2_alt = model.score(X, Y)

# n = number of observations (rows), p = number of predictors (columns)
n, p = X.shape
R_2_adj = 1 - (n - 1) / (n - p - 1) * (1 - R_2)

print(f"R square (manually calculated): {R_2}")
print(f"R square (from sklearn): {R_2_alt}")
print(f"Adjusted R square: {R_2_adj}")

Sum of Squares Total (SST): 9.74
Sum of Squares Explained (SSE): 9.612353658536586
Sum of Squares Residual (SSR): 0.12764634146341436

R square (manually calculated): 0.9868946261331196
R square (from sklearn): 0.9868946261331196
Adjusted R square: 0.9737892522662392


In [6]:
## JEDHA course
# Alternative solution with the statsmodels library (useful mainly for linear models)
import statsmodels.api as sm

# The intercept (coefficient beta_0) is not included automatically, so a
# constant variable equal to one is added to the regressors by hand.
X2 = sm.add_constant(X)
est = sm.OLS(Y, X2)
est2 = est.fit()

# Banner framed by blank lines, followed by the full regression summary
print("\n")
print(
    "-----------------------------------------------------------------------------------------",
    "------------------------Results from statsmodels-----------------------------------------",
    "-----------------------------------------------------------------------------------------",
    sep="\n",
)
print("\n")
print(est2.summary())



-----------------------------------------------------------------------------------------
------------------------Results from statsmodels-----------------------------------------
-----------------------------------------------------------------------------------------


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     75.30
Date:                Mon, 21 Oct 2024   Prob (F-statistic):             0.0131
Time:                        10:34:55   Log-Likelihood:                 2.0751
No. Observations:                   5   AIC:                             1.850
Df Residuals:                       2   BIC:                            0.6781
Df Model:                           2                                         
Covariance Type

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [7]:
import statsmodels.api as sm

def fit_and_summarize_ols(X, Y):
    """Add a constant to X, fit an OLS model of Y on it, and print a summary.

    Args:
        X: Regressors, passed to ``sm.add_constant`` so that an intercept
            column is included before fitting.
        Y: Response values, passed as the first argument of ``sm.OLS``.

    Returns:
        None. The summary is printed between two 90-dash separator lines.
    """
    separator = "-" * 90

    # Build the model on the intercept-augmented regressors and fit it
    fitted = sm.OLS(Y, sm.add_constant(X)).fit()

    # Structured display of the results
    print("\n" + separator)
    print("Résultats du modèle de régression linéaire (statsmodels)\n")
    print(fitted.summary())
    print(separator + "\n")

# Example usage with the notebook's data: two feature rows transposed into columns
X = np.array([
    [1, 3, 5, 6, 7],
    [4.6, 3.7, 3.4, 3.0, 3.1],
]).T
Y = np.array([2.1, 3.5, 4.4, 5.6, 5.9])

fit_and_summarize_ols(X, Y)


------------------------------------------------------------------------------------------
Résultats du modèle de régression linéaire (statsmodels)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     75.30
Date:                Mon, 21 Oct 2024   Prob (F-statistic):             0.0131
Time:                        10:41:33   Log-Likelihood:                 2.0751
No. Observations:                   5   AIC:                             1.850
Df Residuals:                       2   BIC:                            0.6781
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      

  warn("omni_normtest is not valid with less than 8 observations; %i "
