# Reecriture

In [1]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
from sklearn import datasets

# Partie 1
@dataclass
class Dataset:
    X: np.ndarray
    Y: np.ndarray
    features_names: list
    n: int
    p: int


def add_intercept(vecteur_1: np.ndarray, K: np.ndarray):
    M = np.column_stack([vecteur_1, K])
    return M


def to_dataframe(A: np.ndarray, B: np.ndarray, noms: list):
    df = pd.DataFrame(A, columns=noms)
    df["variable_expliquée"] = B
    return df



## Regression

In [2]:
@dataclass
class regression_lineaire:
    coefficients: np.ndarray
    features_names: list


def regression(X, Y, features_names):
    Xt_X = np.dot(X.T, X)
    Xt_Y = np.dot(X.T, Y)
    Beta = np.dot(np.linalg.inv(Xt_X), Xt_Y)
    intercept = Beta[0]
    betas = Beta[1:]
    return regression_lineaire(coefficients=Beta, features_names=features_names), intercept, betas


# 3eme partie : résultats
class results:
    def __init__(self, X, Y, df, Beta):
        self.X = X
        self.Y = Y
        self.df = df
        self.Beta = Beta
        self.valeurs_prédites = None
        self.erreurs = None
        self.R2 = None
        self.RMSE = None

    def predict(self):
        self.valeurs_prédites = np.dot(self.X, self.Beta)
        self.erreurs = self.Y - self.valeurs_prédites

    def metriques(self):
        ybar = np.mean(self.Y)
        SCT = np.sum((self.Y - ybar)**2) / len(self.Y)
        SCR = np.sum(self.erreurs**2) / len(self.Y)
        self.R2 = (SCT - SCR) / SCT
        self.RMSE = np.sqrt(SCR)

    def extend_df(self):
        self.df["valeurs_prédites"] = self.valeurs_prédites
        self.df["erreurs"] = self.erreurs
        return self.df




# Fonction principale

In [3]:
def Fonction_regression():

    # Génération des données
    T, V = datasets.make_regression(n_samples=1000, n_features=6, noise=10)

    p = 6
    feature_list = [f"X{i}" for i in range(p)]
    feature_list.insert(0, "constante")

    data_1 = Dataset(X=T, Y=V, features_names=feature_list, n=1000, p=6)

    vecteur = np.ones((data_1.n, 1))
    X_avec_constante = add_intercept(vecteur_1=vecteur, K=data_1.X)

    df = to_dataframe(X_avec_constante, data_1.Y, feature_list)
    print(df)

    # Régression
    res_regression, intercept, betas = regression(X_avec_constante, data_1.Y, feature_list)
    Beta = res_regression.coefficients

    print("dataclass contenant les coefficients :", res_regression)

    # Dictionnaire
    dict_coeff = {feature_list[i]: Beta[i] for i in range(len(feature_list))}
    print("Dictionnaire des coefficients : ", dict_coeff)

    # Résultats
    res = results(X_avec_constante, data_1.Y, df, Beta)
    res.predict()
    res.metriques()

    df_extended = res.extend_df()

    print("Extended DataFrame :\n", df_extended.head())
    print("R² :", res.R2)
    print("RMSE :", res.RMSE)





### Lancer tout

In [4]:
Fonction_regression()

     constante        X0        X1        X2        X3        X4        X5  \
0          1.0 -0.160886  0.524608 -1.472011 -1.547592 -0.220107 -0.763694   
1          1.0 -0.754835 -0.916806 -0.568178 -1.724806  0.295239  0.893025   
2          1.0  0.163280 -0.844185 -0.536827  1.541908  0.715508 -0.737996   
3          1.0  0.614690 -1.352792 -0.042602 -0.141465  0.295241 -1.163164   
4          1.0 -0.126317 -0.785265  0.232629 -0.524015  0.841517 -0.156809   
..         ...       ...       ...       ...       ...       ...       ...   
995        1.0  0.955178 -0.386094 -0.302928  0.026328  1.871968 -0.628266   
996        1.0 -1.413922 -0.157979 -0.590266  0.031781 -0.346818  1.005278   
997        1.0  0.135125  0.078470  0.009918 -0.259211  1.475170  1.039413   
998        1.0 -0.074369 -0.189776 -0.811404 -3.123758  0.268456 -0.628920   
999        1.0  0.013894  2.427328  0.361955  1.618220 -1.047386  0.726241   

     variable_expliquée  
0            -99.318623  
1          

#  Tests

In [5]:
def test_données_et_transformation_en_df():
    T, V = datasets.make_regression(n_samples=100, n_features=3, noise=5)
    p = 3
    feature_list = [f"X{i}" for i in range(p)]
    feature_list.insert(0, "constante")

    data_test = Dataset(X=T, Y=V, features_names=feature_list, n=100, p=3)

    vecteur = np.ones((data_test.n, 1))
    X_avec_constante = add_intercept(vecteur_1=vecteur, K=data_test.X)

    df_test = to_dataframe(X_avec_constante, data_test.Y, feature_list)

    assert df_test.shape == (100, 5), "La transformation en DataFrame a échoué"
    assert list(df_test.columns) == feature_list + ["variable_expliquée"], "Les noms des colonnes sont incorrects"

In [6]:
test_données_et_transformation_en_df()