In [44]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [45]:

def make_dataset(n=1000, mu=1.0, sigma2=0.2, rng=None):
    """Generate the synthetic data ``y = 2 - x**3 + 3*sin(x) + noise``.

    Parameters
    ----------
    n : int
        Number of evenly spaced samples of ``x`` on ``[0, 2*pi]``.
    mu : float
        Mean of the Gaussian noise added to ``y``.
    sigma2 : float
        Variance of the Gaussian noise; must be non-negative.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) draws from NumPy's
        global random state, so ``np.random.seed(...)`` still controls the
        result. An int seed or a ``Generator`` makes the draw reproducible
        without touching the global state.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, x1, x2, y)`` where ``x1 = x**3`` and ``x2 = sin(x)``; every
        array has length ``n``.

    Raises
    ------
    ValueError
        If ``sigma2`` is negative (its square root would be NaN and would
        silently turn every target into NaN).
    """
    if np.any(np.asarray(sigma2) < 0):
        raise ValueError("sigma2 must be non-negative, got %r" % (sigma2,))

    x = np.linspace(0, 2 * np.pi, n)
    x1 = x ** 3
    x2 = np.sin(x)

    sigma = np.sqrt(sigma2)
    if rng is None:
        # Keep the historical behaviour: use the global legacy RNG.
        noise = np.random.normal(mu, sigma, n)
    else:
        # default_rng() passes an existing Generator through unchanged and
        # turns an int into a freshly seeded Generator.
        noise = np.random.default_rng(rng).normal(mu, sigma, n)

    y = 2 - x1 + 3 * x2 + noise
    return x, x1, x2, y


In [46]:

def compute_metrics(y_true, y_pred):
    """Return ``(mse, rmse, ndei)`` for predictions ``y_pred`` of ``y_true``.

    ``mse`` is the mean squared error, ``rmse`` its square root, and ``ndei``
    the RMSE divided by the standard deviation of ``y_true``.
    """
    squared_err = mean_squared_error(y_true, y_pred)
    root_err = np.sqrt(squared_err)
    target_spread = np.std(y_true)
    return squared_err, root_err, root_err / target_spread

In [47]:
def train_model(X, y, fit_bias=True, fixed_bias=None, scale=False):
    """Fit a linear regression on a 70/30 split and report train/test errors.

    NOTE: a later cell defines ``train_model`` again; once that cell has run,
    its definition replaces this one.

    Parameters
    ----------
    X : 2-D array-like
        Feature matrix, one row per sample.
    y : array-like
        Targets, one per row of ``X``.
    fit_bias : bool
        Whether the intercept is estimated. Ignored when ``fixed_bias`` is
        given.
    fixed_bias : float or None
        If not ``None``, the intercept is pinned to this value: it is
        subtracted from the targets, a no-intercept model is fitted, and the
        value is added back to the predictions.
    scale : bool
        If true, standardise the features with a ``StandardScaler`` fitted on
        the training split only.

    Returns
    -------
    dict
        Keys ``coef``, ``bias``, ``mse_train``, ``rmse_train``,
        ``ndei_train``, ``mse_test``, ``rmse_test``, ``ndei_test``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    if scale:
        # Fit the scaler on the training split only, then reuse it on test.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    if fixed_bias is not None:
        model = LinearRegression(fit_intercept=False)
        model.fit(X_train, y_train - fixed_bias)
        y_pred_train = model.predict(X_train) + fixed_bias
        y_pred_test = model.predict(X_test) + fixed_bias
        bias = fixed_bias
    else:
        model = LinearRegression(fit_intercept=fit_bias)
        model.fit(X_train, y_train)
        y_pred_train = model.predict(X_train)
        y_pred_test = model.predict(X_test)
        bias = model.intercept_

    # Previously the function ended here and implicitly returned None, so the
    # fitted model and its predictions were discarded.
    mse_tr, rmse_tr, ndei_tr = compute_metrics(y_train, y_pred_train)
    mse_te, rmse_te, ndei_te = compute_metrics(y_test, y_pred_test)

    return {
        "coef": model.coef_,
        "bias": bias,
        "mse_train": mse_tr,
        "rmse_train": rmse_tr,
        "ndei_train": ndei_tr,
        "mse_test": mse_te,
        "rmse_test": rmse_te,
        "ndei_test": ndei_te,
    }



In [48]:
def train_model(X, y, fit_bias=True, fixed_bias=None, scale=False,
                test_size=0.3, random_state=42):
    """Fit a linear regression on a train/test split and report its errors.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample. A 1-D input is treated as a
        single feature and reshaped to one column.
    y : array-like
        Targets, one per row of ``X``.
    fit_bias : bool
        Whether the intercept is estimated. Ignored when ``fixed_bias`` is
        given.
    fixed_bias : float or None
        If not ``None``, the intercept is pinned to this value: it is
        subtracted from the targets, a no-intercept model is fitted, and the
        value is added back to the predictions.
    scale : bool
        If true, standardise the features with a ``StandardScaler`` fitted on
        the training split only. With the scaler's default centring, a
        ``fixed_bias`` then acts as the prediction at the training-mean
        features rather than at raw ``X = 0``.
    test_size : float or int
        Forwarded to ``train_test_split``; defaults to the 30% hold-out used
        throughout the notebook.
    random_state : int or None
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    dict
        ``coef`` (fitted coefficients), ``bias`` (``fixed_bias`` or the
        fitted intercept), and ``mse_*`` / ``rmse_*`` / ``ndei_*`` for the
        ``train`` and ``test`` splits.
    """
    if np.ndim(X) == 1:
        # scikit-learn estimators reject 1-D feature input.
        X = np.reshape(X, (-1, 1))

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    if scale:
        # Fit the scaler on the training split only, then reuse it on test.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    if fixed_bias is not None:
        model = LinearRegression(fit_intercept=False)
        model.fit(X_train, y_train - fixed_bias)
        y_pred_train = model.predict(X_train) + fixed_bias
        y_pred_test = model.predict(X_test) + fixed_bias
        bias = fixed_bias
    else:
        model = LinearRegression(fit_intercept=fit_bias)
        model.fit(X_train, y_train)
        y_pred_train = model.predict(X_train)
        y_pred_test = model.predict(X_test)
        bias = model.intercept_

    # Reuse the notebook's metric helper instead of repeating its formulas.
    mse_tr, rmse_tr, ndei_tr = compute_metrics(y_train, y_pred_train)
    mse_te, rmse_te, ndei_te = compute_metrics(y_test, y_pred_test)

    return {
        "coef": model.coef_,
        "bias": bias,
        "mse_train": mse_tr,
        "rmse_train": rmse_tr,
        "ndei_train": ndei_tr,
        "mse_test": mse_te,
        "rmse_test": rmse_te,
        "ndei_test": ndei_te,
    }


In [49]:
# make_dataset() draws its noise from NumPy's global RNG; seed it first so the
# dataset (and every metric derived from it) is the same on each fresh run.
np.random.seed(42)
x, x1, x2, y = make_dataset()

# Single squared feature, built once and shared by both fits below.
# NOTE(review): the data are generated from x**3 (see make_dataset) while this
# model uses x**2 -- confirm the mismatch is intended.
X_sq = x.reshape(-1, 1) ** 2

# Intercept pinned at 2; fit_bias is ignored by train_model when fixed_bias is
# given, it is passed only to make the intent explicit.
res_a_raw = train_model(
    X=X_sq,
    y=y,
    fit_bias=False,
    fixed_bias=2,
    scale=False,
)

# Same model, but with the feature standardised before fitting.
res_a_scaled = train_model(
    X=X_sq,
    y=y,
    fit_bias=False,
    fixed_bias=2,
    scale=True,
)


In [50]:
# Two-column design matrix: x1 and x2 side by side, one row per sample.
X = np.column_stack([x1, x2])

# Fit on both features with no intercept term.
res_b = train_model(X, y, fit_bias=False)


In [51]:
# Reuses the current X and y, this time letting the model estimate the intercept.
res_c = train_model(X, y, fit_bias=True)


In [None]:
# Sweep every (noise mean, noise variance) pair, mu varying slowest, and refit
# the two-feature model with an estimated intercept on a fresh dataset each time.
noise_grid = [(mu, sigma2) for mu in (0, 1, 2) for sigma2 in (0.05, 0.2, 1)]

rows = []
for mu, sigma2 in noise_grid:
    x, x1, x2, y = make_dataset(mu=mu, sigma2=sigma2)
    X = np.column_stack([x1, x2])

    res = train_model(X, y, fit_bias=True)

    # Tag the result row with the setting that produced it.
    res["mu"] = mu
    res["sigma2"] = sigma2
    res["Model"] = "(d)"
    rows.append(res)

# One row per noise setting; the bare name renders the table as cell output.
df_noise = pd.DataFrame(rows)
df_noise


Unnamed: 0,coef,bias,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test,mu,sigma2,Model
0,"[-0.9998321376658564, 2.9889470203734043]",1.989238,0.05034,0.224367,0.003165,0.05341,0.231106,0.003122,0,0.05,(d)
1,"[-1.000338892155191, 2.9546635410792046]",2.005498,0.199774,0.446961,0.006304,0.189183,0.434952,0.005877,0,0.2,(d)
2,"[-0.9981743972057278, 3.149701405244439]",1.862962,1.045782,1.022635,0.014433,1.083039,1.040692,0.014033,0,1.0,(d)
3,"[-1.000014503446748, 2.9947150891029337]",3.00177,0.054506,0.233464,0.003293,0.055626,0.235851,0.003185,1,0.05,(d)
4,"[-1.000685268745662, 2.9421128457640067]",3.051481,0.179927,0.424178,0.005981,0.189814,0.435676,0.005884,1,0.2,(d)
5,"[-1.0011995519131092, 2.9938175960838733]",3.136388,1.087323,1.042748,0.014689,1.232813,1.110321,0.015004,1,1.0,(d)
6,"[-1.0002812959067036, 2.996511536250894]",4.010387,0.052415,0.228944,0.003228,0.047365,0.217634,0.00294,2,0.05,(d)
7,"[-0.9997647022085405, 3.0337334363265245]",3.986754,0.197816,0.444766,0.006273,0.21512,0.46381,0.006266,2,0.2,(d)
8,"[-1.0003197122946537, 2.945891549189883]",3.983872,0.924874,0.961704,0.013564,1.069162,1.034003,0.01397,2,1.0,(d)
