In [1]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
import numpy as np
import pandas as pd


# --- Synthetic data ----------------------------------------------------------
# Two feature columns drawn uniformly from [0, 10) and a target drawn uniformly
# from [-10, 10). Note that the target is sampled independently of the
# features, so there is no real signal for the model to learn here.
np.random.seed(0)  # pin NumPy's global RNG so the draws below are repeatable
num_samples = 10_000
X = 10 * np.random.rand(num_samples, 2)
y = 20 * np.random.rand(num_samples) - 10

# --- Train / test split ------------------------------------------------------
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# --- Model: standardise the features, then fit a linear regression -----------
scaler = StandardScaler()
regression = LinearRegression()
steps = [("scaler", scaler), ("regression", regression)]

# fit() returns the pipeline itself, so construction and fitting can be chained.
pipeline = Pipeline(steps=steps).fit(X_train, y_train)

# --- Inspect the fitted regression step --------------------------------------
# The regression step sees the scaler's output, so these parameters are
# expressed in terms of the standardised features, not the raw ones.
coefficients = pipeline["regression"].coef_
intercept = pipeline["regression"].intercept_

print(f"Coefficients: {coefficients}")
print(f"Intercept: {intercept}")

# --- Evaluate on the held-out rows -------------------------------------------
y_pred = pipeline.predict(X_test)

# Side-by-side view of actual vs. predicted targets for a quick sanity check.
data = {"y_true": y_test, "y_pred": y_pred}
df = pd.DataFrame(data)
print(df.head())

rmse = root_mean_squared_error(y_test, y_pred)
print(f"RMSE: {rmse}")

Coefficients: [ 0.00099503 -0.01761937]
Intercept: 0.03410986471560885
     y_true    y_pred
0 -2.719998  0.054270
1 -7.378500  0.031076
2 -4.916802  0.053589
3 -6.676572  0.053467
4 -7.671276  0.015000
RMSE: 5.855947236028672
