### Load packages

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import set_config; set_config(display="diagram")
from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")

### Load data

In [None]:
# Show the description text that ships with the dataset.
boston_description = load_boston()["DESCR"]
print(boston_description)

In [None]:
# Load the dataset once and reuse the returned bunch; the original called
# load_boston() three separate times to build a single frame.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this notebook needs an older scikit-learn release to run.
boston = load_boston()

# Feature columns come from the dataset's own feature names; the regression
# target is appended as the last column, "Price".
df_boston_price = pd.DataFrame(
    boston["data"],
    columns=boston["feature_names"],
).assign(Price=boston["target"])

df_boston_price.head()

### Check dataset quality

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df_boston_price.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column
df_boston_price.describe()

In [None]:
# Count the missing (null/NaN) values in each column
df_boston_price.isnull().sum()

### Exploratory data analysis (EDA)

In [None]:
# Correlation analysis: how do the features correlate with each other and
# with the target?
fig, ax = plt.subplots()
sns.heatmap(df_boston_price.corr(), ax=ax)
ax.set_title("Pairwise correlation of features and Price")

# Render the figure. The original ended with plt.plot(), which draws nothing
# and only echoes an empty list as the cell output; plt.show() is the call
# that displays the figure.
plt.show()

### Prepare modelling dataset

In [None]:
## Split data into training and testing
# Hold out 20% of the rows for evaluation. The seed is fixed so the split,
# and therefore every metric computed from it, is identical on each
# Restart & Run All; without random_state the rows are reshuffled per run.
X_train, X_test, y_train, y_test = train_test_split(
    df_boston_price.drop(columns=["Price"]),
    df_boston_price["Price"],
    test_size=0.2,
    random_state=42,
)

### Create pipeline for data transformation and fit model

In [None]:
## Pipeline creation
# Two stages, applied in order: standardise the features, then fit a
# linear regression on the scaled values.
pipeline_steps = [
    ("Standard Scaler", StandardScaler()),
    ("Regressor", LinearRegression()),
]
pipeline = Pipeline(steps=pipeline_steps)

# fit() returns the pipeline itself, so this last expression also renders it.
pipeline.fit(X_train, y_train)

### Check model performance

In [None]:
# Put predictions next to the true targets, row-aligned on a fresh 0..n-1
# index (the shuffled index y_test carries from the split is dropped).
predicted_prices = pipeline.predict(X_test)
actual_prices = y_test.to_numpy()

model_predictions = pd.DataFrame(
    {
        "PredictedPrice": predicted_prices,
        y_test.name: actual_prices,
    }
)

In [None]:
# Mean absolute error between the true and predicted prices on the test rows.
actual_price = model_predictions["Price"]
predicted_price = model_predictions["PredictedPrice"]
mean_absolute_error(actual_price, predicted_price)

In [None]:
# Root mean squared error: the square root of the test-set MSE.
test_mse = mean_squared_error(
    model_predictions["Price"],
    model_predictions["PredictedPrice"],
)
np.sqrt(test_mse)

### Pickling the model file for deployment

In [None]:
# Serialise the fitted pipeline to disk. The context manager closes the file
# handle (flushing buffered bytes) even if pickling raises; the original
# passed a bare open(...) whose handle was never explicitly closed.
with open("regression_pipeline.pkl", "wb") as model_file:  # binary write mode
    pickle.dump(pipeline, model_file)

# Optional round-trip check (only unpickle files you trust):
# with open("regression_pipeline.pkl", "rb") as model_file:
#     pickled_model = pickle.load(model_file)
#
# pickled_model.predict(X_test)