### Import the necessary libraries

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

### Downloading the dataset

- California Housing Dataset

In [3]:
# Load the California Housing dataset; the returned object exposes the
# feature matrix as `.data` and the regression target as `.target`.
housing = fetch_california_housing()

### Inspecting the data and its shape

In [5]:
# Show the feature matrix together with its (n_samples, n_features) shape.
housing.data, housing.data.shape

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 (20640, 8))

### Inspecting the data target and its shape

In [6]:
# Show the regression target vector together with its shape (one value per sample).
housing.target, housing.target.shape

(array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]), (20640,))

### Splitting the dataset

- Splitting the dataset into training and testing dataset (75:25 percentage split)

In [11]:
# Hold out a test set. test_size=0.25 is train_test_split's default when neither
# size is given; it is spelled out so the 75:25 ratio is visible in the code.
# The fixed random_state keeps the shuffle reproducible.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.25,
    random_state=42,
)

In [13]:
# Sanity-check the split sizes: features and targets must agree on row count.
print(f"{X_train_full.shape} {y_train_full.shape}")
print(f"{X_test.shape} {y_test.shape}")

(15480, 8) (15480,)
(5160, 8) (5160,)


- Splitting the training dataset into training and validation dataset (75:25 percentage split)

In [14]:
# Carve a validation set out of the training portion. test_size=0.25 is the
# default, written out so the 75:25 ratio is explicit; same seed for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.25,
    random_state=42,
)

In [15]:
# Sanity-check the train/validation sizes after the second split.
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_valid.shape} {y_valid.shape}")

(11610, 8) (11610,)
(3870, 8) (3870,)


### Initializing the MLP (Multi-Layer Perceptron) Regressor

- The model uses the ReLU activation function in the hidden layers and is trained with Adam, a variant of gradient descent, to minimize the mean squared error. A small amount of L2 regularization is also applied; its strength is controlled by the `alpha` parameter.

In [16]:
# Multi-layer perceptron with three hidden layers of 50 units each; the seed
# fixes weight initialisation so results are repeatable.
mlp_reg = MLPRegressor(
    hidden_layer_sizes=[50, 50, 50],
    random_state=42,
)

# Standardise the features before they reach the network, and bundle both steps
# so the scaler is fitted only on whatever data the pipeline is fitted on.
pipeline = make_pipeline(
    StandardScaler(),
    mlp_reg,
)

In [17]:
# Fit scaler + MLP on the training split, then predict the validation split.
# `fit` returns the fitted pipeline itself, so the two calls can be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_valid)

### Calculating the root mean squared error

In [18]:
# Root mean squared error on the validation split.
# The `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, so take the square root of the MSE
# explicitly; this form works on every scikit-learn version.
rmse = mean_squared_error(y_valid, y_pred) ** 0.5



In [19]:
# Report the validation-set RMSE (same units as the target).
print(rmse)

0.5053326657968684
