<div>
    <img src="https://storage.googleapis.com/kaggle-datasets-images/5227/7876/3d18388d350d2791f4121a232acce097/dataset-cover.jpg" />
</div>

In [None]:
import numpy as np
import pandas as pd
from numpy import asarray

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

<h1 id="dataset" style="color:#2a200b; background:#fcc688;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Location of the housing CSV inside the Kaggle input directory.
path = '../input/california-housing-prices/housing.csv'
# Read the file and randomly permute its rows in a single step.
df = shuffle(pd.read_csv(path))
# NumPy array holding the shuffled rows.
values = df.values
# Show the first rows as the cell output.
df.head()

In [None]:
# Encode the categorical `ocean_proximity` column as integer codes.
# The categories are sorted so that each one always receives the same code,
# regardless of the (shuffled) row order, and the mapping is applied to that
# single column only instead of scanning the whole frame.
categories = sorted(df['ocean_proximity'].dropna().unique())
ocean_proximity = {category: code for code, category in enumerate(categories)}
df = df.assign(ocean_proximity=df['ocean_proximity'].map(ocean_proximity))

# Replace the remaining NaNs in every column with that column's mean.
# NOTE: the means are computed over the full data set, i.e. before the
# train/test split performed later in the notebook.
df = df.fillna(df.mean())

# Split the frame into model inputs and the regression target.
features = df.drop(columns=['median_house_value'])
labels = df['median_house_value']

In [None]:
# Statistics of the house prices, kept so predictions can be mapped back
# to the original price scale later if needed.
houses_mean, houses_std = labels.mean(), labels.std()

# Standardise the prices: subtract the mean, then divide by the standard deviation.
centered_prices = labels - houses_mean
labels = centered_prices / houses_std

In [None]:
# Fit a min-max scaler on the feature columns, then replace the features
# with their scaled version (a NumPy array).
scaler = MinMaxScaler().fit(features)
features = scaler.transform(features)

In [None]:
# Use 67% of the rows for training; the rest is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    train_size=0.67,
)

<h1 id="model" style="color:#2a200b; background:#fcc688;"> 
    <center>Model
        <a class="anchor-link" href="#model" target="_self">¶</a>
    </center>
</h1>

In [None]:
def fit_model(X_train, y_train, epochs=300, batch_size=16, learning_rate=0.01):
    """Build, compile and train one small fully connected regression network.

    The network is ``Input -> Dense(20, relu) -> Dense(5, relu) -> Dense(1)``
    with He-normal initialisation on the hidden layers. It is trained with
    the Adam optimizer on a mean-squared-error loss.

    Parameters
    ----------
    X_train : 2-D array
        Training inputs; ``X_train.shape[1]`` sets the width of the input layer.
    y_train : array-like
        Training targets, passed unchanged to ``model.fit``.
    epochs : int, default 300
        Number of training epochs.
    batch_size : int, default 16
        Mini-batch size used during training.
    learning_rate : float, default 0.01
        Learning rate of the Adam optimizer.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    # width of the input layer (named so it does not shadow the notebook-level
    # `features` array)
    n_inputs = X_train.shape[1]
    # define neural network model; the input shape is declared with an explicit
    # Input object rather than the legacy `input_dim` layer argument
    model = Sequential()
    model.add(Input(shape=(n_inputs,)))
    model.add(Dense(20, kernel_initializer='he_normal', activation='relu'))
    model.add(Dense(5, kernel_initializer='he_normal', activation='relu'))
    model.add(Dense(1))
    # compile the model and specify loss and optimizer
    opt = Adam(learning_rate=learning_rate, beta_1=0.85, beta_2=0.999)
    model.compile(optimizer=opt, loss='mse')
    # fit the model on the training dataset (silently)
    model.fit(X_train, y_train, verbose=0, epochs=epochs, batch_size=batch_size)
    return model

<h1 id="ensemble" style="color:#2a200b; background:#fcc688;"> 
    <center>Ensemble
        <a class="anchor-link" href="#ensemble" target="_self">¶</a>
    </center>
</h1>

In [None]:
def fit_ensemble(n_members, X_train, X_test, y_train, y_test):
    """Train `n_members` networks and report each one's test-set MAE.

    Every member is produced by a separate call to `fit_model` on the same
    training data. After training, the member predicts on `X_test`, and the
    mean absolute error against `y_test` is printed.

    Returns the list of fitted models, in training order.
    """
    members = []
    for member_no in range(1, n_members + 1):
        # train a fresh network on the training split
        net = fit_model(X_train, y_train)
        # score it on the held-out split and report the error
        test_pred = net.predict(X_test, verbose=0)
        test_mae = mean_absolute_error(y_test, test_pred)
        print('>%d, MAE: %.3f' % (member_no, test_mae))
        # keep the trained network
        members.append(net)
    return members

<h1 id="predict" style="color:#2a200b; background:#fcc688;"> 
    <center>Interval Predictions
        <a class="anchor-link" href="#predict" target="_self">¶</a>
    </center>
</h1>

In [None]:
def predict_with_pi(ensemble, X, z=1.96):
    """Predict with every ensemble member and derive a Gaussian interval.

    For each prediction, the interval is ``mean +/- z * std``, where the mean
    and standard deviation are taken across the ensemble members only. Rows of
    `X` are never pooled together, so each row gets its own interval.

    Parameters
    ----------
    ensemble : iterable
        Models exposing ``predict(X, verbose=0)``.
    X : array
        Inputs handed unchanged to each model's ``predict``.
    z : float, default 1.96
        Multiplier applied to the standard deviation (1.96 corresponds to a
        95% interval under a Gaussian assumption).

    Returns
    -------
    (lower, mean, upper)
        Plain scalars when the models produce a single value (e.g. one input
        row and one output); otherwise arrays shaped like one model's
        prediction.

    Raises
    ------
    ValueError
        If `ensemble` contains no models.
    """
    # one prediction array per ensemble member
    member_preds = [model.predict(X, verbose=0) for model in ensemble]
    if not member_preds:
        raise ValueError('ensemble must contain at least one model')
    yhat = asarray(member_preds)
    # axis 0 indexes the ensemble members: reduce over it only
    center = yhat.mean(axis=0)
    interval = z * yhat.std(axis=0)
    lower, upper = center - interval, center + interval
    if center.size == 1:
        # single prediction: hand back scalars so callers can format them directly
        return lower.item(), center.item(), upper.item()
    return lower, center, upper

<h1 id="training" style="color:#2a200b; background:#fcc688;"> 
    <center>Training
        <a class="anchor-link" href="#training" target="_self">¶</a>
    </center>
</h1>

In [None]:
# train the ensemble; each member's test MAE is printed as it finishes
n_members = 10
ensemble = fit_ensemble(n_members, X_train, X_test, y_train, y_test)
# take the first test row, kept two-dimensional (1 x n_features),
# and compute its point prediction with the surrounding interval
newX = X_test[[0], :]
lower, mean, upper = predict_with_pi(ensemble, newX)
# all printed values are on the standardised price scale
print('Point prediction: %.3f' % mean)
print('95%% prediction interval: [%.3f, %.3f]' % (lower, upper))
print('Actual result:', y_test.iloc[0])