<a href="https://colab.research.google.com/github/mace1341/studious-invention/blob/main/Deep_Learning_Boston_House_Prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from keras.datasets import boston_housing

# Can we use the 13 features to model and predict the median value?
# load_data() hands back the train and test splits, each unpacked here as a
# (data, targets) pair.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
[1m57026/57026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Preparing the data
# Feature-wise normalization: rescale every feature (column) so that it is
# centered on 0 and has unit standard deviation.
# Both statistics are computed on the training data only and then reused for
# the test data, so nothing about the test set leaks into preprocessing.

# Center the training features (in place).
mean = train_data.mean(axis=0)
np.subtract(train_data, mean, out=train_data)

# Scale the centered training features to unit standard deviation (in place).
std = train_data.std(axis=0)
np.divide(train_data, std, out=train_data)

# Apply the *training* mean and std to the test features (in place).
np.subtract(test_data, mean, out=test_data)
np.divide(test_data, std, out=test_data)


In [None]:
# Building the Network

from keras import models
from keras import layers

# helper function
def build_model(input_dim=None):
  """Build and compile a small fully connected network for scalar regression.

  Args:
    input_dim: Number of input features. Defaults to the number of columns
      of the module-level `train_data` array.

  Returns:
    A compiled Sequential model: two 64-unit ReLU hidden layers followed by a
    single linear output unit, optimized with RMSprop on MSE loss and
    reporting MAE as a metric.
  """
  if input_dim is None:
    input_dim = train_data.shape[1]

  model = models.Sequential()
  # Declare the input with an explicit Input object. Passing `input_shape` to
  # the first Dense layer of a Sequential model is discouraged and emits a
  # UserWarning in recent Keras versions.
  model.add(layers.Input(shape=(input_dim,)))
  model.add(layers.Dense(64, activation='relu'))
  model.add(layers.Dense(64, activation='relu'))
  # No activation on the output unit, so the prediction range is unconstrained.
  model.add(layers.Dense(1))
  model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
  return model

# Small Network with 2 hidden layers, each with 64 units
# in general the less training data, the worse overfitting will be,
#   and using a small network is one way to mitigate overfitting

# Network ends with a single unit and no activation (it will be a linear layer)
# This is a typical setup for scalar regression (where you're trying to predict a single continuous value)
# Applying an activation function would constrain the range the output can take;
#   e.g., if you applied a sigmoid activation function to the last layer the network
#   could only learn to predict values between 0 and 1

# mse loss function - square of the difference between predictions and target
#   widely used for regressions

# mae helpful - measures the absolute difference between predictions and target


In [None]:
# K-fold validation
# Given the small data set, this is a reliable way of evaluating the model:
#   split the training data into k partitions, train k fresh models (each on
#   k-1 partitions) and validate each one on the partition it did not see.

k = 4
num_val_samples = len(train_data) // k
num_epochs = 500
all_mae_histories = []

for i in range(k):
  print('processing fold #', i)

  # Bounds of the slice held out for validation in this fold
  val_start = i * num_val_samples
  val_end = (i + 1) * num_val_samples

  # Held-out partition
  val_data = train_data[val_start:val_end]
  val_targets = train_targets[val_start:val_end]

  # Everything outside the held-out slice is used for training
  partial_train_data = np.concatenate(
      [train_data[:val_start],
       train_data[val_end:]],
      axis=0)

  partial_train_targets = np.concatenate(
      [train_targets[:val_start],
       train_targets[val_end:]],
      axis=0)

  # Fresh, untrained model for every fold
  model = build_model()

  # Train on the k-1 partitions and score the held-out partition after every
  # epoch. Without `validation_data` the history would only contain training
  # metrics, which say nothing about generalization.
  history = model.fit(partial_train_data,
                      partial_train_targets,
                      validation_data=(val_data, val_targets),
                      epochs=num_epochs,
                      batch_size=1,
                      verbose=0)

  # Per-epoch MAE on the held-out partition for this fold
  mae_history = history.history['val_mae']

  # Collect this fold's curve
  all_mae_histories.append(mae_history)

# Average the per-epoch validation MAE across all folds
average_mae_history = [
    np.mean([fold_history[epoch] for fold_history in all_mae_histories])
    for epoch in range(num_epochs)
    ]

processing fold # 0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


processing fold # 1
processing fold # 2
processing fold # 3
