# Goal

The goal is to build a regression model that predicts the median house price (in thousands of dollars) of Boston suburbs in the 1970s. The dataset has 506 entries, each described by 13 numerical features, split into 404 training samples and 102 test samples.

In [1]:
import numpy as np
import keras
import matplotlib.pyplot as plt

from keras.datasets import boston_housing

# Load the pre-split dataset: a (features, targets) pair for training and another for testing.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/keras-datasets/boston_housing.npz


In [2]:
train_data.shape, test_data.shape

((404, 13), (102, 13))

In [3]:
train_data[0]

array([  1.23247,   0.     ,   8.14   ,   0.     ,   0.538  ,   6.142  ,
        91.7    ,   3.9769 ,   4.     , 307.     ,  21.     , 396.9    ,
        18.72   ])

In [4]:
# median house price in 1000 dollars
train_targets[0]

15.2

## Featurewise Normalization

In [5]:
from typing import Tuple


def normalize(training_data: np.ndarray, test_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Standardize every feature column using statistics of the training set.

    Each column is shifted by the training mean and scaled by the training
    standard deviation. The test set is transformed with the *training*
    statistics so that no information from the test data influences the
    preprocessing.

    Columns that are constant in the training set (standard deviation of 0)
    are only centered, not scaled, which avoids NaN/inf from a division by
    zero.

    Args:
        training_data: Array of shape (n_train_samples, n_features).
        test_data: Array of shape (n_test_samples, n_features).

    Returns:
        A tuple ``(x_train, x_test)`` of newly allocated, normalized arrays.
        The input arrays are left unmodified.
    """
    x_train = np.asarray(training_data)
    x_test = np.asarray(test_data)

    colwise_mean = x_train.mean(axis=0)
    colwise_sd = x_train.std(axis=0)
    # A zero standard deviation means the column is constant; divide by 1
    # instead so the centered values (all zeros for training) stay finite.
    safe_sd = np.where(colwise_sd == 0, 1.0, colwise_sd)

    # The arithmetic below allocates fresh arrays, so no explicit copy is needed.
    x_train_normalized = (x_train - colwise_mean) / safe_sd
    x_test_normalized = (x_test - colwise_mean) / safe_sd

    return x_train_normalized, x_test_normalized

## Model

In [6]:
from keras import models
from keras import layers

def build_model(num_features):
    """Create and compile a small fully connected regression network.

    The network stacks two 64-unit ReLU hidden layers followed by a single
    linear output unit. It is compiled with the RMSprop optimizer, mean
    squared error as the loss, and mean absolute error as a tracked metric.

    Args:
        num_features: Number of input features per sample.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(num_features,)),
        layers.Dense(64, activation='relu'),
        # Single unit without activation: an unconstrained scalar prediction.
        layers.Dense(1),
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network