<a href="https://colab.research.google.com/github/mrtzcardo/Predicting_Housing_Prices/blob/main/Predicting_Housing_Prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook attempts to predict the median price of homes in a given Boston suburb in the
mid-1970s, given data points about the suburb at the time, such as the crime rate, the local property tax rate, and so on.
The dataset is small: only 506 data points, split
between 404 training samples and 102 test samples.
Each feature in the input data
(for example, the crime rate) has a different scale.



In [None]:
from keras.datasets import boston_housing
# load_data() returns a (features, targets) pair for the training split,
# followed by the same pair for the test split.
train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

# Expected shapes:
#   train_data -> 404 training samples, 13 numerical features
#   test_data  -> 102 test samples,     13 numerical features
print(train_data.shape, test_data.shape, sep='\n')

# The targets are median home values in thousands of dollars;
# run print(train_targets) to inspect them.

In [None]:
'''Normalizing the data'''
'''Subtract the mean of the feature and divide by the standard deviation, 
so that the feature is centered around 0 and has a unit standard deviation'''

# Both statistics come from the training split only and are then reused for
# the test split, so nothing computed on the test data feeds into the
# preprocessing.

# Step 1: centre each feature column on the training mean (in place).
mean = train_data.mean(axis=0)
train_data -= mean
test_data -= mean

# Step 2: scale each feature column by the standard deviation of the
# (already centred) training data.
std = train_data.std(axis=0)
train_data /= std
test_data /= std

In [None]:
from keras import models
from keras import layers

def build_model():
  """Build and compile a fresh regression network.

  The network has two 64-unit ReLU hidden layers followed by a single
  linear output unit (no activation), so it emits one unbounded scalar
  per sample. The input width is read from the module-level
  ``train_data`` array, which must exist before this is called.

  Returns:
    A compiled ``Sequential`` model using the RMSprop optimizer, mean
    squared error as the loss, and mean absolute error as a metric.
  """
  n_features = train_data.shape[1]
  network = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(n_features,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),  # linear output: plain scalar regression
  ])
  # MSE is a common loss for regression problems; MAE is tracked alongside it.
  network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
  return network

In [None]:
'''K-fold validation since little data'''

import numpy as np

k = 4
num_epochs = 100
num_val_samples = len(train_data) // k  # size of each validation partition
all_scores = []                         # one validation MAE per fold

for i in range(k):
  print('processing fold #', i)

  # Rows [start, stop) form the validation partition for fold i.
  start = i * num_val_samples
  stop = start + num_val_samples
  val_data = train_data[start:stop]
  val_targets = train_targets[start:stop]

  # Every row outside that window is used for training in this fold.
  partial_train_data = np.concatenate(
    (train_data[:start], train_data[stop:]), axis=0)
  partial_train_targets = np.concatenate(
    (train_targets[:start], train_targets[stop:]), axis=0)

  # A newly built model per fold, so no weights carry over between folds.
  model = build_model()
  model.fit(partial_train_data, partial_train_targets,
            epochs=num_epochs, batch_size=1, verbose=0)

  # evaluate() yields the loss (MSE) followed by the compiled metric (MAE).
  val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
  all_scores.append(val_mae)

# Per-fold validation MAE, then the average across all folds.
print(all_scores)
print(np.mean(all_scores))