<a href="https://colab.research.google.com/github/kjcoursera/TensorFlow/blob/main/Intro_neural_nets_CA_housing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# TensorFlow; its tf.keras and tf.feature_column APIs are used in later cells.
import tensorflow as tf

# Helper libraries: numerical arrays (numpy), tabular data (pandas), plotting (matplotlib).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Show the installed TensorFlow version so readers know which release produced the outputs.
print(tf.__version__)

2.3.0


In [2]:
# Seed for the row shuffle below. A fixed value makes the shuffled order
# identical on every "Restart & Run All"; change it to get a different order.
SHUFFLE_SEED = 42

# Download the California housing training split.
train_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv")
# Shuffle the training rows. A locally seeded RandomState is used instead of
# the global NumPy RNG so the permutation is reproducible and does not depend
# on (or disturb) random state consumed elsewhere in the notebook.
train_df = train_df.reindex(
    np.random.RandomState(SHUFFLE_SEED).permutation(train_df.index))

# Download the held-out test split (left in file order; it is never trained on here).
test_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_test.csv")


In [3]:
# Convert every column of train_df to Z-scores:
# subtract the column's mean, then divide by the column's standard deviation.
train_df_mean = train_df.mean()
train_df_std = train_df.std()

# Both Series are aligned to the DataFrame's columns by label.
train_df_norm = train_df.sub(train_df_mean).div(train_df_std)

# Preview the first rows of the normalized frame.
train_df_norm.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
12943,-1.136011,0.825688,0.19152,1.321287,0.888706,1.235721,0.982464,0.878137,0.194847
3677,0.818939,-0.8914,-0.920744,0.309336,-0.088757,0.078778,-0.008379,0.873368,0.576797
5556,0.694261,-0.779111,-0.285165,-0.181043,0.335918,1.037961,0.329704,-0.829899,-0.371612
3963,0.79899,-0.849292,0.50931,-0.683808,-0.824226,-0.4753,-0.762564,0.710016,-0.257803
1620,1.16305,-1.134693,0.032625,0.334107,0.428445,0.296576,0.498746,-0.195308,-0.356092


In [4]:
# Per-column statistics of the test set itself. They are NOT used for scaling
# below; they are kept so any cell that refers to these names still works.
test_df_mean = test_df.mean()
test_df_std = test_df.std()

# Normalize test_df with the TRAINING mean and standard deviation.
# The model learns on features scaled by the training statistics, so held-out
# data must go through the exact same transform; scaling the test set by its
# own statistics would put the two splits on slightly different scales.
# Assumes test_df has the same columns as train_df (both CSVs come from the
# same dataset) -- a column missing from the training statistics would become NaN.
test_df_norm = (test_df - train_df_mean) / train_df_std

In [5]:
# Width of one latitude/longitude bucket, measured in Z-score units.
resolution_in_Zs = 0.3 # 3/10


def _bucket_boundaries(normalized_values, step):
  """Return evenly spaced bucket edges for a column of normalized values.

  The edges start at the column minimum and stop before the column maximum,
  after both have been truncated toward zero by int(); `step` is the spacing.
  """
  lower = int(min(normalized_values))
  upper = int(max(normalized_values))
  return list(np.arange(lower, upper, step))


# Bucketized latitude: the numeric column is split at the computed boundaries.
latitude_as_a_numeric_column = tf.feature_column.numeric_column("latitude")
latitude_boundaries = _bucket_boundaries(train_df_norm['latitude'], resolution_in_Zs)
latitude = tf.feature_column.bucketized_column(
    latitude_as_a_numeric_column, latitude_boundaries)

# Bucketized longitude, built the same way.
longitude_as_a_numeric_column = tf.feature_column.numeric_column("longitude")
longitude_boundaries = _bucket_boundaries(train_df_norm["longitude"], resolution_in_Zs)
longitude = tf.feature_column.bucketized_column(
    longitude_as_a_numeric_column, longitude_boundaries)

# Feature cross of the two bucketized columns, hashed into 100 buckets and
# wrapped in an indicator column so it can be fed to a dense layer.
latitude_X_longitude = tf.feature_column.crossed_column(
    [latitude, longitude], hash_bucket_size=100)
crossed_feature = tf.feature_column.indicator_column(latitude_X_longitude)

# median_income and population are passed through as floating-point values.
median_income = tf.feature_column.numeric_column("median_income")
population = tf.feature_column.numeric_column("population")

# All feature columns, in the order they are handed to the feature layer.
feature_columns = [crossed_feature, median_income, population]

# Turn the list of feature columns into a layer that will later be fed into
# the model.
my_feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [11]:
def create_model(my_learning_rate, my_feature_layer):
  """Build and compile a small feed-forward regression network.

  Layer stack, in order: `my_feature_layer`, a 20-unit ReLU layer
  ("Hidden1"), a 12-unit ReLU layer ("Hidden2"), and a single-unit
  output layer ("Output") with no activation specified.

  Args:
    my_learning_rate: learning rate passed to the Adam optimizer.
    my_feature_layer: layer added first, ahead of the hidden layers.

  Returns:
    The compiled Sequential model (mean-squared-error loss, with
    MeanSquaredError also tracked as a metric).
  """
  # (layer name, number of units) for each hidden layer, in stacking order.
  hidden_layer_specs = (("Hidden1", 20), ("Hidden2", 12))

  network = tf.keras.models.Sequential()
  network.add(my_feature_layer)

  for layer_name, unit_count in hidden_layer_specs:
    hidden = tf.keras.layers.Dense(units=unit_count,
                                   activation='relu',
                                   name=layer_name)
    network.add(hidden)

  # Output layer: one unit producing the predicted value.
  network.add(tf.keras.layers.Dense(units=1, name='Output'))

  adam = tf.keras.optimizers.Adam(learning_rate=my_learning_rate)
  mse_metric = tf.keras.metrics.MeanSquaredError()
  network.compile(optimizer=adam,
                  loss="mean_squared_error",
                  metrics=[mse_metric])
  return network

   

In [14]:
def train_model(model, dataset, epochs, label_name, batch_size=None):
  """Fit `model` on `dataset` and return its per-epoch mean squared error.

  Args:
    model: a compiled model exposing a Keras-style `fit` method. It must
      report a metric named "mean_squared_error" in its training history.
    dataset: column-oriented data whose `.items()` yields (name, values)
      pairs, e.g. a pandas DataFrame. It is not modified.
    epochs: number of training epochs.
    label_name: name of the column in `dataset` to use as the label; every
      other column is passed to the model as a feature.
    batch_size: batch size forwarded to `fit` (None lets the model decide).

  Returns:
    A tuple `(epochs, mse)`: the list of epoch indices recorded by the
    training history, and a pandas Series with the mean squared error
    at each of those epochs.

  Raises:
    KeyError: if `label_name` is not a column of `dataset`.
  """
  # Split the dataset into features and label. Each column becomes its own
  # NumPy array keyed by column name.
  features = {name: np.array(value) for name, value in dataset.items()}
  try:
    # pop() removes the label from the feature dict so it is not also fed
    # to the model as an input.
    label = features.pop(label_name)
  except KeyError:
    raise KeyError(
        "label column {!r} not found in dataset".format(label_name)) from None

  history = model.fit(x=features, y=label, batch_size=batch_size,
                      epochs=epochs, shuffle=True)

  # The list of epochs is stored separately from the rest of history.
  trained_epochs = history.epoch

  # To track the progression of training, gather a snapshot
  # of the model's mean squared error at each epoch.
  hist = pd.DataFrame(history.history)
  mse = hist["mean_squared_error"]

  return trained_epochs, mse
