In [1]:
import tensorflow.keras as keras
import pandas as pd
import numpy as numpy
import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
df = pd.read_csv('housing.csv')

In [3]:
df.shape

(20640, 10)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [5]:
df = df.dropna()

## first 8 columns are features, 9th col is label and 10th is not used

In [6]:
labels = df.median_house_value

In [7]:
features = df.copy()

In [8]:
features.drop(columns=['median_house_value', 'ocean_proximity'], inplace=True)

In [9]:
features.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462


In [10]:
features.shape

(20433, 8)

In [11]:
labels.shape

(20433,)

## Normalization

In [12]:
mean = features.mean(axis=0)
std = features.std(axis=0)

In [13]:
features = features - mean
features = features / std

In [14]:
mean

longitude             -119.570689
latitude                35.633221
housing_median_age      28.633094
total_rooms           2636.504233
total_bedrooms         537.870553
population            1424.946949
households             499.433465
median_income            3.871162
dtype: float64

In [15]:
features.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-1.327281,1.051692,0.982139,-0.803793,-0.970301,-0.973296,-0.976809,2.345106
1,-1.32229,1.04233,-0.606195,2.04208,1.348243,0.861318,1.670332,2.332575
2,-1.332272,1.037649,1.855723,-0.535176,-0.825541,-0.819749,-0.843406,1.782896
3,-1.337263,1.037649,1.855723,-0.623495,-0.71875,-0.765037,-0.733544,0.932947
4,-1.337263,1.037649,1.855723,-0.461959,-0.611959,-0.75886,-0.628914,-0.013143


In [16]:
labels.head()

0    452600.0
1    358500.0
2    352100.0
3    341300.0
4    342200.0
Name: median_house_value, dtype: float64

## Neural Network Model

In [17]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(8, activation='relu', input_shape=(8, )))
model.add(keras.layers.Dense(1))

model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['mae']
)

In [18]:
model.build()

In [19]:
model.fit(features, labels, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1f81fec6eb0>

In [20]:
test_df = pd.DataFrame({
    'longitude': [-119.85], 
    'latitude': [37.48], 
    'housing_median_age': [22],
    'total_rooms': [2850],
    'total_bedrooms': [500],
    'population': [1150],
    'households': [460],
    'median_income': [3.12]
})

In [21]:
test_df    

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-119.85,37.48,22,2850,500,1150,460,3.12


In [22]:
test_df = test_df - mean
test_df = test_df / std

In [23]:
test_df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-0.139406,0.864456,-0.526779,0.097698,-0.089872,-0.242627,-0.103148,-0.395496


In [24]:
model.predict(test_df)

array([[3419.7246]], dtype=float32)

# The predicted median house value is 38523.855