In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print (tf.__version__)

2.18.0


loading dataset

In [2]:
from pandas.core import indexing
column_names = ['longitude','latitude','housing_median_age',
                'total_rooms','total_bedrooms','population','households',
                'median_income','ocean_proximity','median_house_value']

raw_dataset = pd.read_csv('/content/housing.csv', names= column_names,
                          na_values="?", comment='\t',
                          sep=",",index_col=False, skiprows=1)

dataset = raw_dataset.copy()
dataset.tail()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
20635,-121.09,39.48,25,1665,374.0,845,330,1.5603,INLAND,78100
20636,-121.21,39.49,18,697,150.0,356,114,2.5568,INLAND,77100
20637,-121.22,39.43,17,2254,485.0,1007,433,1.7,INLAND,92300
20638,-121.32,39.43,18,1860,409.0,741,349,1.8672,INLAND,84700
20639,-121.24,39.37,16,2785,616.0,1387,530,2.3886,INLAND,89400


In [3]:
dataset.isna().sum()

Unnamed: 0,0
longitude,0
latitude,0
housing_median_age,0
total_rooms,0
total_bedrooms,207
population,0
households,0
median_income,0
ocean_proximity,0
median_house_value,0


In [4]:
dataset = dataset.dropna()
dataset

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25,1665,374.0,845,330,1.5603,INLAND,78100
20636,-121.21,39.49,18,697,150.0,356,114,2.5568,INLAND,77100
20637,-121.22,39.43,17,2254,485.0,1007,433,1.7000,INLAND,92300
20638,-121.32,39.43,18,1860,409.0,741,349,1.8672,INLAND,84700


In [5]:
# origin = dataset.pop('ocean_proximity')
# dataset['USA'] = (origin == '') * 1.0
# dataset['Europe'] = (origin == 2) * 1.0
# dataset['Japan'] = (origin == 3) * 1.0
# dataset.tail()
dataset = pd.get_dummies(dataset, columns=['ocean_proximity'], prefix='ocean_proximity')
dataset

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
0,-122.23,37.88,41,880,129.0,322,126,8.3252,452600,False,False,False,True,False
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,358500,False,False,False,True,False
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,352100,False,False,False,True,False
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,341300,False,False,False,True,False
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,342200,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25,1665,374.0,845,330,1.5603,78100,False,True,False,False,False
20636,-121.21,39.49,18,697,150.0,356,114,2.5568,77100,False,True,False,False,False
20637,-121.22,39.43,17,2254,485.0,1007,433,1.7000,92300,False,True,False,False,False
20638,-121.32,39.43,18,1860,409.0,741,349,1.8672,84700,False,True,False,False,False


In [6]:
# Convert all boolean columns to integers (True -> 1, False -> 0)
dataset = dataset.astype({col: 'int' for col in dataset.select_dtypes('bool').columns})
dataset

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
0,-122.23,37.88,41,880,129.0,322,126,8.3252,452600,0,0,0,1,0
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,358500,0,0,0,1,0
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,352100,0,0,0,1,0
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,341300,0,0,0,1,0
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,342200,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25,1665,374.0,845,330,1.5603,78100,0,1,0,0,0
20636,-121.21,39.49,18,697,150.0,356,114,2.5568,77100,0,1,0,0,0
20637,-121.22,39.43,17,2254,485.0,1007,433,1.7000,92300,0,1,0,0,0
20638,-121.32,39.43,18,1860,409.0,741,349,1.8672,84700,0,1,0,0,0


In [7]:
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [8]:
train_stats = train_dataset.describe()
train_stats

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
count,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0,16346.0
mean,-119.564154,35.630318,28.664505,2622.235776,535.281659,1416.087055,496.758167,3.869337,206916.154411,0.441454,0.319405,0.000306,0.109874,0.128961
std,2.002618,2.138574,12.556764,2169.548287,418.469078,1103.842065,379.109535,1.902228,115676.394484,0.496576,0.466261,0.017487,0.312742,0.335167
min,-124.35,32.55,1.0,11.0,3.0,3.0,3.0,0.4999,14999.0,0.0,0.0,0.0,0.0,0.0
25%,-121.79,33.93,18.0,1448.0,296.0,784.25,280.0,2.555675,119300.0,0.0,0.0,0.0,0.0,0.0
50%,-118.49,34.25,29.0,2119.0,432.5,1164.0,408.0,3.5332,179700.0,0.0,0.0,0.0,0.0,0.0
75%,-118.0,37.71,37.0,3120.75,644.0,1711.0,600.0,4.744225,265900.0,1.0,1.0,0.0,0.0,0.0
max,-114.47,41.95,52.0,39320.0,6445.0,28566.0,6082.0,15.0001,500001.0,1.0,1.0,1.0,1.0,1.0


seprate the output fields from the othe fields


In [9]:
train_labels = train_dataset.pop('median_house_value')
test_labels = test_dataset.pop('median_house_value')

In [10]:
train_stats = train_dataset.describe()
train_stats = train_stats.transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
longitude,16346.0,-119.564154,2.002618,-124.35,-121.79,-118.49,-118.0,-114.47
latitude,16346.0,35.630318,2.138574,32.55,33.93,34.25,37.71,41.95
housing_median_age,16346.0,28.664505,12.556764,1.0,18.0,29.0,37.0,52.0
total_rooms,16346.0,2622.235776,2169.548287,11.0,1448.0,2119.0,3120.75,39320.0
total_bedrooms,16346.0,535.281659,418.469078,3.0,296.0,432.5,644.0,6445.0
population,16346.0,1416.087055,1103.842065,3.0,784.25,1164.0,1711.0,28566.0
households,16346.0,496.758167,379.109535,3.0,280.0,408.0,600.0,6082.0
median_income,16346.0,3.869337,1.902228,0.4999,2.555675,3.5332,4.744225,15.0001
ocean_proximity_<1H OCEAN,16346.0,0.441454,0.496576,0.0,0.0,0.0,1.0,1.0
ocean_proximity_INLAND,16346.0,0.319405,0.466261,0.0,0.0,0.0,1.0,1.0


model training

In [11]:
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

def build_model():
  model = keras.Sequential([
      layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
      layers.Dense(64, activation='relu'),
      layers.Dense(1)
  ])

  optimizer =tf.keras.optimizers.RMSprop(0.001)

  model.compile(loss='mse',
                optimizer = optimizer,
                metrics = ['mae', 'mse'])
  return model



In [12]:
model = build_model()
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
EPOCHS = 1000
history = model.fit(
    normed_train_data, train_labels,
    epochs = EPOCHS
)

Epoch 1/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 9948183552.0000 - mae: 75103.3438 - mse: 9948183552.0000
Epoch 2/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 8723932160.0000 - mae: 68868.1328 - mse: 8723932160.0000
Epoch 3/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 7962162688.0000 - mae: 65432.8906 - mse: 7962162688.0000
Epoch 4/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7448031744.0000 - mae: 62968.2188 - mse: 7448031744.0000
Epoch 5/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6880701440.0000 - mae: 60157.7109 - mse: 6880701440.0000
Epoch 6/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6654519296.0000 - mae: 58480.1133 - mse: 6654519296.0000
Epoch 7/1000
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms

model testing


In [17]:
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
loss,mae,mse

(3158069504.0, 37732.5703125, 3158069504.0)

In [18]:
test_dataset

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
10,-122.26,37.85,52,2202,434.0,910,402,3.2031,0,0,0,1,0
13,-122.26,37.84,52,696,191.0,345,174,2.6736,0,0,0,1,0
19,-122.27,37.84,52,1503,298.0,690,275,2.6033,0,0,0,1,0
28,-122.28,37.84,50,2082,492.0,1131,473,1.6424,0,0,0,1,0
40,-122.26,37.83,52,1665,419.0,946,395,2.0978,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20605,-121.58,39.12,26,2796,629.0,2017,632,1.8355,0,1,0,0,0
20609,-121.56,39.11,18,2171,480.0,1527,447,2.3011,0,1,0,0,0
20620,-121.48,39.05,40,198,41.0,151,48,4.5625,0,1,0,0,0
20622,-121.44,39.00,20,755,147.0,457,157,2.4167,0,1,0,0,0


In [19]:
test_predictions = model.predict(normed_test_data).flatten()
test_predictions

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


array([233902.36 , 187942.12 , 164362.42 , ..., 129048.07 ,  92744.164,
       128648.03 ], dtype=float32)

In [20]:
test_labels

Unnamed: 0,median_house_value
10,281500
13,191300
19,162900
28,108900
40,155400
...,...
20605,61200
20609,57500
20620,100000
20622,67000


In [22]:
kearas_file = "house_prediction.h5"
tf.keras.models.save_model(model, kearas_file)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open("housePricePredmodel.tflite", 'wb').write(tflite_model)



Saved artifact at '/tmp/tmphmt2idmx'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 13), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  137670792700432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137670792704080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137670792702352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137670792700624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137670792704656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137670792700816: TensorSpec(shape=(), dtype=tf.resource, name=None)


22368