# Imports

In [1]:
import keras
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import seaborn as sns
import kaggle
from zipfile import ZipFile



In [2]:
zf = ZipFile('archive.zip')
zf.extractall('data/') #save files in selected folder
zf.close()

df = pd.read_csv('data/housing.csv')
df.head(10)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
5,-122.25,37.85,52.0,919.0,213.0,413.0,193.0,4.0368,269700.0,NEAR BAY
6,-122.25,37.84,52.0,2535.0,489.0,1094.0,514.0,3.6591,299200.0,NEAR BAY
7,-122.25,37.84,52.0,3104.0,687.0,1157.0,647.0,3.12,241400.0,NEAR BAY
8,-122.26,37.84,42.0,2555.0,665.0,1206.0,595.0,2.0804,226700.0,NEAR BAY
9,-122.25,37.84,52.0,3549.0,707.0,1551.0,714.0,3.6912,261100.0,NEAR BAY


# Cleaning

In [3]:
df.isna().mean()

longitude             0.000000
latitude              0.000000
housing_median_age    0.000000
total_rooms           0.000000
total_bedrooms        0.010029
population            0.000000
households            0.000000
median_income         0.000000
median_house_value    0.000000
ocean_proximity       0.000000
dtype: float64

In [4]:
df.total_bedrooms.fillna(method='pad', inplace=True)
df_drop = df.dropna()

In [5]:
df.isna().mean()

longitude             0.0
latitude              0.0
housing_median_age    0.0
total_rooms           0.0
total_bedrooms        0.0
population            0.0
households            0.0
median_income         0.0
median_house_value    0.0
ocean_proximity       0.0
dtype: float64

# Features

In [11]:
for item in list(df.ocean_proximity.unique()):
    df[item] = (df.ocean_proximity == item)

data = df.drop('ocean_proximity', axis=1)
    
for item in list(df_drop.ocean_proximity.unique()):
    df_drop[item] = (df_drop.ocean_proximity == item)

data_drop = df_drop.drop('ocean_proximity', axis=1)

# Model

## Model_fillna

In [12]:
import keras
import numpy as np
from keras.layers import Input
from keras.layers import Dense
from keras.layers import BatchNormalization
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from sklearn.model_selection import train_test_split

x = data.drop('median_house_value', axis=1).astype('float32')
y = data.median_house_value

#x[float_conv] = x[float_conv].astype('float32')
x = np.asarray(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [13]:
inputs = Input(shape=(x_train.shape[1],))
norm = BatchNormalization(axis=-1,
    momentum=0.99,
    epsilon=0.001,
    center=True,
    scale=True,
    beta_initializer="zeros",
    gamma_initializer="ones",
    moving_mean_initializer="zeros",
    moving_variance_initializer="ones")
x = norm(inputs)
x = Dense(round(x_train.shape[1]), activation='relu')(x)
x = Dense(round(x_train.shape[1]/2), activation='relu')(x)
x = Dense(5, activation='relu')(x)
outputs = Dense(1)(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="Rain")
model.summary()

Model: "Rain"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 13)]              0         
                                                                 
 batch_normalization_1 (Batc  (None, 13)               52        
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 13)                182       
                                                                 
 dense_5 (Dense)             (None, 6)                 84        
                                                                 
 dense_6 (Dense)             (None, 5)                 35        
                                                                 
 dense_7 (Dense)             (None, 1)                 6         
                                                              

In [14]:
from keras import backend as K

def coeff_determination(y_true, y_pred):
    SS_res =  K.sum(K.square( y_true-y_pred )) 
    SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) ) 
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

model.compile(
    loss=tf.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error"),
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=[coeff_determination],
)

history = model.fit(x_train, y_train, batch_size=32, epochs=100, validation_split=0.4, verbose=0)

test_scores = model.evaluate(x_test, y_test, verbose=2)
print("Test loss:", test_scores[0])
print("Test accuracy:", test_scores[1])

129/129 - 0s - loss: 4880592896.0000 - coeff_determination: 0.6034 - 370ms/epoch - 3ms/step
Test loss: 4880592896.0
Test accuracy: 0.6033796668052673


## Model drop

In [15]:
import keras
import numpy as np
from keras.layers import Input
from keras.layers import Dense
from keras.layers import BatchNormalization
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from sklearn.model_selection import train_test_split

x = data_drop.drop('median_house_value', axis=1).astype('float32')
y = data_drop.median_house_value

#x[float_conv] = x[float_conv].astype('float32')
x = np.asarray(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [16]:
inputs = Input(shape=(x_train.shape[1],))
norm = BatchNormalization(axis=-1,
    momentum=0.99,
    epsilon=0.001,
    center=True,
    scale=True,
    beta_initializer="zeros",
    gamma_initializer="ones",
    moving_mean_initializer="zeros",
    moving_variance_initializer="ones")
x = norm(inputs)
x = Dense(round(x_train.shape[1]), activation='relu')(x)
x = Dense(round(x_train.shape[1]/2), activation='relu')(x)
x = Dense(5, activation='relu')(x)
outputs = Dense(1)(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="Rain")
model.summary()

Model: "Rain"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 13)]              0         
                                                                 
 batch_normalization_2 (Batc  (None, 13)               52        
 hNormalization)                                                 
                                                                 
 dense_8 (Dense)             (None, 13)                182       
                                                                 
 dense_9 (Dense)             (None, 6)                 84        
                                                                 
 dense_10 (Dense)            (None, 5)                 35        
                                                                 
 dense_11 (Dense)            (None, 1)                 6         
                                                              

In [17]:
model.compile(
    loss=tf.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error"),
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=[coeff_determination],
)

history = model.fit(x_train, y_train, batch_size=32, epochs=100, validation_split=0.4, verbose=0)

test_scores = model.evaluate(x_test, y_test, verbose=2)
print("Test loss:", test_scores[0])
print("Test accuracy:", test_scores[1])

129/129 - 0s - loss: 4947945984.0000 - coeff_determination: 0.5961 - 336ms/epoch - 3ms/step
Test loss: 4947945984.0
Test accuracy: 0.5961284637451172
