In [1]:
import tensorflow as tf
from tensorflow import keras

In [0]:
import numpy 
import pandas
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [4]:
from google.colab import files
files.upload()
dataframe = pandas.read_csv("housing.csv", delim_whitespace=True, header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:13]
Y = dataset[:,13]


Saving housing.csv to housing.csv


In [5]:
dataset

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 3.9690e+02, 4.9800e+00,
        2.4000e+01],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 3.9690e+02, 9.1400e+00,
        2.1600e+01],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 3.9283e+02, 4.0300e+00,
        3.4700e+01],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 5.6400e+00,
        2.3900e+01],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 3.9345e+02, 6.4800e+00,
        2.2000e+01],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 7.8800e+00,
        1.1900e+01]])

In [0]:
# define base model
def baseline_model():
    # create model
    model=Sequential()
    model.add(Dense(13,input_shape=(13,),activation='relu'))
    model.add(Dense(1))
   
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    

    return model



In [0]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)



In [8]:
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))


Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Results: -50.09 (50.33) MSE


# Baseline Model With Standarized Dataset



In [9]:
# evaluate model with standardized dataset
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))



Standardized: -29.33 (37.50) MSE


# large model

In [0]:
# define larger model
def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(13, activation = 'relu', input_shape = (13,)))
    model.add(Dense(6, activation = 'relu'))
    model.add(Dense(1))

    # Compile model
    model.compile(optimizer = 'adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [11]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))




Larger: -23.09 (22.73) MSE


## Wider Model

In [0]:
def wider_model():
    # create model
    model = Sequential()
    model.add(Dense(20, activation = 'relu', input_shape = (13,)))
    model.add(Dense(1))

    # Compile model
    model.compile(optimizer = 'Adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [18]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))



Wider: -22.82 (23.32) MSE


## Overfit Model

In [0]:
def overfit_model():
    # create model
    model = Sequential()
    model.add(Dense(13, activation = 'relu', input_shape = (13,)))
    model.add(Dense(13, activation = 'relu'))
    model.add(Dense(20, activation = 'relu'))
    model.add(Dense(6, activation = 'relu'))
    model.add(Dense(1))

    # Compile model
    model.compile(optimizer = 'Adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [20]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, epochs=200, batch_size=50, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Overfit: %.2f (%.2f) MSE" % (results.mean(), results.std()))



Overfit: -31.76 (31.08) MSE


## Without Using Scikit-Learn
# K-Fold Cross Validation

In [0]:
kf = 10
num_val_samples = len(X) // kf
num_epochs = 50
all_scores = []

In [24]:
for i in range(kf):
    print('processing fold #', i)
    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = Y[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = numpy.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = numpy.concatenate(
        [Y[:i * num_val_samples],
         Y[(i + 1) * num_val_samples:]],
        axis=0)
    
    model = baseline_model()
    
    model.compile(optimizer='adam',
    loss='mse',
              metrics=['mae'])
    model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=5, verbose=0)
    mse, mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(mse)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [25]:
all_scores

[23.330282897949218,
 19.055900802612303,
 26.93008026123047,
 98.31065460205077,
 40.7507445526123,
 112.02011291503906,
 73.69929809570313,
 66.72066764831543,
 35.856346130371094,
 73.25968551635742]

In [27]:
print("K-Fold: %.2f (%.2f)" % (numpy.mean(all_scores), numpy.std(all_scores)))

K-Fold: 56.99 (30.92)
