## Importing the Libraries

In [18]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

## Loading the dataset

Loading the shuffled dataset in CSV format named willow_creek_may_sept_shuffled.csv



In [49]:
#Load Dataset
df = pd.read_csv("willow_creek_may_sept_shuffled.csv")
print(df.isnull().any()) #check for missing values
# bool_series = pd.isnull(df["pH"])
# print(df[bool_series])
dataset = df.values
#Split into 5 Input(X) and 1 output (Y) variables
X = dataset[:, 0:5]
Y = dataset[:,5]
#print(X)

Temperature             False
Specific conductance    False
Dissolved oxygen        False
pH                      False
Turbidity               False
Chlorophyll             False
dtype: bool


## Defining Several Models for Comparison


In [59]:
#define base model

def baseline_model():
    #Create Model
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(1, kernel_initializer = 'normal'))
    #Compile Model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics = 'accuracy')
    return model

In [60]:
# model = baseline_model()
# model.fit(X, Y, epochs = 100, batch_size = 8)
# _, accuracy = model.evaluate(X,Y)
# print('Accuracy: %.2f' % (accuracy*100))

In [63]:
# evaluate model without Standardization
# Result: Baseline: -56.32 (27.62) MSE
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

KeyboardInterrupt: 

In [61]:
#Evaluate the Model
#Result Standardized: -30.68 (11.45) MSE
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size = 5, verbose = 0)))
pipeline = Pipeline(estimators)
                  
#K-fold Cross Validation K=10
kfold = KFold(n_splits = 10)
results = cross_val_score(pipeline, X, Y, cv = kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -30.68 (11.45) MSE


In [64]:
#Deeper Network Topology- 1 Hidden Layer

def deeper_model():
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(3, kernel_initializer = 'normal', activation='relu'))
    model.add(Dense(1, kernel_initializer = 'normal'))
    #Compile Model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics = 'accuracy')
    return model

In [68]:
#Evaluate the Model
#Result Standardized: -47.40 (59.65) MSE (epoch=50, batch=5)
#Result Standardized: -55.73 (77.17) MSE (epoch=50, batch=32)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=deeper_model, epochs=50, batch_size = 32, verbose = 0)))
pipeline = Pipeline(estimators)
                  
#K-fold Cross Validation K=10
kfold = KFold(n_splits = 10)
results = cross_val_score(pipeline, X, Y, cv = kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -55.73 (77.17) MSE


In [66]:
#Wider Network topology - 1 Hidden Layer

def wider_model():
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer = 'normal', activation = 'relu'))
    model.add(Dense(10, kernel_initializer = 'normal', activation='relu'))
    model.add(Dense(1, kernel_initializer = 'normal'))
    #Compile Model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics = 'accuracy')
    return model

In [67]:
#Evaluate the Model
#Result Standardized: -27.36 (10.55) MSE (epoch=100, batch = 5)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, epochs=100, batch_size = 5, verbose = 0)))
pipeline = Pipeline(estimators)
                  
#K-fold Cross Validation K=10
kfold = KFold(n_splits = 10)
results = cross_val_score(pipeline, X, Y, cv = kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -27.36 (10.55) MSE
