In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, BatchNormalization

In [29]:
# from keras.wrappers.scikit_learn import KerasRegressor  # deprecated; use scikeras instead
from scikeras.wrappers import KerasClassifier, KerasRegressor

from sklearn.model_selection import train_test_split, cross_val_score, KFold

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes


In [4]:
# Load the diabetes sample dataset bundled with scikit-learn and list the
# fields exposed by the returned object.
diabetes = load_diabetes()

diabetes.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])

In [17]:
# Print the dataset's bundled description text.
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [18]:
# Preview the first two feature rows under descriptive column labels.
# These labels are hand-written and differ from `diabetes.feature_names`
# (which uses bp and s1..s6 for the last seven columns).
pd.DataFrame(
    diabetes.data,
    columns=["age", "sex", "bmi", "map", "tc", "ldl", "hdl", "tch", "ltg", "glu"],
).head(2)

Unnamed: 0,age,sex,bmi,map,tc,ldl,hdl,tch,ltg,glu
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204


In [10]:
diabetes.feature_names # dataset's own column names; the preview above used hand-written labels (map, tc, ldl, ...) instead of bp, s1..s6

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [21]:
diabetes.target[:5] # first five target values (the y label)

array([151.,  75., 141., 206., 135.])

## Assign features to `x` and target to `y`

In [22]:
# Feature matrix and target vector; display both shapes as a sanity check.
x, y = diabetes.data, diabetes.target
(x.shape, y.shape)

((442, 10), (442,))

## Design a small model

In [23]:
def reg_model(n_features=10):
    """
    Build and compile a very small fully connected regression model.

    Architecture: n_features inputs -> Dense(10, relu) -> Dense(16, relu)
    -> Dense(1) with no activation.

    param n_features : number of input columns (default 10, the width of the
                       diabetes feature matrix used in this notebook), so
                       calling reg_model() builds the same network as before
    return : Sequential model compiled with mean-squared-error loss and the
             Adam optimizer
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object instead of the
    # legacy `input_dim=` keyword on the first Dense layer.
    model.add(Input(shape=(n_features,)))
    model.add(Dense(10, activation="relu"))
    model.add(Dense(16, activation="relu"))

    # Regression head: one unit, no activation, so the output is unbounded.
    model.add(Dense(1))

    # Compile the model
    model.compile(loss="mean_squared_error", optimizer="adam")
    return model

In [24]:
# Build one instance of the small model and print its layer/parameter summary.
simple_model = reg_model()
simple_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                110       
                                                                 
 dense_1 (Dense)             (None, 16)                176       
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 303
Trainable params: 303
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Split the data , fit & Predict
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.1, random_state=0)
estimator = KerasRegressor(model=reg_model, epochs = 100, batch_size = 10, verbose = 0)
estimator.fit(x_train, y_train)

KerasRegressor(
	model=<function reg_model at 0x000001B9EE2BA040>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=10
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=100
)

In [32]:
# Predict on the held-out rows.
y_pred = estimator.predict(x_test)

# Root mean squared error = square root of the mean squared error.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("RMSE ", mse ** 0.5)

RMSE  56.16105209648738


## Creating a Deep Neural Network
##### Adding Dropout and BatchNormalization

In [42]:
def deep_reg_model(n_features=10):
    """
    Build and compile a deeper regression model with BatchNormalization and
    Dropout after every hidden Dense layer.

    Hidden widths are 10 -> 256 -> 128 -> 64, each followed by
    BatchNormalization and Dropout(0.2), then a single-unit output layer.

    Note: the dataset is very small, so this deeper model is not expected to
    give a very different result from the small one.

    param n_features : number of input columns (default 10, the width of the
                       diabetes feature matrix used in this notebook), so
                       calling deep_reg_model() builds the same network as
                       before
    return : Sequential model compiled with mean-squared-error loss and the
             Adam optimizer
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object instead of the
    # legacy `input_dim=` keyword on the first Dense layer.
    model.add(Input(shape=(n_features,)))

    # Every hidden stage is Dense -> BatchNormalization -> Dropout.
    # BatchNormalization here normalizes the preceding Dense layer's output
    # (not the raw input features).
    for units in (10, 256, 128, 64):
        model.add(Dense(units, activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    # Output layer: one unit, no activation.
    model.add(Dense(1))

    # Compile the model
    model.compile(loss="mean_squared_error", optimizer="adam")
    return model

In [44]:
# Build a throwaway instance of the deep model just to print its summary.
deep_reg_model().summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_22 (Dense)            (None, 10)                110       
                                                                 
 batch_normalization_5 (Batc  (None, 10)               40        
 hNormalization)                                                 
                                                                 
 dropout_4 (Dropout)         (None, 10)                0         
                                                                 
 dense_23 (Dense)            (None, 256)               2816      
                                                                 
 batch_normalization_6 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dropout_5 (Dropout)         (None, 256)              

In [47]:
# Use data split and fit to run the model
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.1, random_state=0)
estimator = KerasRegressor(model=deep_reg_model, epochs=100, batch_size=10, verbose=0)

estimator.fit(x_train, y_train)
y_pred = estimator.predict(x_test)

## Show its root mean Square Error
mse = mean_squared_error(y_test, y_pred)
print("KERAS REG RMSE : %.2f" % (mse ** 0.5))

KERAS REG RMSE : 59.94


## Use KFold cross-validation to evaluate the model

In [49]:
seed = 10
np.random.seed(seed)

# Seeding NumPy alone does not seed TensorFlow's random generators, so the
# seed is also handed to the wrapper; each cloned estimator that
# cross_val_score fits then trains from a reproducible state.
estimator = KerasRegressor(
    model=deep_reg_model,
    epochs=100,
    batch_size=10,
    verbose=0,
    random_state=seed,
)
# KFold without shuffling yields the same 10 contiguous folds on every run.
kfold = KFold(n_splits=10)

# Show its root mean squared error.
# The scorer returns *negative* MSE per fold, hence the sign flip.
results = cross_val_score(estimator, x, y, scoring="neg_mean_squared_error", cv=kfold)
mse = -results.mean()
print("Keras Reg RMSE : %.2f" %(mse ** 0.5))

Keras Reg RMSE : 57.99


In [50]:
# krishdb38@gmail.com