# Chapter 20: Reduce Overfitting with Dropout Regularization

## Dropout Regularization in Keras

In [2]:
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Read the sonar CSV (it has no header row) and expose it as a NumPy array
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.to_numpy()

# Columns 0-59 are the inputs (cast to float); column 60 is the output
X, Y = dataset[:, :60].astype(float), dataset[:, 60]

# Encode the class values as integers in one fit-and-transform step
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# Baseline model factory (no dropout)
def create_baseline():
    """Build and compile the baseline network.

    Architecture: 60 inputs -> Dense(60, relu) -> Dense(30, relu)
    -> Dense(1, sigmoid).

    Returns:
        A compiled Sequential model using binary cross-entropy loss, SGD
        (learning rate 0.01, momentum 0.8) and an accuracy metric.
    """
    network = Sequential([
        Input(shape=(60,)),  # explicit Input layer declares the 60 features
        Dense(60, activation='relu'),
        Dense(30, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    network.compile(
        loss='binary_crossentropy',
        optimizer=SGD(learning_rate=0.01, momentum=0.8),
        metrics=['accuracy'],
    )
    return network

# One seed for both the fold shuffling and the network training. Previously
# only the fold split was seeded, so weight initialisation and batch order
# differed on every run and the printed score could not be reproduced.
SEED = 42

# Pipeline: standardize the inputs, then fit the MLP. Because the scaler is a
# pipeline step, it is re-fit on the training part of every fold.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(
        model=create_baseline,
        epochs=300,
        batch_size=16,
        verbose=0,
        random_state=SEED,  # seeds TensorFlow's RNGs for each fit
    )),
]
pipeline = Pipeline(estimators)

# Stratified 10-fold cross-validation with a fixed shuffle
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)

# Evaluate the pipeline; one score is returned per fold
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Report mean and standard deviation of the fold scores as percentages
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 88.90% (7.49%)


## Using Dropout on the Visible Layer

In [3]:
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Read the sonar CSV (it has no header row) and expose it as a NumPy array
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.to_numpy()

# Columns 0-59 are the inputs (cast to float); column 60 is the output
X, Y = dataset[:, :60].astype(float), dataset[:, 60]

# Encode the class values as integers in one fit-and-transform step
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# Model factory: dropout on the input (visible) layer plus weight constraints
def create_model():
    """Build and compile the network with dropout applied to the inputs.

    Architecture: 60 inputs -> Dropout(0.2) -> Dense(60, relu)
    -> Dense(30, relu) -> Dense(1, sigmoid). Both hidden Dense layers carry
    a MaxNorm(3) kernel constraint.

    Returns:
        A compiled Sequential model using binary cross-entropy loss, SGD
        (learning rate 0.1, momentum 0.9) and an accuracy metric.
    """
    network = Sequential([
        Input(shape=(60,)),  # explicit Input layer declares the 60 features
        Dropout(0.2),
        Dense(60, activation='relu', kernel_constraint=MaxNorm(3)),
        Dense(30, activation='relu', kernel_constraint=MaxNorm(3)),
        Dense(1, activation='sigmoid'),
    ])

    network.compile(
        loss='binary_crossentropy',
        optimizer=SGD(learning_rate=0.1, momentum=0.9),
        metrics=['accuracy'],
    )
    return network

# One seed for both the fold shuffling and the network training. Previously
# only the fold split was seeded, so weight initialisation, dropout masks and
# batch order differed on every run and the printed score could not be
# reproduced.
SEED = 42

# Pipeline: standardize the inputs, then fit the model. Because the scaler is
# a pipeline step, it is re-fit on the training part of every fold.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(
        model=create_model,
        epochs=300,
        batch_size=16,
        verbose=0,
        random_state=SEED,  # seeds TensorFlow's RNGs for each fit
    )),
]
pipeline = Pipeline(estimators)

# Evaluate the model with stratified 10-fold cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Report mean and standard deviation of the fold scores as percentages
print("Visible: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))


Visible: 84.12% (7.61%)


## Using Dropout on Hidden Layers

In [5]:
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Read the sonar CSV (it has no header row) and expose it as a NumPy array
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.to_numpy()

# Columns 0-59 are the inputs (cast to float); column 60 is the output
X, Y = dataset[:, :60].astype(float), dataset[:, 60]

# Encode the class values as integers in one fit-and-transform step
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# Model factory: dropout after each hidden layer plus weight constraints
def create_model():
    """Build and compile the network with dropout after each hidden layer.

    Architecture: 60 inputs -> Dense(60, relu) -> Dropout(0.2)
    -> Dense(30, relu) -> Dropout(0.2) -> Dense(1, sigmoid). Both hidden
    Dense layers carry a MaxNorm(3) kernel constraint.

    Returns:
        A compiled Sequential model using binary cross-entropy loss, SGD
        (learning rate 0.1, momentum 0.9) and an accuracy metric.
    """
    network = Sequential([
        Input(shape=(60,)),  # Input layer instead of input_shape on Dense
        Dense(60, activation='relu', kernel_constraint=MaxNorm(3)),
        Dropout(0.2),
        Dense(30, activation='relu', kernel_constraint=MaxNorm(3)),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ])

    network.compile(
        loss='binary_crossentropy',
        optimizer=SGD(learning_rate=0.1, momentum=0.9),
        metrics=['accuracy'],
    )
    return network

# One seed for both the fold shuffling and the network training. Previously
# only the fold split was seeded, so weight initialisation, dropout masks and
# batch order differed on every run and the printed score could not be
# reproduced.
SEED = 42

# Pipeline: standardize the inputs, then fit the model. Because the scaler is
# a pipeline step, it is re-fit on the training part of every fold.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(
        model=create_model,
        epochs=300,
        batch_size=16,
        verbose=0,
        random_state=SEED,  # seeds TensorFlow's RNGs for each fit
    )),
]
pipeline = Pipeline(estimators)

# Evaluate the model with stratified 10-fold cross-validation
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Report mean and standard deviation of the fold scores as percentages
print("Hidden: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))


Hidden: 85.62% (5.56%)
