Dropout es un método que permite apagar aleatoriamente un porcentaje de las neuronas de una red durante el entrenamiento. Esto evita que las neuronas memoricen los datos de entrenamiento y disminuye la probabilidad de overfitting.


In [1]:
# Baseline Model on the Sonar Dataset
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# Fix the random seed for reproducibility; `seed` is reused by later cells.
# NOTE(review): only NumPy's global RNG is seeded here; whether that is enough for
# fully deterministic Keras/TensorFlow training is not shown in this cell — confirm.
seed = 7
numpy.random.seed(seed)


Using TensorFlow backend.


In [2]:
# Load the sonar CSV and keep its contents as a plain array for slicing.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere unless the file exists at exactly this location.
sonar_csv = "C:/Users/joaqu/Desktop/proyecto/ensayos/datos/sonar.csv"
# header=0: the first row of the file is consumed as column names, not data.
dataframe = pandas.read_csv(sonar_csv, header=0)
dataset = dataframe.values
print(dataset)

[[0.02 0.0371 0.0428 ... 0.009 0.0032 'Rock']
 [0.0453 0.0523 0.0843 ... 0.0052 0.0044 'Rock']
 [0.0262 0.0582 0.1099 ... 0.0095 0.0078 'Rock']
 ...
 [0.0522 0.0437 0.018 ... 0.0077 0.0031 'Mine']
 [0.0303 0.0353 0.049 ... 0.0036 0.0048 'Mine']
 [0.026 0.0363 0.0136 ... 0.0061 0.0115 'Mine']]


In [3]:
# Split the array into inputs and target: columns 0..59 are the numeric
# features (cast to float), column 60 holds the class label.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]
# Encode the class labels as integers; the fitted encoder is kept so the
# mapping can be inspected or inverted later.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

In [4]:
# baseline
def create_baseline():
    """Build and compile the baseline network, which has no dropout.

    Layer stack: 60 inputs -> Dense(60, relu) -> Dense(30, relu) ->
    Dense(1, sigmoid), all with the "normal" kernel initializer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD
        optimizer, accuracy metric).
    """
    layers = [
        Dense(60, input_dim=60, kernel_initializer="normal", activation="relu"),
        Dense(30, kernel_initializer="normal", activation="relu"),
        Dense(1, kernel_initializer="normal", activation="sigmoid"),
    ]
    model = Sequential(layers)
    # nesterov toggles Nesterov momentum; decay is the learning-rate decay.
    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

def dropout_model():
    """Build and compile a network with 20% dropout applied to the inputs.

    Layer stack: Dropout(0.2) on the 60 inputs -> Dense(60, relu) ->
    Dense(30, relu, max-norm 3) -> Dense(1, sigmoid).

    Author's note: this layer configuration gave better results than the
    one in create_baseline.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD
        optimizer, accuracy metric).
    """
    layers = [
        Dropout(0.2, input_shape=(60,)),
        # No kernel_constraint on this layer; only the second hidden layer
        # is max-norm constrained.
        Dense(60, kernel_initializer="normal", activation="relu"),
        Dense(30, kernel_initializer="normal", activation="relu", kernel_constraint=maxnorm(3.)),
        Dense(1, kernel_initializer="normal", activation="sigmoid"),
    ]
    model = Sequential(layers)
    # nesterov toggles Nesterov momentum; decay is the learning-rate decay.
    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

def dropout_model2():
    """Build and compile a network with 20% dropout after each hidden layer.

    Layer stack: Dense(60, relu, max-norm 3) -> Dropout(0.2) ->
    Dense(30, relu, max-norm 3) -> Dropout(0.2) -> Dense(1, sigmoid).
    The optimizer uses a larger learning rate (0.1) and momentum (0.9)
    than the other two builders in this notebook.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD
        optimizer, accuracy metric).
    """
    layers = [
        Dense(60, input_dim=60, kernel_initializer="normal", activation="relu",
              kernel_constraint=maxnorm(3)),
        Dropout(0.2),
        Dense(30, kernel_initializer="normal", activation="relu",
              kernel_constraint=maxnorm(3)),
        Dropout(0.2),
        Dense(1, kernel_initializer="normal", activation="sigmoid"),
    ]
    model = Sequential(layers)
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model


In [5]:
numpy.random.seed(seed)

# Each step list standardizes the inputs and then trains one of the Keras
# builders with identical training settings (300 epochs, batch size 16, silent).

# Baseline network (no dropout).
estimators = [
    ("standardize", StandardScaler()),
    ("mlp", KerasClassifier(build_fn=create_baseline, epochs=300,
                            batch_size=16, verbose=0)),
]

# Dropout on the input layer.
estimators2 = [
    ("standardize", StandardScaler()),
    ("mlp", KerasClassifier(build_fn=dropout_model, epochs=300,
                            batch_size=16, verbose=0)),
]

# Dropout after each hidden layer.
estimators3 = [
    ("standardize", StandardScaler()),
    ("mlp", KerasClassifier(build_fn=dropout_model2, epochs=300,
                            batch_size=16, verbose=0)),
]



In [6]:
# One pipeline per model variant, built from the matching step list:
# pipeline -> create_baseline, pipeline2 -> dropout_model, pipeline3 -> dropout_model2.
pipeline = Pipeline(steps=estimators)
pipeline2 = Pipeline(steps=estimators2)
pipeline3 = Pipeline(steps=estimators3)

In [7]:
# 10-fold stratified cross-validation with shuffling; random_state=seed fixes the
# shuffle. This one splitter object is passed to every cross_val_score call.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

In [8]:
# Cross-validate pipeline2 (the dropout_model variant, dropout on the input layer)
# on the shared stratified folds and report mean and standard deviation of the
# fold scores as percentages.
results = cross_val_score(pipeline2, X, encoded_Y, cv=kfold)
# Label the score with the model that was actually evaluated so it cannot be
# mistaken for the baseline network's result.
print("Dropout (visible layer): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 85.54% (5.40%)


In [9]:
# Cross-validate the baseline pipeline (create_baseline, no dropout) on the
# shared stratified folds and report mean and standard deviation as percentages.
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
mean_pct, std_pct = results.mean()*100, results.std()*100
print("Baseline: %.2f%% (%.2f%%)" % (mean_pct, std_pct))

Baseline: 83.57% (5.57%)


Por algún extraño motivo, el ejemplo de dropout no mejora el rendimiento de la red.
En otros ejemplos del libro sucede lo mismo; debo anotar en cuáles pasa y preguntar.

In [None]:
# Cross-validate pipeline3 (the dropout_model2 variant, dropout after each hidden
# layer) on the shared stratified folds and report mean and standard deviation
# of the fold scores as percentages.
results = cross_val_score(pipeline3, X, encoded_Y, cv=kfold)
# Label the score with the model that was actually evaluated so it cannot be
# mistaken for the baseline network's result.
print("Dropout (hidden layers): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))