## Regularization

### The process of fighting overfitting

### Common Regularization methods

#### -> Reducing network size
#### -> Adding weight regularization - L1 and L2
#### -> Early stopping
#### -> Dropout


In [None]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Input, Dense
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
from keras.models import load_model
from keras.regularizers import l1, l2, l1_l2

## Data Preparation

In [None]:
# Load the IMDB reviews, keeping only the 10000 most frequently occurring
# words in the training data. A fixed seed (19) is passed to `load_data`.
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(
    num_words=10000,
    seed=19,
)

# Encoding the integer sequences into a binary matrix
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2-D binary matrix.

    Args:
        sequences: A sized iterable whose items are collections of integer
            indices, each index smaller than ``dimension``.
        dimension: Number of columns in the returned matrix.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` where entry
        ``[i, j]`` is 1.0 if index ``j`` occurs in ``sequences[i]`` and 0.0
        otherwise. Repeated indices within a sequence still yield 1.0.
    """
    encoded = np.zeros(shape=(len(sequences), dimension))
    for row, word_indices in enumerate(sequences):
        # Fancy indexing switches on every listed column of this row at once.
        encoded[row, word_indices] = 1.0
    return encoded

# Multi-hot encode the reviews of both splits.
x_train, x_test = (vectorize_sequences(split) for split in (X_train, X_test))

# vectorize labels
y_train, y_test = (
    np.asarray(labels).astype('float32') for labels in (Y_train, Y_test)
)

# Hold out the first 10000 training samples for validation and train on
# the remainder; features and labels are split at the same index.
x_val, x_train = x_train[:10000], x_train[10000:]
y_val, y_train = y_train[:10000], y_train[10000:]




##  IMDB Movie Review Classification with L1 Regularization

In [None]:
# Baseline classifier with an L1 weight penalty on both hidden layers.
# NOTE: despite the `modell2` name, the regularizer used here is `l1`.
modell2 = Sequential()
modell2.add(Input((10000,)))  # 10000 features, the default width of vectorize_sequences
modell2.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modell2.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modell2.add(Dense(1, activation='sigmoid'))  # single sigmoid output unit

# `summary()` prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
modell2.summary()

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
modell2.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Number of training epochs; later cells reuse this module-level value.
epochs = 20
# Train on the reduced training set and evaluate on the held-out
# validation split after every epoch.
history = modell2.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
# Plot training vs. validation accuracy per epoch for the latest fit.
hist = history.history
# Take the x-axis length from the recorded history rather than the global
# `epochs`, so the plot stays valid if training ran for fewer epochs.
n_epochs = len(hist["accuracy"])
epoch_axis = range(1, n_epochs + 1)
plt.plot(epoch_axis, hist["accuracy"], marker="*")
plt.plot(epoch_axis, hist["val_accuracy"], marker="^")
plt.legend(["Train", "validation"])
plt.xlabel("epoch")
plt.ylabel("Accuracy")
plt.show()

## IMDB Movie Review Classification with L1 Regularization and Early Stopping

### Stopping training when validation loss is no longer improving can be achieved using a Keras callback.

### A callback is an object that is passed to the model in the call to fit.

### Usage: **keras.callbacks.EarlyStopping(monitor, mode, patience)**

#### --> Stops the training when the monitored metric, evaluated in the specified mode, has not improved for more than the specified number of epochs (the patience).

In [None]:
from keras.callbacks import EarlyStopping

In [None]:
# Same L1-regularized architecture as before; this copy is trained with an
# early-stopping callback.
modele = Sequential()
modele.add(Input((10000,)))  # 10000 features, the default width of vectorize_sequences
modele.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modele.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modele.add(Dense(1, activation='sigmoid'))  # single sigmoid output unit

# `summary()` prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
modele.summary()

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
modele.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Early-stopping callback: watches validation accuracy (higher is better),
# configured with patience=1 and restore_best_weights=True.
early_stop = EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=1,
    restore_best_weights=True,
)

# Train with the callback attached; `epochs` is only an upper bound here,
# since the callback may end training sooner.
history = modele.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=512,
    validation_data=(x_val, y_val),
    callbacks=[early_stop],
)

In [None]:
# Plot training vs. validation accuracy for the epochs that actually ran.
hist = history.history
# The history length, not the requested `epochs`, sets the x-axis because
# early stopping may have ended training sooner.
epoch = len(hist["accuracy"])
epoch_axis = range(1, epoch + 1)
for series, mark in (("accuracy", "*"), ("val_accuracy", "^")):
    plt.plot(epoch_axis, hist[series], marker=mark)
plt.legend(["Train", "validation"])
plt.xlabel("epoch")
plt.ylabel("Accuracy")
plt.show()

## Dropout

In [None]:
from keras.layers import Dropout

In [None]:
# L1-regularized architecture with a Dropout(0.5) layer after each hidden
# Dense layer, so this model combines both regularization methods.
modeld = Sequential()
modeld.add(Input((10000,)))  # 10000 features, the default width of vectorize_sequences
modeld.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modeld.add(Dropout(0.5))
modeld.add(Dense(16, kernel_regularizer=l1(0.001), activation='relu'))
modeld.add(Dropout(0.5))
modeld.add(Dense(1, activation='sigmoid'))  # single sigmoid output unit

# `summary()` prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
modeld.summary()

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
modeld.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the dropout model for the full `epochs` count, evaluating on the
# held-out validation split after every epoch.
history = modeld.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
# Plot training vs. validation accuracy per epoch for the latest fit.
hist = history.history
# Take the x-axis length from the recorded history rather than the global
# `epochs`, so the plot stays valid if training ran for fewer epochs.
n_epochs = len(hist["accuracy"])
epoch_axis = range(1, n_epochs + 1)
plt.plot(epoch_axis, hist["accuracy"], marker="*")
plt.plot(epoch_axis, hist["val_accuracy"], marker="^")
plt.legend(["Train", "validation"])
plt.xlabel("epoch")
plt.ylabel("Accuracy")
plt.show()