<a href="https://colab.research.google.com/github/tayfununal/nku_lesson/blob/main/diabetes_dataset_in_sklearn/getting_Overfitting_from_diabetes_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Demonstrating Overfitting on the Diabetes Dataset**


In [None]:
import tensorflow as tf
from sklearn.datasets import  load_diabetes

In [None]:
# Fetch the scikit-learn diabetes dataset and print its bundled description

diabetes_dataset = load_diabetes()
print(diabetes_dataset.DESCR)

In [None]:
# List the fields the dataset exposes, then keep the inputs and the targets

print(diabetes_dataset.keys())

data = diabetes_dataset.data
targets = diabetes_dataset.target

In [None]:

# Standardise the targets: subtract their mean and divide by their standard
# deviation (this will make clearer training curves)
target_mean = targets.mean(axis=0)
target_std = targets.std()
targets = (targets - target_mean) / target_std

In [None]:
# Split the data into train and test sets (10% of the samples go to the test set)

from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle deterministic, so the same samples end
# up in each split on every "Restart & Run All" and results are comparable
# between runs. The value 42 is arbitrary; any fixed integer works.
train_data, test_data, train_targets, test_targets = train_test_split(
    data, targets, test_size=0.1, random_state=42
)

# Report the shape of each split as a quick sanity check
for split in (train_data, test_data, train_targets, test_targets):
    print(split.shape)

In [None]:
# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def get_model():
  """Return an uncompiled fully connected regression network.

  The network stacks six 128-unit ReLU ``Dense`` layers followed by a
  single-unit ``Dense`` output layer with no activation specified. The input
  width is read from the second dimension of the module-level ``train_data``.
  """
  n_features = train_data.shape[1]
  # Only the first layer declares the input shape.
  first_hidden = Dense(128, activation='relu', input_shape=(n_features,))
  other_hidden = [Dense(128, activation='relu') for _ in range(5)]
  return Sequential([first_hidden, *other_hidden, Dense(1)])

model = get_model()

In [None]:
# Print a summary of the model returned by get_model() above
model.summary()

In [None]:
# Configure training: 'adam' optimizer, 'mse' loss, and 'mae' tracked as a metric

model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Fit on the training split for 100 epochs, reserving 15% of it for validation.
# The returned object is kept in `history` so the loss curves can be plotted.

history = model.fit(
    train_data,
    train_targets,
    batch_size=64,
    epochs=100,
    validation_split=0.15,
    verbose=False,
)

In [None]:
# Score the trained model on the held-out test split

model.evaluate(
    test_data,
    test_targets,
    verbose=2,
)

In [None]:
# Plot the learning curves

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Draw the per-epoch training and validation loss on one set of axes

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Loss vs. Epochs')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Conclusion
"""
    As we can see, the model is overfitting. You should think about how to prevent overfitting.
    One solution to the problem is to use a regularization technique.

"""

# **Model Regularization on the Diabetes Dataset**

In [None]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras import regularizers

**Adding regularization with weight decay and dropout**

In [None]:
# wd is weight decay and rate is dropout rate

def get_regularised_model(wd, rate):
  """Return an uncompiled regression network with L2 penalties and dropout.

  Args:
    wd: Coefficient passed to ``regularizers.l2`` for the kernel of every
      128-unit hidden layer.
    rate: Rate passed to the ``Dropout`` layer placed after every hidden layer.

  Returns:
    A ``Sequential`` model made of six (``Dense(128, relu)`` -> ``Dropout``)
    pairs followed by a ``Dense(1)`` output layer. The input width is read
    from the second dimension of the module-level ``train_data``.
  """
  n_hidden = 6
  layers = []
  for idx in range(n_hidden):
    # Only the first layer declares the input shape.
    extra = {'input_shape': (train_data.shape[1],)} if idx == 0 else {}
    layers.append(
        Dense(128, kernel_regularizer=regularizers.l2(wd), activation='relu', **extra)
    )
    layers.append(Dropout(rate))
  layers.append(Dense(1))

  return Sequential(layers)

In [None]:
# Rebuild the model, this time with weight decay 1e-5 and dropout rate 0.3
model = get_regularised_model(wd=1e-5, rate=0.3)

In [None]:
# Configure training: 'adam' optimizer, 'mse' loss, and 'mae' tracked as a metric
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Fit the regularised model for 100 epochs, reserving 15% of the training
# split for validation; `history` is kept for plotting the loss curves.
history = model.fit(
    train_data,
    train_targets,
    batch_size=64,
    epochs=100,
    validation_split=0.15,
    verbose=False,
)

In [None]:
# Score the regularised model on the held-out test split
model.evaluate(
    test_data,
    test_targets,
    verbose=2,
)

**Plotting the learning curves**

In [None]:
# Draw the per-epoch training and validation loss on one set of axes

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Loss vs. Epochs')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
plt.show()

# **Callbacks**

**Introduction to callbacks**

> **Example training callbacks**

In [None]:
# Write a custom callback
from tensorflow.keras.callbacks import Callback

class Training_Callbacks(Callback):
  """Callback that prints a message at each training hook.

  Messages are printed at the start and end of training, of every epoch,
  and of every training batch.
  """

  def on_train_begin(self, logs=None):
    """Announce that training is starting."""
    print('Starting training...')

  def on_epoch_begin(self, epoch, logs=None):
    """Announce the start of epoch number ``epoch``."""
    print('Starting epoch {}'.format(epoch))

  def on_train_batch_begin(self, batch, logs=None):
    """Announce the start of training batch number ``batch``."""
    print('Training: Starting batch {}'.format(batch))

  def on_train_batch_end(self, batch, logs=None):
    """Announce the end of training batch number ``batch``."""
    print('Training: Finished batch {}'.format(batch))

  def on_epoch_end(self, epoch, logs=None):
    """Announce the end of epoch number ``epoch``."""
    print('Finished epoch {}'.format(epoch))

  def on_train_end(self, logs=None):
    """Announce that training has finished."""
    print('Finished training!')

In [None]:
# Rebuild the regularised model (weight decay 1e-5, dropout rate 0.3)
model = get_regularised_model(wd=1e-5, rate=0.3)

In [None]:
# Configure training with the 'adam' optimizer and 'mse' loss (no extra metrics)
model.compile(
    loss='mse',
    optimizer='adam',
)

**Train the model with the callback**

In [None]:
# Fit for 3 epochs with 15% of the training split reserved for validation;
# Keras output is silenced so only the callback's messages are printed.
model.fit(
    train_data,
    train_targets,
    epochs=3,
    batch_size=128,
    verbose=False,
    validation_split=0.15,
    callbacks=[Training_Callbacks()],
)

In [None]:
# Write a custom callback
from tensorflow.keras.callbacks import Callback

class Testining_Callbacks(Callback):
  """Callback that prints a message at each testing (evaluation) hook.

  Messages are printed at the start and end of testing and of every test batch.
  """

  def on_test_begin(self, logs=None):
    """Announce that testing is starting."""
    print('Starting testing...')

  def on_test_batch_begin(self, batch, logs=None):
    """Announce the start of test batch number ``batch``."""
    print('Testing: Starting batch {}'.format(batch))

  def on_test_batch_end(self, batch, logs=None):
    """Announce the end of test batch number ``batch``."""
    print('Testing: Finished batch {}'.format(batch))

  def on_test_end(self, logs=None):
    """Announce that testing has finished."""
    print('Finished testing!')

In [None]:
# Evaluate on the test split; Keras output is silenced so only the callback's
# messages are printed.
model.evaluate(
    test_data,
    test_targets,
    verbose=False,
    callbacks=[Testining_Callbacks()],
)

In [None]:
# Write a custom callback
from tensorflow.keras.callbacks import Callback

class Prediction_Callbacks(Callback):
  """Callback that prints a message at each prediction hook.

  Messages are printed at the start and end of prediction and of every
  prediction batch.
  """

  def on_predict_begin(self, logs=None):
    """Announce that prediction is starting."""
    print('Starting prediction...')

  def on_predict_batch_begin(self, batch, logs=None):
    """Announce the start of prediction batch number ``batch``."""
    print('Prediction: Starting batch {}'.format(batch))

  def on_predict_batch_end(self, batch, logs=None):
    """Announce the end of prediction batch number ``batch``."""
    print('Prediction: Finished batch {}'.format(batch))

  def on_predict_end(self, logs=None):
    """Announce that prediction has finished."""
    print('Finished prediction!')

In [None]:
# Predict on the test inputs; Keras output is silenced so only the callback's
# messages are printed.
model.predict(
    test_data,
    verbose=False,
    callbacks=[Prediction_Callbacks()],
)

**Using the logs dictionary**

In [None]:
# Build the model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Assemble the network one layer at a time: a 128-unit ReLU input layer,
# a 64-unit ReLU layer, batch normalisation, two more 64-unit ReLU layers,
# and a single-unit output layer.
model = tf.keras.Sequential()
model.add(Dense(128, activation='relu', input_shape=(train_data.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

In [None]:
# Configure training: 'adam' optimizer, 'mse' loss, and 'mae' tracked as a metric

model.compile(
    optimizer="adam",
    loss='mse',
    metrics=['mae'],
)

In [None]:
# Create the custom callback

class LossAndMetricCallback(tf.keras.callbacks.Callback):
    """Callback that prints loss/metric values read from the ``logs`` dictionary.

    Every hook declares ``logs=None``, so each one tolerates ``logs`` being
    ``None`` or lacking a key (for example ``'mae'`` when the model was
    compiled without that metric). A missing value is printed as ``nan``
    instead of raising ``TypeError``/``KeyError`` in the middle of a run.
    """

    @staticmethod
    def _value(logs, key):
        """Return ``logs[key]``, or ``nan`` if ``logs`` is None or lacks ``key``."""
        return (logs or {}).get(key, float('nan'))

    # Print the loss after every second batch in the training set
    def on_train_batch_end(self, batch, logs=None):
        if batch % 2 == 0:
            print('\n After batch {}, the loss is {:7.2f}.'.format(batch, self._value(logs, 'loss')))

    # Print the loss after each batch in the test set
    def on_test_batch_end(self, batch, logs=None):
        print('\n After batch {}, the loss is {:7.2f}.'.format(batch, self._value(logs, 'loss')))

    # Print the loss and mean absolute error after each epoch
    def on_epoch_end(self, epoch, logs=None):
        print('Epoch {}: Average loss is {:7.2f}, mean absolute error is {:7.2f}.'.format(
            epoch, self._value(logs, 'loss'), self._value(logs, 'mae')))

    # Notify the user when prediction has finished on each batch
    def on_predict_batch_end(self, batch, logs=None):
        print("Finished prediction on batch {}!".format(batch))

In [None]:
# Fit for 20 epochs in batches of 100; Keras output is silenced so only the
# callback's loss/metric messages are printed.

history = model.fit(
    train_data,
    train_targets,
    epochs=20,
    batch_size=100,
    callbacks=[LossAndMetricCallback()],
    verbose=False,
)

In [None]:
# Predict on the test inputs in batches of 10, with the callback reporting
# each finished batch; the predictions are kept in `model_pred`.

model_pred = model.predict(
    test_data,
    batch_size=10,
    callbacks=[LossAndMetricCallback()],
    verbose=False,
)