<a href="https://colab.research.google.com/github/theunixdisaster/deep-learning-with-python-book/blob/main/FrancoisChap5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Underfitting and overfitting

Adding white noise channels or all-zeros channels to MNIST

In [None]:
from tensorflow.keras.datasets import mnist
import numpy as np

# Only the training split is kept; the second (test) split is discarded.
(train_images, train_labels), _ = mnist.load_data()

# Flatten every 28x28 image into one 784-long row, then cast to float32 and
# divide by 255 (presumably rescaling 8-bit pixel values into [0, 1]).
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

# Append 784 extra feature columns to every sample: uniform random values in
# one copy of the dataset, all zeros in the other.
train_images_with_noise_channels = np.concatenate(
    (train_images, np.random.random((len(train_images), 784))),
    axis=1,
)
train_images_with_zeros_channels = np.concatenate(
    (train_images, np.zeros((len(train_images), 784))),
    axis=1,
)


Simple Holdout validation

In [None]:
from tensorflow.keras import layers
from tensorflow import keras
import numpy as np

class Holdout:
  """Sketch of simple holdout validation.

  The data is shuffled, the first `num_validation_samples` entries are set
  aside as the validation set, and the remainder is used for training.

  NOTE: the bare `...` arguments passed to `fit` / `evaluate` below are
  placeholders for the labels and training options, which this sketch does
  not carry; they must be filled in before the methods can run on a real
  Keras model.
  """

  def __init__(self, data, test_data, num_validation_samples=10000):
    """Shuffle `data` and split it into validation and training parts.

    Args:
      data: mutable sequence / array of samples. It is shuffled IN PLACE,
        so the caller's object is reordered as well.
      test_data: held-out data that is only used for the final evaluation.
      num_validation_samples: how many of the shuffled samples to reserve
        for validation (defaults to 10000).
    """
    self.data = data
    self.test_data = test_data

    self.num_validation_samples = num_validation_samples
    # np.random.shuffle reorders along the first axis, in place.
    np.random.shuffle(self.data)

    self.validation_data = self.data[:self.num_validation_samples]
    self.training_data = self.data[self.num_validation_samples:]

  def get_model(self): #We use the model used in the MNIST example
    """Return a freshly built and compiled (untrained) Keras model."""
    model = keras.Sequential([
                              layers.Dense(512, activation="relu"),
                              layers.Dense(10, activation="softmax")
    ])
    model.compile(
        optimizer="rmsprop",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

  def fit_data(self):
    """Run the holdout protocol and return the score on the test data.

    A first model is trained on the training part and scored on the
    validation part (kept in `self.validation_score`). A second, fresh model
    is then trained on training + validation data together and scored on
    `self.test_data`.

    Returns:
      Whatever `model.evaluate` returns for the test data; it is also stored
      in `self.test_score`.
    """
    model = self.get_model()
    model.fit(self.training_data, ...)
    self.validation_score = model.evaluate(self.validation_data, ...)

    # At this point the model would be tuned, retrained and re-evaluated
    # until the right hyperparameters (e.g. number of epochs) are found.

    # Train the final model from scratch on all non-test data.
    model = self.get_model()
    model.fit(np.concatenate([self.training_data, self.validation_data]), ...)
    self.test_score = model.evaluate(self.test_data, ...)

    return self.test_score

K-fold cross-validation

In [None]:
import numpy as np
from tensorflow import keras

class KVal:
  """Sketch of K-fold cross-validation.

  The data is shuffled and cut into `k` equally sized folds. Each fold is
  used once as the validation set while a fresh model is trained on the
  remaining folds; the per-fold scores are then averaged.

  NOTE: the bare `...` arguments passed to `fit` / `evaluate` below are
  placeholders for the labels and training options, which this sketch does
  not carry; they must be filled in before the methods can run on a real
  Keras model.
  """

  def __init__(self, data, test_data, k=3):
    """Shuffle `data` and immediately run the K-fold validation loop.

    Args:
      data: array of samples. It is shuffled IN PLACE, so the caller's
        object is reordered as well.
      test_data: held-out data, only used by `evaluate_test`.
      k: number of folds (defaults to 3). When `len(data)` is not divisible
        by `k`, the trailing remainder samples are never used for
        validation, only for training.
    """
    self.data = data
    self.test_data = test_data

    self.k = k
    self.num_validation_samples = len(self.data) // self.k
    np.random.shuffle(self.data)
    self.validation_scores = []
    for fold in range(self.k):
      start = self.num_validation_samples * fold
      stop = start + self.num_validation_samples
      self.validation_data = self.data[start:stop]
      # np.concatenate takes ONE sequence of arrays; a second positional
      # argument would be interpreted as the axis.
      self.training_data = np.concatenate(
          [self.data[:start], self.data[stop:]]
      )

      # A brand-new (untrained) model is used for every fold.
      model = self.get_model()
      model.fit(self.training_data, ...)
      self.validation_scores.append(model.evaluate(self.validation_data, ...))

    # axis=0 averages across folds, so a score that is a list of metrics is
    # averaged metric by metric instead of being collapsed into one number.
    self.validation_score = np.average(self.validation_scores, axis=0)
    #Values are plotted adjusted as necessary to avoid overfitting

  def get_model(self):
    """Return a freshly built and compiled (untrained) Keras model.

    Same architecture and compile settings as the model used in the Holdout
    class.
    """
    model = keras.Sequential([
        keras.layers.Dense(512, activation="relu"),
        keras.layers.Dense(10, activation="softmax")
    ])
    model.compile(
        optimizer="rmsprop",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

  def evaluate_test(self):
    """Train a fresh model on all of `self.data` and score it on the test data.

    Returns:
      Whatever `model.evaluate` returns for `self.test_data`.
    """
    model = self.get_model()
    model.fit(self.data, ...) #By using the adjusted values
    test_score = model.evaluate(self.test_data, ...)
    return test_score

# Improving model fit

The three common problems that we'll encounter while trying to get a model to overfit are:


*   Training doesn't get started
*   Training gets started just fine, but your model doesn't meaningfully generalize
*   Training and validation loss both go down over time, and you can beat your baseline (the commonsense baseline put forward), but the model never reaches a state of overfitting

A few of the things we can do to solve these problems are to:


*   Adjust the learning rate
*   See if sufficient information is provided
*   See if the model designed is suitable for the problem









