<a href="https://colab.research.google.com/github/olcaykursun/Algorithms/blob/main/softmax_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Copyright 2020 Google LLC. Double-click here for license information.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Simple Linear Regression with the Iris Dataset

In this first Colab, you'll explore linear regression with a simple dataset.

## Learning objectives:

After doing this exercise, you'll know how to do the following:

  * Run Colabs.
  * Tune the following [hyperparameters](https://developers.google.com/machine-learning/glossary/#hyperparameter):
    * [learning rate](https://developers.google.com/machine-learning/glossary/#learning_rate)
    * number of [epochs](https://developers.google.com/machine-learning/glossary/#epoch)
    * [batch size](https://developers.google.com/machine-learning/glossary/#batch_size)
  * Interpret different kinds of [loss curves](https://developers.google.com/machine-learning/glossary/#loss_curve).

## Import relevant modules

The following cell imports the packages that the program requires:

In [None]:
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

# Hyperparameter grid explored by the search cells below.
# Epoch budgets: the exhaustive search trains each budget from scratch, while
# the cumulative search trains only the difference between consecutive budgets.
try_epochs = [10, 20, 40, 80]
# Learning rates handed to the optimizer in build_model.
try_learning_rates = [0.001, 0.01, 0.1, 0.5, 1, 2, 4]
# Mini-batch sizes handed to model.fit in train_model.
try_batch_sizes = [1, 2, 4]
# Number of independent trainings whose RMSE is averaged per combination.
n_runs = 5 #currently 5; about 10 runs per combination gives a steadier estimate of the mean of the loss


## Define functions that build and train a model

The following code defines two functions:

  * `build_model(my_learning_rate)`, which builds an empty model.
  * `train_model(model, feature, label, epochs, batch_size)`, which trains the model from the examples (feature and label) you pass.


In [None]:
#@title Define the functions that build and train a model
def build_model(my_learning_rate):
  """Create and compile a simple linear regression model.

  Args:
    my_learning_rate: Learning rate passed to the RMSprop optimizer.

  Returns:
    A compiled `tf.keras` Sequential model made of a single Dense unit that
    takes one scalar feature. It is trained to minimize mean squared error
    and reports root mean squared error as a metric.
  """
  # A sequential model is a plain stack of layers; linear regression on one
  # feature needs exactly one layer holding one unit (one weight, one bias).
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Dense(units=1,
                                  input_shape=(1,)))

  # Use the stable optimizer name. The `optimizers.experimental` namespace was
  # only a transitional alias and is not present in newer releases.
  optimizer = tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate)

  # The RMSE metric is what train_model later reads from the training history
  # under the key "root_mean_squared_error".
  model.compile(optimizer=optimizer,
                loss="mean_squared_error",
                metrics=[tf.keras.metrics.RootMeanSquaredError()])

  return model


from sklearn.model_selection import train_test_split

def train_model(model, feature, label, epochs, batch_size):
  """Train the model on the given examples and report what it learned.

  Args:
    model: A compiled model exposing `fit` and `get_weights`.
    feature: Feature values, passed to `fit` as `x`.
    label: Label values, passed to `fit` as `y`.
    epochs: Number of epochs to train for in this call.
    batch_size: Number of examples per gradient update.

  Returns:
    A tuple `(trained_weight, trained_bias, epochs, rmse, model)`:
    the first two entries of `model.get_weights()`, the list of epoch
    indices recorded by `fit`, a pandas Series with the training
    "root_mean_squared_error" of every epoch, and the same model object
    that was passed in.
  """
  # Train silently; the per-epoch metrics are read from the history instead.
  history = model.fit(x=feature,
                      y=label,
                      batch_size=batch_size,
                      epochs=epochs, verbose=0)

  # Fetch the weights once: every get_weights() call builds a fresh copy.
  weights = model.get_weights()
  trained_weight = weights[0]
  trained_bias = weights[1]

  # The list of epoch indices is stored separately from the metrics. Keep it
  # in its own name rather than rebinding the `epochs` argument.
  epoch_indices = history.epoch

  # One row per epoch, one column per tracked quantity.
  hist = pd.DataFrame(history.history)
  rmse = hist["root_mean_squared_error"]

  return trained_weight, trained_bias, epoch_indices, rmse, model

print("Defined build_model and train_model")

Defined build_model and train_model


In [None]:
#@title Define the plotting functions
def plot_the_model(trained_weight, trained_bias, feature, label):
  """Scatter the examples and overlay the fitted regression line in red.

  `trained_weight` is indexed as `[0][0]` and `trained_bias` as `[0]` to
  obtain the scalar slope and intercept of the line.
  """
  slope = trained_weight[0][0]
  intercept = trained_bias[0]

  # Axis captions.
  plt.xlabel("feature")
  plt.ylabel("label")

  # The raw examples.
  plt.scatter(feature, label)

  # A straight line is fully described by its two endpoints, taken at the
  # smallest and largest feature values.
  line_x = [feature.min(), feature.max()]
  line_y = [intercept + (slope * x) for x in line_x]
  plt.plot(line_x, line_y, c='r')

  # Draw the scatter plot together with the line.
  plt.show()

def plot_the_loss_curve(epochs, rmse):
  """Draw RMSE against epoch on a new figure."""

  plt.figure()
  plt.xlabel("Epoch")
  plt.ylabel("Root Mean Squared Error")

  plt.plot(epochs, rmse, label="Loss")
  plt.legend()

  # Leave a little headroom below the smallest error; cap at the largest one.
  lower_limit = rmse.min()*0.97
  upper_limit = rmse.max()
  plt.ylim([lower_limit, upper_limit])
  plt.show()

print("Defined the plot_the_model and plot_the_loss_curve functions.")

Defined the plot_the_model and plot_the_loss_curve functions.


In [None]:
#@title Try each feature with different hyperparameter combinations for optimization
import time

import numpy as np
from sklearn import datasets
from sklearn.metrics import mean_squared_error

# Fixed seed so the same half of the rows is used for training on every run.
SPLIT_SEED = 42

# Load the Iris dataset and keep a random half of the rows for training.
iris = datasets.load_iris()
iris_data, _, iris_target, _ = train_test_split(
    iris.data, iris.target, train_size=0.5, random_state=SPLIT_SEED)

t1 = time.perf_counter()

# Best result found so far for each candidate feature (index = column of iris.data).
best_rmse = np.full(4, np.inf)
best_epochs = np.zeros(4)
best_learning_rates = np.zeros(4)
best_batch_sizes = np.zeros(4)
best_m = np.zeros(4)
best_c = np.zeros(4)

my_label = iris_data[:, 2] #target is petal_length (column 2)
for var in range(4):
  my_feature = iris_data[:, var] #each of the four columns takes a turn as the predictor

  for num_epochs in try_epochs:
    for learning_rate in try_learning_rates:
      for batch_size in try_batch_sizes:
        rmses = []
        for run in range(n_runs):
          # Keras keeps global state for every model it builds. Clearing it
          # before each build stops memory use and per-model overhead from
          # growing over the many trainings of this search.
          tf.keras.backend.clear_session()

          initialized_model = build_model(learning_rate)
          trained_weight, trained_bias, epochs, rmse, trained_model = train_model(initialized_model, my_feature,
                                                            my_label, num_epochs,
                                                            batch_size)
          # NOTE(review): the model is scored on the full dataset, which
          # includes the half it was trained on; the held-out half returned
          # by train_test_split is discarded above.
          y_pred = trained_model.predict(iris.data[:, var], verbose = 0).flatten()
          rmse = np.sqrt(mean_squared_error(iris.data[:, 2], y_pred))
          rmses.append(rmse)
        avg_rmse = sum(rmses)/n_runs
        if best_rmse[var] > avg_rmse:
          best_rmse[var] = avg_rmse
          best_learning_rates[var] = learning_rate
          best_batch_sizes[var] = batch_size
          best_epochs[var] = num_epochs
          # The slope and intercept are those of the last of the n_runs
          # trainings, whereas the stored RMSE is the mean over all of them.
          best_m[var] = trained_weight[0][0]
          best_c[var] = trained_bias[0]

  print(f'{iris.feature_names[var]} with m={best_m[var]} and c={best_c[var]} gives us of an rmse of {best_rmse[var]}')

t2 = time.perf_counter()
print('Time taken to run:',t2-t1)

sepal length (cm) with m=1.7693334817886353 and c=-7.075587749481201 gives us of an rmse of 0.9775567680262368
sepal width (cm) with m=-1.2916473150253296 and c=7.834952354431152 gives us of an rmse of 1.6246024380963402
petal length (cm) with m=0.997748076915741 and c=-0.0008598674321547151 gives us of an rmse of 0.005351918154869108
petal width (cm) with m=2.1618833541870117 and c=1.1968492269515991 gives us of an rmse of 0.4789826678825887
Time taken to run: 5322.6628045


In [None]:
#@title Print the best settings
# One setting per output line, each shown as name=value.
print(f'{best_rmse=}',
      f'{best_epochs=}',
      f'{best_learning_rates=}',
      f'{best_batch_sizes=}',
      sep='\n')

best_rmse=array([0.97755677, 1.62460244, 0.00535192, 0.47898267])
best_epochs=array([40., 80., 80., 80.])
best_learning_rates=array([0.1 , 0.1 , 0.01, 0.01])
best_batch_sizes=array([1., 4., 4., 1.])


In [None]:
#@title Print the equations for the regression lines
# Print each fitted line in the form "X[2] ~= m * X[i] +/- |c|".
for var in range(4):
  slope, intercept = best_m[var], best_c[var]
  # Pick the sign separately and print the magnitude. Negating an intercept of
  # exactly 0.0 yields -0.0, which would otherwise be rendered as "- -0.00".
  sign = '-' if intercept < 0 else '+'
  print(f'X[2] ~= {slope:.2f} * X[{var}] {sign} {abs(intercept):.2f}')

X[2] ~= 1.77 * X[0] - 7.08
X[2] ~= -1.29 * X[1] + 7.83
X[2] ~= 1.00 * X[2] - 0.00
X[2] ~= 2.16 * X[3] + 1.20


In [None]:
#@title Try to do it faster by running the training epochs cumulatively
import time

import numpy as np
from sklearn import datasets
from sklearn.metrics import mean_squared_error

t1 = time.perf_counter()

# Best result found so far for each candidate feature (index = column of iris.data).
best_rmse = np.full(4, np.inf)
best_epochs = np.zeros(4)
best_learning_rates = np.zeros(4)
best_batch_sizes = np.zeros(4)

# Training continues from one epoch budget to the next, so the budgets must be
# visited in increasing order and without repeats. Otherwise the number of
# extra epochs to train would be zero or negative.
epoch_checkpoints = sorted(set(try_epochs))

my_label = iris_data[:, 2] #target is petal_length (column 2)
for var in range(4):
  my_feature = iris_data[:, var] #each of the four columns takes a turn as the predictor

  for learning_rate in try_learning_rates:
    for batch_size in try_batch_sizes:
      rmses = {num_epochs : [] for num_epochs in epoch_checkpoints}
      for run in range(n_runs):
        # Keras keeps global state for every model it builds. Clearing it
        # before each build stops memory use and per-model overhead from
        # growing over the many trainings of this search.
        tf.keras.backend.clear_session()

        # One model per run: it is trained up to the first budget, scored,
        # trained further up to the next budget, scored again, and so on.
        initialized_model = build_model(learning_rate)
        total_epochs_so_far = 0
        for num_epochs in epoch_checkpoints:
          add_epochs = num_epochs - total_epochs_so_far
          total_epochs_so_far = num_epochs
          trained_weight, trained_bias, epochs, rmse, trained_model = train_model(initialized_model, my_feature,
                                                            my_label, add_epochs,
                                                            batch_size)
          # NOTE(review): the model is scored on the full dataset, which
          # includes the rows it was trained on.
          y_pred = trained_model.predict(iris.data[:, var], verbose = 0).flatten()
          rmse = np.sqrt(mean_squared_error(iris.data[:, 2], y_pred))
          rmses[num_epochs].append(rmse)

      # Average over the runs, then keep the budget with the lowest mean RMSE
      # for this (learning rate, batch size) pair.
      avg_rmses = {num_epochs : np.mean(rmses[num_epochs]) for num_epochs in epoch_checkpoints}
      epochs_of_min_avg_rmse = min(avg_rmses, key=avg_rmses.get)
      best_avg_rmse_candidate =  avg_rmses[epochs_of_min_avg_rmse]
      if best_rmse[var] > best_avg_rmse_candidate:
        best_rmse[var] = best_avg_rmse_candidate
        best_learning_rates[var] = learning_rate
        best_batch_sizes[var] = batch_size
        best_epochs[var] = epochs_of_min_avg_rmse

  print(f'{iris.feature_names[var]} gives us of an rmse of {best_rmse[var]}')

t2 = time.perf_counter()
print('Time taken to run:',t2-t1)

sepal length (cm) gives us of an rmse of 0.9478995291745204
sepal width (cm) gives us of an rmse of 1.6297173933258418
petal length (cm) gives us of an rmse of 0.003536954419188184
petal width (cm) gives us of an rmse of 0.48129492560939474
Time taken to run: 2801.157854186


In [None]:
#@title Print the best settings found with this faster way
# One setting per output line, each shown as name=value.
print(f'{best_rmse=}',
      f'{best_epochs=}',
      f'{best_learning_rates=}',
      f'{best_batch_sizes=}',
      sep='\n')

best_rmse=array([0.94789953, 1.62971739, 0.00353695, 0.48129493])
best_epochs=array([40., 40., 80., 80.])
best_learning_rates=array([0.1 , 0.1 , 0.01, 0.01])
best_batch_sizes=array([1., 1., 1., 2.])


In [None]:
#Speed-up: ratio of the runtimes. Both numbers are copied by hand, truncated to
#whole seconds, from the "Time taken to run" outputs of the exhaustive search
#(numerator) and the cumulative search (denominator) above.
5322 / 2801

1.900035701535166

In [None]:
#Ratio of the number of epochs performed per run. The exhaustive search trains
#every epoch budget from scratch (10+20+40+80 = 150 with the default grid),
#whereas the cumulative search only trains up to the largest budget (80).
#Derived from try_epochs so the ratio stays correct when the grid is changed.
sum(try_epochs) / max(try_epochs)

1.875