# Playground: Regression with One Explanatory Variable

Data
  - regularization
  - catching irregularities
  - noise in the data  
  
Building
  - layers
  - nodes
  - activation functions  
  
Compiling
  - learning rate
  - loss functions  
  
Training
  - epochs
  - batch size
  - optimizers
  - overfitting
  - ? dropout
  - ? early stop  
  
Testing
  - ? validation split, validation loss  

In [None]:
%matplotlib inline
import keras
from keras.layers import Dense
from keras.models import Sequential
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
# 'import scipy' alone does not load the stats submodule on older SciPy releases
import scipy.stats

### Generate Dummy Data

In [None]:
# Note: in the following, the data arrays get transposed a lot. Don't worry about it.
# ... different functions simply require the data to come in different shapes.

# generate some fake input_data: n points evenly spaced from 0 to n-1,
# each nudged sideways by normally distributed jitter
n = ___
# set how random you want your data to be in the x direction (try between 0-5)
x_rand_scale = ____
input_data = np.arange(n) + np.random.normal(loc=0, scale=x_rand_scale, size=n)

# normalize the input_data by transforming each value to its respective zscore
# neural networks work much better with normalized values
# fill in the blank with 'zscore'
norm_data = scipy.stats.____(input_data.transpose())

# The following are some predefined distributions for your data set
linear = norm_data
parabolic = norm_data**2
sin = np.sin(2*norm_data)
# flat (zero) strictly between -1 and 1, and -4 * x everywhere else
discontinuous = np.where(np.abs(norm_data) < 1, 0.0, norm_data * -4)

# Set targets to one of the previously defined distributions or write your own
targets = _________

# Add some noise (as all data has). Pick noise between 0-1
noise = ____
targets = targets.transpose() + np.random.normal(loc=0.0, scale=noise, size=n)

# Reshape your data into column vectors (one row per data point) to feed into the model
norm_data = norm_data.reshape(-1, 1)
targets = targets.reshape(-1, 1)

# Plot the data in orange dots
# ('o' only picks the marker shape; the color keyword alone decides the colour)
plt.plot(norm_data, targets, 'o', markersize=2, color="orange")

### Return a Built and Compiled Model

In [None]:
# Return a built and compiled model
# num_features: number of explanatory features you're planning to feed into the model
# inside this function, you can adjust the number of layers,
# ... nodes per layer, activation functions, optimizer type, and loss function
def get_regression_model(num_features):
    """Build and compile a feed-forward Keras model for regression.

    Args:
        num_features: number of explanatory features per data point; used as
            the input shape of the first layer.

    Returns:
        The compiled model, ready to be passed to ``fit``.
    """
    # Set up a 'Sequential()' (feed-forward) model
    model = ___________

    # add a hidden layer. The first layer must declare input_shape, i.e. the number of features you're feeding into it.
    # try different numbers of nodes (units). Try different activation functions ("relu", "elu", "tanh", "sigmoid").
    # if you remove the activation parameter completely, there will be no activation function (i.e. the identity function)
    model.add(Dense(units=___, activation=____, input_shape=(num_features,)))

    # What about adding another hidden layer?

    # Add the output layer. Only 1 node for regression models in the output layer and no activation function.
    model.add(Dense(units=__))

    # Try setting a learning rate (lr) for the Stochastic Gradient Descent (SGD) optimizer
    # and set the loss function, "loss", to mean squared error, "mse".
    # ... try other optimizers like "adam" by setting optimizer="adam"
    # ... try other loss functions (https://keras.io/losses/)
    # SGD is the canonical class name; the lowercase 'sgd' alias is not available in every Keras release.
    optimizer = keras.optimizers.SGD(lr=__)
    # The metric is registered under its full name so that the training history
    # reports it under the key 'mean_squared_error', which the fitting code reads.
    model.compile(optimizer=optimizer, loss=___, metrics=["mean_squared_error"])

    # return the model built and compiled
    return model

### Build the Keras Neural Network Model

### Fit the model

In [None]:
# get a new regression model: model
# only one feature is being fed into this model
model = get_regression_model(num_features=1)

# start a log of the mean squared error: log
log = {"mse": []}

# set the number of rounds to train (each round will be multiple epochs)
training_iterations = 10

# fit your model once to see what the model guesses (in green) after one pass through your data
# set epochs to 1, meaning only go through the data set once
history = model.fit(x=norm_data, y=targets, epochs=__, verbose=0)
# attach the history of the mean_squared_error to the log
log["mse"].extend(history.history['mean_squared_error'])
# try to predict the data set with the model
predictions = model.predict(norm_data)
# add a plot of those predictions to the figure
plt.plot(norm_data, predictions, alpha=.5, color="green")
# add a legend entry for this first prediction
plt.legend(['First Guess'], loc='best')


# then run through your data 'training_iterations' more times
# each iteration will run your model through 5 more epochs (because that's what epochs is set to)
for i in range(training_iterations):
    # at each iteration, record the history of your model's performance
    # try adjusting the batch size to find what works best (suggested 1-20)
    history = model.fit(x=______, y=______, epochs=5, batch_size=___, verbose=0)
    # save that history to a log for plotting
    log["mse"].extend(history.history['mean_squared_error'])
    # have the model make predictions off of your normalized input data (like above)
    predictions = model.predict(______)
    # alpha is just a coefficient between 0-1 that will be used to set the color and opacity of the plot
    alpha = (i+1)/training_iterations
    # add a plot to the figure of what your machine guessed for your input data after this iteration
    # "earlier" guesses will be more faded while "later" guesses will be more vivid
    plt.plot(norm_data, predictions, alpha=alpha, color=(alpha, 0.2, 0.5))

# add the plot of the initial input data to the figure
# ('o' only picks the marker shape; the color keyword alone decides the colour)
plt.plot(norm_data, targets, 'o', markersize=2, color="orange")
# show the figure
plt.show()

### Assess Model Performance

In [None]:
# now assess how your model did on the training data by graphing the logged 'Mean Squared Error'
# (the log holds one value per training epoch)
mse_log = log["mse"]
first_loss = round(mse_log[0], 5)
final_loss = round(mse_log[-1], 5)
plt.plot(mse_log)
# label the first and last points of the curve with their rounded values
plt.annotate(first_loss, (0, mse_log[0]))
# the x axis is the position in the log, so the last point sits at len(mse_log) - 1
# (not at n, which is the number of data points)
plt.annotate(final_loss, (len(mse_log) - 1, mse_log[-1]))
plt.legend(['Mean Squared Error'], loc='best')
plt.show()