In [None]:
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the Boston housing dataset shipped with Keras; load_data() returns a
# (features, labels) pair for the training split and another for the test split.
train_split, test_split = datasets.boston_housing.load_data()
training_features, training_labels = train_split
testing_features, testing_labels = test_split

In [None]:
# Fit the standardizer on the training features only, so the mean and standard
# deviation it stores come from the training split; fit() returns the scaler itself.
scaler = StandardScaler().fit(training_features)

# Apply the same training-derived statistics to both splits.
training_features_scaled, testing_features_scaled = (
    scaler.transform(split) for split in (training_features, testing_features)
)

## FEATURE SCALING IS VERY IMPORTANT

#### https://towardsdatascience.com/all-about-feature-scaling-bcc0ad75cb35

#### "Machine learning algorithm just sees numbers — if there is a vast difference in the range say few ranging in thousands and few ranging in the tens, and it makes the underlying assumption that higher ranging numbers have superiority of some sort. So these more significant number starts playing a more decisive role while training the model."

#### "Most of the times, your dataset will contain features highly varying in magnitudes, units and range. But since, most of the machine learning algorithms use Euclidean distance between two data points in their computations, this is a problem. If left alone, these algorithms only take in the magnitude of features neglecting the units. The results would vary greatly between different units, 5kg and 5000gms. The features with high magnitudes will weigh in a lot more in the distance calculations than features with low magnitudes."

#### "Another reason why feature scaling is applied is that few algorithms like Neural network gradient descent converge much faster with feature scaling than without it."

#### Normalization is used when we want to bound our values between two numbers, typically [0,1] or [-1,1], while standardization transforms the data to have zero mean and a variance of 1.

In [None]:
# Inspect the dimensions of the raw (unscaled) training feature array.
training_features.shape

In [None]:
# Show the first training sample after standardization to eyeball the scaled values.
training_features_scaled[0]

In [None]:
# Number of feature columns per sample in the scaled test set; this is the
# input width the network's first layer must accept.
testing_features_scaled.shape[1]

In [None]:
# Regression network trained on the standardized features.
model = keras.Sequential()

# input_shape describes one sample and must be a tuple or list. The width is
# read from the data rather than hard-coded as 13, so it cannot drift out of
# sync with the feature matrix.
model.add(layers.Dense(32, activation='relu',
                       input_shape=(training_features_scaled.shape[1],)))

model.add(layers.Dense(16, activation='relu'))

model.add(layers.Dense(1))  # single output node (no activation) to predict one numerical value

# The optimizer is passed as an instance (hence the parentheses), while the
# loss is passed as a plain function object.
# compile() also accepts a `metrics` list to report extra values at each epoch.
# Author's observation: adagrad, adadelta and ftrl gave a high loss here,
# unlike adam and RMSprop.
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.mse)

# validation_split is the fraction of the *training* data passed to fit() that
# is held out for validation (0.1 = 10%); the test set is not involved.
# epochs is the number of full passes over the training data.
letsgo = model.fit(training_features_scaled, training_labels,
                   validation_split=0.1, epochs=200, verbose=0)

In [None]:
# Compare predictions with the true targets on ten test samples, then report
# the loss over the whole scaled test set.
sample_rows = slice(50, 60)

scaled_predictions = model.predict(testing_features_scaled[sample_rows])
print(scaled_predictions, '\n')  # predictions

print(testing_labels[sample_rows], '\n')  # actual values

scaled_test_loss = model.evaluate(testing_features_scaled, testing_labels)
print(scaled_test_loss)  # display loss value

In [None]:
# Control experiment: the same architecture trained on the raw, unscaled
# features, to show the effect of skipping feature scaling.
model2 = keras.Sequential()

# input_shape describes one sample and must be a tuple, list or similar simple
# array-like. The width is read from the data rather than hard-coded as 13.
model2.add(layers.Dense(32, activation='relu',
                        input_shape=(training_features.shape[1],)))

model2.add(layers.Dense(16, activation='relu'))

model2.add(layers.Dense(1))  # single output node (no activation) to predict one numerical value

model2.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.mse)

# Fit model2 itself. The original called model.fit() here, which left model2
# untrained and kept training the first model on unscaled data, invalidating
# the comparison.
letsgo2 = model2.fit(training_features, training_labels,
                     validation_split=0.1, epochs=200, verbose=0)
# Expectation being tested: lack of scaling makes the model not as good.

In [None]:
# Same comparison for the model fed raw features: ten test-sample predictions
# against the true targets, then the loss over the whole unscaled test set.
sample_rows = slice(50, 60)

unscaled_predictions = model2.predict(testing_features[sample_rows])
print(unscaled_predictions, '\n')  # predictions

print(testing_labels[sample_rows], '\n')  # actual values

unscaled_test_loss = model2.evaluate(testing_features, testing_labels)
print(unscaled_test_loss)  # display loss value