In [34]:
from __future__ import absolute_import , division , print_function , unicode_literals
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [10]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense , Activation

print(tf.__version__)

2.0.0


In [3]:
#!pip install -q git+https://github.com/tensorflow/docs

In [4]:
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots

In [5]:
x = pd.read_csv("../trgc1000.csv")
y = pd.read_csv("../ffyrgc1000.csv")
x = x.iloc[:,1:]
y = y.iloc[:,[1]]

### Split the data set into train and test

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
x_train,x_test,y_train ,y_test = train_test_split(x,y)

In [8]:
x_train.shape , y_train.shape,y_train.shape,y_test.shape

((750, 10332), (750, 1), (750, 1), (250, 1))

## Build the Model

In [None]:
x_train.shape

In [None]:
def build_model():
    
    model = keras.Sequential([
        layers.Dense(10332,activation = "relu",input_shape = [len(x_train.keys())]),
        #layers.Dense(5,activation = "relu"),
        layers.Dense(1)
    ])

    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(
    loss = "mse",
    optimizer = optimizer,metrics = ["mae","mse"])

    return model

In [None]:
model = build_model()

In [None]:
model.summary()

In [None]:
#Now try out the Model. Take a batch of 10examples from the training data and call model.predict on it
example_batch = np.array(x_train[:10])
example_result = model.predict(example_batch)

In [None]:
#example_result*100

In [None]:
# Train the Model for 1000 epochs, and record the training and validation accuracy in the history object
EPOCHS = 1000
history = model.fit(np.array(x_train) , np.array(y_train), epochs=EPOCHS , validation_split=0.2,verbose=0)

In [None]:
hist = pd.DataFrame(history.history)
hist["epoch"] = history.epoch
hist.tail()

In [None]:
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
plotter.plot({"Basic":history},metric= "mae")
plt.ylim([0,10])
plt.ylabel("MAE[FF]")

In [None]:
plotter.plot({"Basic":history},metric = "mse")
plt.ylim([0,20])
plt.ylabel("MSE[FFY2]")

### Lets update the model.fit call to automatically stop training when the validation score doesn't improve
WE 'll use an EarlyStopping callback that tests a training condition for every epoch. If a set amount of epochs elapses without showing improvement, then automatically stop the training
More here

In [None]:
model = build_model()
# The patience Parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss",patience=10)

#The patience parameter is the amount of epochs to check for improvement 
early_history = model.fit(np.array(x_train), np.array(y_train), epochs = EPOCHS, validation_split = 0.2, verbose= 0,
                          callbacks = [early_stop, tfdocs.modeling.EpochDots()])

In [None]:
plotter.plot({"Early Stopping":early_history},metric = "mae")
plt.ylim([0,10])

In [None]:
loss , mae , mse = model.evaluate(x_test, y_test , verbose = 2)
print("Testing set Mean Abs Error:{:5.2f}MPG".format(mae))

### Make Predictions 

In [None]:
test_predictions = model.predict(np.array(x_test)).flatten()
a = plt.axes(aspect = "equal")
plt.scatter(y_test , test_predictions)
plt.xlabel("True Values FFY")
plt.ylabel("Predictions FFY")

lims = [0,50]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims,lims)

### Lets take a look at the error distribution

In [None]:
error =test_predictions - np.array(y_test)
plt.hist(error, bins=25)
plt.xlabel("Prediction Error FFY")
_ = plt.ylabel("Count")

## Conclusion

This Notebook introduced a few techniques to handle a regression problem <br>
* Mean Squared Error(MSE) is a common loss function used for regression problems (different loss functions are used for calssification problems)
* Evaluation Metrics used for regression differ from classification. A common regression metric is Mean Absolute Error (MAE)
* When Numeric input data features have values with different ranges, each feature should be scaled independently to the same range
* If there is not much training data, one technique is to prefer a small network with few hidden layers to avoid overfittintg.
* Early stopping is a useful technique to prevent overfitting

### Default Model

In [23]:
x_ = np.array(x)
y_ = np.array(y)

In [40]:
x_.shape

(1000, 10332)

In [41]:
from keras.models import Sequential
model =Sequential()
model.add(Dense(32,input_shape= (10332,)))
model.add(Dense(1))
model.compile(optimizer = "adam", loss = "mean_squared_error"  )

# This Builds the model for the first time
model.fit(x_,y_,batch_size = 32, epochs=120)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120


<keras.callbacks.callbacks.History at 0x203c0e03248>

In [38]:
#model.summary()
#model.weights

In [42]:
pd.DataFrame(model.predict(np.array(x_test))[:10])

Unnamed: 0,0
0,0.551289
1,0.632919
2,0.518726
3,0.428922
4,0.592852
5,0.588358
6,0.653202
7,0.644636
8,0.531708
9,0.635265


In [36]:
y_test.head()

Unnamed: 0,V1
313,0.125959
8,0.186891
698,0.05909
699,0.048027
251,0.091914
