# Feed Forward Neural Network Regression with Keras

### Predict fuel efficiency (Copyright 2018 The TensorFlow Authors)

## Imports

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Load data

In [None]:
# Fetch the Auto MPG data file; `get_file` returns the local path of the download.
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
path = keras.utils.get_file("auto-mpg.data", data_url)
path

Import it using pandas

In [None]:
# The file has no header row, so the column names are supplied explicitly.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin'
]

# "?" is read as a missing value; anything after a tab is treated as a comment
# and skipped (presumably the trailing car-name field -- confirm against the file).
raw_ds = pd.read_csv(
    path,
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True)

# Work on a copy so the raw frame stays available for inspection.
ds = raw_ds.copy()
ds.tail()

## Data cleansing

In [None]:
# Number of missing values in each column.
ds.isnull().sum()

In [None]:
# Discard every row that contains at least one missing value.
ds = ds.dropna(axis=0, how='any')

The `"Origin"` column is really categorical, not numeric, so first map its codes to country names and then convert it to a one-hot encoding:

In [None]:
# Replace the numeric origin codes with readable country names.
origin_names = {1: 'USA', 2: 'Europe', 3: 'Japan'}
ds['Origin'] = ds['Origin'].map(origin_names)

In [None]:
# One-hot encode the categorical 'Origin' column. The empty prefix/separator
# makes the new columns carry the bare category names.
# dtype=float is requested explicitly: recent pandas releases emit boolean
# dummy columns by default, and boolean columns are left out of the numeric
# `describe()` statistics, so they would have no mean/std to normalize with.
ds = pd.get_dummies(ds, columns=['Origin'], prefix='', prefix_sep='', dtype=float)
ds.tail()

In [None]:
# Reproducible random split: a fixed-seed sample becomes the training set and
# every row that was not sampled becomes the test set.
train_fraction = 0.8
train_ds = ds.sample(frac=train_fraction, random_state=0)
test_ds = ds.drop(index=train_ds.index)

## Data analysis

In [None]:
# Pairwise scatter plots for a few columns, with KDE curves on the diagonal.
pairplot_columns = ["MPG", "Cylinders", "Displacement", "Weight"]
sns.pairplot(train_ds[pairplot_columns], diag_kind="kde");

Also look at the overall statistics:

In [None]:
# Summary statistics of the training features: the target column is removed
# and the table is transposed so that each row describes one feature.
stats = train_ds.describe().drop(columns="MPG").transpose()
stats

## Separate target and Normalize

In [None]:
# Split the target off from the features. `pop` removes the 'MPG' column from
# each frame in place, so afterwards `train_ds` / `test_ds` hold features only
# and running this cell a second time raises KeyError.
train_labels = train_ds.pop('MPG')
test_labels = test_ds.pop('MPG')

### Normalize the data

Look again at the `stats` table above and note how different the ranges of the features are.

In [None]:
def normalize(x, feature_stats=None):
  """Scale each column by subtracting its mean and dividing by its std.

  Args:
    x: DataFrame whose column names match the index of the statistics table.
    feature_stats: optional table with 'mean' and 'std' columns indexed by
      feature name. When omitted, the notebook-level `stats` frame is used,
      which keeps existing `normalize(x)` calls working unchanged.

  Returns:
    A DataFrame shaped like `x` holding the scaled values.
  """
  if feature_stats is None:
    # Fall back to the notebook-level statistics table.
    feature_stats = stats
  return (x - feature_stats['mean']) / feature_stats['std']

# Both splits are scaled with the same `stats` table, which was computed from
# the training set only.
norm_train_ds = normalize(train_ds)
norm_test_ds = normalize(test_ds)

## Build Model

In [None]:
def build_model():
  """Create and compile the regression network.

  The network has two 64-unit ReLU hidden layers followed by a single linear
  output unit. The input width is taken from the number of columns in the
  notebook-level `train_ds` frame.

  Returns:
    A compiled Keras model trained on MSE loss with RMSprop, reporting
    'mae' and 'mse' as metrics.
  """
  n_features = len(train_ds.keys())

  model = keras.Sequential()
  model.add(layers.Dense(64, activation='relu', input_shape=[n_features]))
  model.add(layers.Dense(64, activation='relu'))
  model.add(layers.Dense(1))

  model.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'])
  return model

In [None]:
# Instantiate the network and print its layer-by-layer summary.
model = build_model()
model.summary()

In [None]:
# Sanity check: run the still-untrained model on the first ten training rows.
example = norm_train_ds.head(10)
model.predict(example)

## Train

In [None]:
# Train for 1000 epochs, holding out 20% of the training data for validation.
# The returned object keeps the per-epoch metrics used in the following cells.
res = model.fit(
    norm_train_ds,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=1)

In [None]:
# Tabulate the training history, one row per epoch, and show the final epochs.
hist = pd.DataFrame(res.history).assign(epoch=res.epoch)
hist.tail()

In [None]:
# Score the trained model on the test set; the three returned values are
# unpacked as loss, MAE and MSE.
test_metrics = model.evaluate(norm_test_ds, test_labels, verbose=2)
loss, mae, mse = test_metrics

report = "Testing set Mean Abs Error: {:5.2f} MPG"
print(report.format(mae))

## Predict

Finally, predict MPG values using data in the testing set:

In [None]:
# Predict on the test features and flatten the model output to one dimension.
pred = model.predict(norm_test_ds).flatten()

# Square plot with identical limits on both axes; the line drawn through
# `lims` is the diagonal where prediction equals the true value.
lims = [0, 50]
a = plt.axes(aspect='equal')
a.scatter(test_labels, pred)
a.set_xlabel('True Values [MPG]')
a.set_ylabel('Predictions [MPG]')
a.set_xlim(lims)
a.set_ylim(lims)
_ = a.plot(lims, lims)


In [None]:
# Distribution of the prediction errors on the test set.
error = pred - test_labels

# `sns.distplot` is deprecated in seaborn and scheduled for removal, so the
# histogram is drawn with matplotlib directly; axis labels are added so the
# figure can be read on its own.
plt.hist(error, bins=25)
plt.xlabel('Prediction Error [MPG]')
plt.ylabel('Count');

In [16]:
# Create a Spark context for the Elephas experiment below. 'local[8]' is the
# Spark master URL for running locally with 8 worker threads.
from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName('Elephas_App').setMaster('local[8]')
sc = SparkContext(conf=conf)

In [17]:
# Build a Spark RDD from the normalized training features and their labels.
# NOTE(review): pandas objects are passed here; confirm that `to_simple_rdd`
# accepts them or whether NumPy arrays are expected instead.
from elephas.utils.rdd_utils import to_simple_rdd
rdd = to_simple_rdd(sc, norm_train_ds, train_labels)



In [18]:
# Wrap the already-compiled Keras model in an Elephas SparkModel and train it
# on the RDD.
# NOTE(review): the recorded output of this cell ends with
# "ValueError: ('Could not interpret optimizer identifier:', False)", so the
# fit presumably does not complete in this environment -- confirm the
# Elephas / TensorFlow version compatibility before relying on it.
from elephas.spark_model import SparkModel

spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
spark_model.fit(rdd, epochs=20, batch_size=32, verbose=0, validation_split=0.1)

  if self.mode is not 'synchronous':


>>> Fit model


ValueError: ('Could not interpret optimizer identifier:', False)

In [None]:
import tensorflow as tf

In [None]:
from tensorflow.keras.utils import generic_utils

## Credits & Links

https://www.tensorflow.org/tutorials/keras/regression