In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

import pandas as pd
import io
import os
import requests
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.stats import zscore

pd.options.mode.chained_assignment = None  # default='warn'




df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/auto-mpg.csv", 
    na_values=['NA', '?'])
cars = df["name"]
nonna = df[df["horsepower"].notna()]
na = df[df["horsepower"].isnull()]
X = nonna[["cylinders", "displacement", "acceleration"]]
y = nonna["horsepower"]
reg = LinearRegression().fit(X, y)
na["horsepower"] = reg.predict(na[["cylinders", "displacement", "acceleration"]])
df = pd.concat([nonna, na], axis=0)
df = df.reindex(np.random.permutation(df.index))
or_dum = pd.get_dummies(df["origin"], prefix="origin_")
df = pd.concat([df, or_dum], axis=1)
df.drop("origin", inplace=True, axis=1)
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,name,origin__1,origin__2,origin__3
95,12.0,8,455.0,225.0,4951,11.0,73,buick electra 225 custom,1,0,0
116,16.0,8,400.0,230.0,4278,9.5,73,pontiac grand prix,1,0,0
283,20.2,6,232.0,90.0,3265,18.2,79,amc concord dl 6,1,0,0
339,26.6,4,151.0,84.0,2635,16.4,81,buick skylark,1,0,0
42,12.0,8,383.0,180.0,4955,11.5,71,dodge monaco (sw),1,0,0


In [5]:
x = df[['cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'year', 'origin__1', "origin__2", "origin__3"]].values
y = df['mpg'].values 

In [6]:
model = Sequential()
model.add(Dense(25, input_dim=x.shape[1], activation='relu')) # Hidden 1
model.add(Dense(10, activation='relu')) # Hidden 2
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x,y,verbose=2,epochs=200)

Train on 398 samples
Epoch 1/200
398/398 - 2s - loss: 22100.6686
Epoch 2/200
398/398 - 0s - loss: 4060.1257
Epoch 3/200
398/398 - 0s - loss: 1101.0118
Epoch 4/200
398/398 - 0s - loss: 847.3011
Epoch 5/200
398/398 - 0s - loss: 699.6429
Epoch 6/200
398/398 - 0s - loss: 583.1104
Epoch 7/200
398/398 - 0s - loss: 540.9216
Epoch 8/200
398/398 - 0s - loss: 511.0837
Epoch 9/200
398/398 - 0s - loss: 478.2251
Epoch 10/200
398/398 - 0s - loss: 448.2805
Epoch 11/200
398/398 - 0s - loss: 417.0482
Epoch 12/200
398/398 - 0s - loss: 391.2414
Epoch 13/200
398/398 - 0s - loss: 357.6192
Epoch 14/200
398/398 - 0s - loss: 330.4791
Epoch 15/200
398/398 - 0s - loss: 306.1447
Epoch 16/200
398/398 - 0s - loss: 277.2476
Epoch 17/200
398/398 - 0s - loss: 251.0764
Epoch 18/200
398/398 - 0s - loss: 227.5821
Epoch 19/200
398/398 - 0s - loss: 204.0147
Epoch 20/200
398/398 - 0s - loss: 185.0441
Epoch 21/200
398/398 - 0s - loss: 164.5667
Epoch 22/200
398/398 - 0s - loss: 147.0716
Epoch 23/200
398/398 - 0s - loss: 134.

Epoch 193/200
398/398 - 0s - loss: 15.4722
Epoch 194/200
398/398 - 0s - loss: 13.4055
Epoch 195/200
398/398 - 0s - loss: 13.2594
Epoch 196/200
398/398 - 0s - loss: 14.0973
Epoch 197/200
398/398 - 0s - loss: 12.9503
Epoch 198/200
398/398 - 0s - loss: 12.9859
Epoch 199/200
398/398 - 0s - loss: 13.0542
Epoch 200/200
398/398 - 0s - loss: 12.9596


<tensorflow.python.keras.callbacks.History at 0x11e24988>

In [0]:
pred = model.predict(x)
print(f"Shape: {pred.shape}")
print(pred[0:10])

Shape: (398, 1)
[[29.78388 ]
 [15.960646]
 [10.26309 ]
 [17.36685 ]
 [19.876402]
 [26.665403]
 [26.811247]
 [25.912039]
 [14.67024 ]
 [30.315916]]


In [0]:
score = np.sqrt(metrics.mean_squared_error(pred,y))
print(f"Final score (RMSE): {score}")

Final score (RMSE): 4.140381098095887


In [0]:
for i in range(10):
    print(f"{i+1}. Car name: {cars[i]}, MPG: {y[i]}, predicted MPG: {pred[i]}")

1. Car name: chevrolet chevelle malibu, MPG: 19.8, predicted MPG: [24.391989]
2. Car name: buick skylark 320, MPG: 26.0, predicted MPG: [29.18781]
3. Car name: plymouth satellite, MPG: 14.0, predicted MPG: [14.881704]
4. Car name: amc rebel sst, MPG: 34.5, predicted MPG: [29.126366]
5. Car name: ford torino, MPG: 24.5, predicted MPG: [25.878511]
6. Car name: ford galaxie 500, MPG: 26.0, predicted MPG: [30.020605]
7. Car name: chevrolet impala, MPG: 12.0, predicted MPG: [11.597242]
8. Car name: plymouth fury iii, MPG: 21.0, predicted MPG: [21.410141]
9. Car name: pontiac catalina, MPG: 26.0, predicted MPG: [28.435133]
10. Car name: amc ambassador dpl, MPG: 14.0, predicted MPG: [14.768454]


In [0]:
import pandas as pd
import io
import requests
import numpy as np
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping

df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/iris.csv", 
    na_values=['NA', '?'])

# Convert to numpy - Classification
x = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values
dummies = pd.get_dummies(df['species']) # Classification
species = dummies.columns
y = dummies.values


# Build neural network
model = Sequential()
model.add(Dense(50, input_dim=x.shape[1], activation='relu')) # Hidden 1
model.add(Dense(25, activation='relu')) # Hidden 2
model.add(Dense(y.shape[1],activation='softmax')) # Output

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(x,y,verbose=2,epochs=100)

Epoch 1/100
5/5 - 0s - loss: 1.5324
Epoch 2/100
5/5 - 0s - loss: 1.2259
Epoch 3/100
5/5 - 0s - loss: 1.0221
Epoch 4/100
5/5 - 0s - loss: 0.8774
Epoch 5/100
5/5 - 0s - loss: 0.7756
Epoch 6/100
5/5 - 0s - loss: 0.7089
Epoch 7/100
5/5 - 0s - loss: 0.6581
Epoch 8/100
5/5 - 0s - loss: 0.6169
Epoch 9/100
5/5 - 0s - loss: 0.5817
Epoch 10/100
5/5 - 0s - loss: 0.5530
Epoch 11/100
5/5 - 0s - loss: 0.5213
Epoch 12/100
5/5 - 0s - loss: 0.4958
Epoch 13/100
5/5 - 0s - loss: 0.4756
Epoch 14/100
5/5 - 0s - loss: 0.4556
Epoch 15/100
5/5 - 0s - loss: 0.4339
Epoch 16/100
5/5 - 0s - loss: 0.4098
Epoch 17/100
5/5 - 0s - loss: 0.4095
Epoch 18/100
5/5 - 0s - loss: 0.3902
Epoch 19/100
5/5 - 0s - loss: 0.3755
Epoch 20/100
5/5 - 0s - loss: 0.3614
Epoch 21/100
5/5 - 0s - loss: 0.3547
Epoch 22/100
5/5 - 0s - loss: 0.3482
Epoch 23/100
5/5 - 0s - loss: 0.3348
Epoch 24/100
5/5 - 0s - loss: 0.3276
Epoch 25/100
5/5 - 0s - loss: 0.3113
Epoch 26/100
5/5 - 0s - loss: 0.3038
Epoch 27/100
5/5 - 0s - loss: 0.2982
Epoch 28/1

<tensorflow.python.keras.callbacks.History at 0x7fe76396ca58>

In [0]:
pred = model.predict(x)
print(f"Shape: {pred.shape}")
print(pred[0:10])

Shape: (150, 3)
[[9.99815524e-01 1.84525212e-04 1.85941218e-08]
 [9.99294281e-01 7.05586688e-04 1.28294701e-07]
 [9.99627709e-01 3.72200768e-04 7.34074206e-08]
 [9.98987734e-01 1.01204624e-03 2.92811052e-07]
 [9.99840975e-01 1.58977971e-04 1.74473627e-08]
 [9.99765694e-01 2.34254156e-04 1.96927932e-08]
 [9.99562323e-01 4.37569543e-04 1.08409232e-07]
 [9.99643683e-01 3.56295204e-04 4.81689817e-08]
 [9.98640835e-01 1.35851337e-03 5.41718464e-07]
 [9.99323368e-01 6.76511321e-04 1.13288856e-07]]


In [0]:
np.set_printoptions(suppress=True)
print(f"Shape: {pred.shape}")
print(pred[0:10])

Shape: (150, 3)
[[0.9998155  0.00018453 0.00000002]
 [0.9992943  0.00070559 0.00000013]
 [0.9996277  0.0003722  0.00000007]
 [0.99898773 0.00101205 0.00000029]
 [0.999841   0.00015898 0.00000002]
 [0.9997657  0.00023425 0.00000002]
 [0.9995623  0.00043757 0.00000011]
 [0.9996437  0.0003563  0.00000005]
 [0.99864084 0.00135851 0.00000054]
 [0.99932337 0.00067651 0.00000011]]


In [0]:
print(y[0:10])

[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]]


In [0]:
predict_classes = np.argmax(pred,axis=1)
expected_classes = np.argmax(y,axis=1)
print(f"Predictions: {predict_classes}")
print(f"Expected: {expected_classes}")

Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1
 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Expected: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [0]:
print(species[predict_classes[1:10]])

Index(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa'],
      dtype='object')


In [0]:
from sklearn.metrics import accuracy_score
# Accuracy might be a more easily understood error metric.  It is essentially a test score.  For all of the iris predictions,
# what percent were correct?  The downside is it does not consider how confident the neural network was in each prediction.
correct = accuracy_score(expected_classes,predict_classes)
print(f"Accuracy: {correct}")

Accuracy: 0.98


In [0]:
# ad hoc prediction
sample_flower = np.array( [[5.0,3.0,4.0,2.0]], dtype=float)
pred = model.predict(sample_flower)
print(pred)
pred = np.argmax(pred)
print(f"Predict that {sample_flower} is: {species[pred]}")



[[0.00214385 0.35167828 0.6461779 ]]
Predict that [[5. 3. 4. 2.]] is: Iris-virginica
