In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

keras.__version__

Using TensorFlow backend.


'2.2.4-tf'

In [2]:
# Load dataset
dataframe = pd.read_csv("dataset.csv", header=0)
dataset = dataframe.values

X1 = dataset[:,1] # Get the DSCR as RAW value.
Y = dataset[:,0].astype(float) # Get IPC label for training as Float.
X2 = dataset[0:,2:].astype(float) # Get 4 last IPC and 16 counter values as Float.

print("Counters:\n")
print(X2)
print("\nDSCR values:\n")
print(X1)
print("\nIPC labels:\n")
print(Y)

Counters:

[[1.31943900e+00 1.29050400e+00 1.30348400e+00 ... 6.58438130e+07
  1.70366900e+06 2.04187930e+07]
 [1.29835700e+00 1.27056200e+00 1.29895900e+00 ... 6.43080120e+07
  4.33270700e+06 1.93726630e+07]
 [1.28217200e+00 1.29810500e+00 1.28717600e+00 ... 7.15462400e+07
  5.29492800e+06 1.93492280e+07]
 ...
 [1.00086100e+00 1.00314100e+00 1.00086100e+00 ... 4.48407155e+08
  1.98159000e+06 8.73024000e+05]
 [1.00087000e+00 1.00318500e+00 1.00086900e+00 ... 4.48996860e+08
  1.72234800e+06 8.71582000e+05]
 [1.00088000e+00 1.00676100e+00 1.00087800e+00 ... 4.48782413e+08
  1.67138200e+06 8.62587000e+05]]

DSCR values:

[ 0.  0.  0. ... 71. 71. 71.]

IPC labels:

[1.298357 1.282172 1.28414  ... 1.00087  1.00088  1.000897]


In [3]:
# Encode class DSCR as integer values
encoder = LabelEncoder()
encoder.fit(X1)
encoded_X1 = encoder.transform(X1)

# Convert integers to dummy variables (one hot encoded).
dummy_x = np_utils.to_categorical(encoded_X1).astype(float)

# Convert to dataframe to merge columns
dataframe_X1 = pd.DataFrame(dummy_x);
dataframe_X2 = pd.DataFrame(X2);

dataframe_X = pd.DataFrame.merge(dataframe_X1, dataframe_X2,left_index=True, right_index=True) # Merge model inputs

# Converting back to <class 'numpy.ndarray'>
X = dataframe_X.values;

# Labels and inputs, must be the same size
print("Labels: "+str(len(Y))) # Model label training input
print("Inputs: "+str(len(X))) # Model inputs

# Fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# Taining and test split-> 30% test - 70% training
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=seed)

# Train and test data, must be the same size
print("X_Train: "+str(len(X_train)))
print("Y_Train: "+str(len(Y_train)))
print("X_Test: "+str(len(X_test)))
print("Y_Test: "+str(len(Y_test)))

# Shape of the input, number of model features
in_shape = X_train.shape[1]
print("Shape = "+str(in_shape))

Labels: 23040
Inputs: 23040
X_Train: 16128
Y_Train: 16128
X_Test: 6912
Y_Test: 6912
Shape = 24


In [4]:
print("Number of inputs for the model: "+str(in_shape))

# https://keras.io/optimizers/
optimizers = ['sgd','rmsprop','adagrad','adadelta','adam','adamax','nadam'];

# https://keras.io/metrics/
metrics = ['mean_absolute_error','cosine_proximity','sparse_top_k_categorical_accuracy','top_k_categorical_accuracy','sparse_categorical_accuracy','categorical_accuracy','binary_accuracy','accuracy'];

# https://keras.io/losses/
losses = ['mean_squared_error','mean_absolute_error','mean_absolute_percentage_error','mean_squared_logarithmic_error','squared_hinge','hinge','categorical_hinge','logcosh','huber_loss','categorical_crossentropy','sparse_categorical_crossentropy','binary_crossentropy','kullback_leibler_divergence','poisson','cosine_proximity','is_categorical_crossentropy'];

# https://keras.io/activations/
activations = ['relu','softmax','tanh','elu','selu','softplus','softsign','sigmoid','hard_sigmoid','exponential','linear'];

# Model definition as function
def PC_IBM_model():
	# Create model
	model = Sequential()
	
	# Deepnet layers definition
	model.add(Dense(10, input_shape=(in_shape,), activation='sigmoid'))
	model.add(Dense(32, activation='sigmoid'))
	model.add(Dense(1))
	
	# Compile model
	model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mean_absolute_error'])

	# Show model definition
	model.summary();
	
	return model

Number of inputs for the model: 24


In [5]:
# batch_size -> Divide dataset in batches of 100.
# epochs -> Number of times that we are applying the process to each 100 examples of data.
num_epochs = 1200;
num_batch = 2000;
model = PC_IBM_model();

#estimator = KerasClassifier(build=PC_IBM_model, epochs=5, batch_size=100)
model.fit(X_train, Y_train, epochs=num_epochs, batch_size=num_batch);

an_absolute_error: 0.2441
Epoch 1037/1200
Epoch 1038/1200
Epoch 1039/1200
Epoch 1040/1200
Epoch 1041/1200
Epoch 1042/1200
Epoch 1043/1200
Epoch 1044/1200
Epoch 1045/1200
Epoch 1046/1200
Epoch 1047/1200
Epoch 1048/1200
Epoch 1049/1200
Epoch 1050/1200
Epoch 1051/1200
Epoch 1052/1200
Epoch 1053/1200
Epoch 1054/1200
Epoch 1055/1200
Epoch 1056/1200
Epoch 1057/1200
Epoch 1058/1200
Epoch 1059/1200
Epoch 1060/1200
Epoch 1061/1200
Epoch 1062/1200
Epoch 1063/1200
Epoch 1064/1200
Epoch 1065/1200
Epoch 1066/1200
Epoch 1067/1200
Epoch 1068/1200
Epoch 1069/1200
Epoch 1070/1200
Epoch 1071/1200
Epoch 1072/1200
Epoch 1073/1200
Epoch 1074/1200
Epoch 1075/1200
Epoch 1076/1200
Epoch 1077/1200
Epoch 1078/1200
Epoch 1079/1200
Epoch 1080/1200
Epoch 1081/1200
Epoch 1082/1200
Epoch 1083/1200
Epoch 1084/1200
Epoch 1085/1200
Epoch 1086/1200
Epoch 1087/1200
Epoch 1088/1200
Epoch 1089/1200
Epoch 1090/1200
Epoch 1091/1200
Epoch 1092/1200
Epoch 1093/1200
Epoch 1094/1200
Epoch 1095/1200
Epoch 1096/1200
Epoch 1097/120

In [6]:
test_loss, test_metric = model.evaluate(X_test, Y_test)
print('Test metric: %.2f%%' % (test_metric*100))
print('Test loss: %.2f%%' % (test_loss*100))

Test metric: 24.79%
Test loss: 11.39%


In [7]:
# Save model and architecture to single file
model.save("PC_IBM_model.h5")
print("Saved model to disk")

Saved model to disk


In [8]:
from keras.models import load_model
# load model
model = load_model('PC_IBM_model.h5')