In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# Read the CSV (its first row is the header) and work on the raw value matrix.
dataframe = pd.read_csv("dataset.csv", header=0)
dataset = dataframe.values

# Column layout: 0 -> IPC label, 1 -> DSCR, 2.. -> 4 last IPC + 16 counter values.
Y = dataset[:, 0].astype(float)    # training target (IPC), as float
X1 = dataset[:, 1]                 # DSCR setting, kept as the raw value
X2 = dataset[:, 2:].astype(float)  # numeric inputs, as float

# Show each group under its heading (heading, blank line, then the array).
print("Counters:\n", X2, sep="\n")
print("\nDSCR values:\n", X1, sep="\n")
print("\nIPC label:\n", Y, sep="\n")

Counters:

[[1.31943900e+00 1.29050400e+00 1.30348400e+00 ... 6.58438130e+07
  1.70366900e+06 2.04187930e+07]
 [1.29835700e+00 1.27056200e+00 1.29895900e+00 ... 6.43080120e+07
  4.33270700e+06 1.93726630e+07]
 [1.28217200e+00 1.29810500e+00 1.28717600e+00 ... 7.15462400e+07
  5.29492800e+06 1.93492280e+07]
 ...
 [1.00086100e+00 1.00314100e+00 1.00086100e+00 ... 4.48407155e+08
  1.98159000e+06 8.73024000e+05]
 [1.00087000e+00 1.00318500e+00 1.00086900e+00 ... 4.48996860e+08
  1.72234800e+06 8.71582000e+05]
 [1.00088000e+00 1.00676100e+00 1.00087800e+00 ... 4.48782413e+08
  1.67138200e+06 8.62587000e+05]]

DSCR values:

[ 0.  0.  0. ... 71. 71. 71.]

IPC label:

[1.298357 1.282172 1.28414  ... 1.00087  1.00088  1.000897]


In [3]:
# Encode the DSCR setting as integer class ids.
encoder = LabelEncoder()
encoded_X1 = encoder.fit_transform(X1)

# Expand the class ids into one-hot (dummy) columns.
dummy_x = np_utils.to_categorical(encoded_X1)

# Wrap both feature groups in DataFrames so they can be joined column-wise.
dataframe_X1 = pd.DataFrame(dummy_x)
dataframe_X2 = pd.DataFrame(X2)

print("DataFrame X1:")
print(dataframe_X1)
print("DataFrame X2:")
print(dataframe_X2)

# Put the two frames side by side, row i next to row i.
# A key-based merge() is the wrong tool here: without `on=` it joins on the
# column labels the frames share (0..3), i.e. it compares one-hot values with
# IPC values, finds no matches and fills the X2 columns with NaN.
# ignore_index=True renumbers the resulting columns 0..N-1 so labels stay unique.
dataframe_X = pd.concat([dataframe_X1, dataframe_X2], axis=1, ignore_index=True)

print("DataFrame X:")
print(dataframe_X)

# Back to a plain numpy array for scikit-learn / Keras.
X = dataframe_X.values
print("X:")
print(X)

# Every row must carry all one-hot columns plus all numeric columns.
assert X.shape == (len(Y), dummy_x.shape[1] + X2.shape[1]), "unexpected feature matrix shape"

# Labels and inputs must be the same size
print("Labels: "+str(len(Y))) # Model label training input
print("Inputs: "+str(len(X))) # Model inputs

# Fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# Training and test split -> 30% test - 70% training
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=seed)

# Length of train and test data (inputs first, then labels)
print("X_Train: "+str(len(X_train)))
print("X_Test: "+str(len(X_test)))
print("Y_Train: "+str(len(Y_train)))
print("Y_Test: "+str(len(Y_test)))

DataFrame X1:
         0    1    2    3
0      1.0  0.0  0.0  0.0
1      1.0  0.0  0.0  0.0
2      1.0  0.0  0.0  0.0
3      1.0  0.0  0.0  0.0
4      1.0  0.0  0.0  0.0
...    ...  ...  ...  ...
23035  0.0  0.0  1.0  0.0
23036  0.0  0.0  1.0  0.0
23037  0.0  0.0  1.0  0.0
23038  0.0  0.0  1.0  0.0
23039  0.0  0.0  1.0  0.0

[23040 rows x 4 columns]
DataFrame X2:
              0         1         2         3          4          5  \
0      1.319439  1.290504  1.303484  1.313838  5126536.0  2800303.0   
1      1.298357  1.270562  1.298959  1.277435  4389713.0  3869025.0   
2      1.282172  1.298105  1.287176  1.285875  4225052.0  3701957.0   
3      1.284140  1.249545  1.284187  1.269528  5294369.0  2927305.0   
4      1.276229  1.254733  1.278763  1.271525  3748437.0  4127689.0   
...         ...       ...       ...       ...        ...        ...   
23035  1.000843  1.003143  1.000842  1.003319    56283.0   105503.0   
23036  1.000853  1.003033  1.000851  1.003258    56111.0   104254.

In [None]:
# Model definition as function
def PC_IBM_model(input_dim=None):
	"""Build and compile the feed-forward network used by the estimator.

	Args:
		input_dim: Number of input features. When omitted, the width of the
			training matrix ``X_train`` (created by the train/test split cell)
			is used, so the input layer follows the data instead of a
			hard-coded column count.

	Returns:
		A compiled ``Sequential`` model: Dense(8, relu) -> Dense(2, softmax),
		with categorical cross-entropy loss, the Adam optimizer and the
		accuracy metric.
	"""
	if input_dim is None:
		# The one-hot DSCR columns add to the numeric inputs, so the width is
		# read from the data rather than fixed at 20.
		input_dim = X_train.shape[1]

	# Create model
	model = Sequential()
	model.add(Dense(8, input_dim=input_dim, activation='relu'))
	# NOTE(review): the labels fed to this model (Y) look like continuous IPC
	# values, while this head is a 2-way softmax classifier — confirm whether a
	# single linear output with a regression loss is what is intended.
	model.add(Dense(2, activation='softmax'))
	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [None]:
# Wrap the Keras model builder in a scikit-learn compatible estimator and
# train it on the training split (silent: verbose=0).
estimator = KerasClassifier(
    build_fn=PC_IBM_model,
    epochs=200,
    batch_size=5,
    verbose=0,
)
estimator.fit(X_train, Y_train);

In [None]:
# 10-fold splitter; shuffling is seeded so the folds are repeatable.
kfold = KFold(
    n_splits=10,
    random_state=seed,
    shuffle=True,
)

In [None]:
# Score the estimator on every fold of the training split, then report the
# mean and standard deviation of the fold scores as percentages.
results = cross_val_score(
    estimator,
    X_train,
    Y_train,
    cv=kfold,
)
print("Baseline: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

In [None]:
# Predict on the held-out split.
predictions = estimator.predict(X_test)
print(predictions)

# `encoder` was fitted on the DSCR input column (X1), not on the target, so
# running the predictions through encoder.inverse_transform() would decode them
# into the wrong space (or fail on values it has never seen). Compare the
# predictions with the held-out labels instead.
print(pd.DataFrame({"predicted": np.ravel(predictions), "actual": Y_test}).head(10))