In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras 
import os
import time
from sklearn.metrics import accuracy_score

In [13]:
class ReboilerBinaryClassifier:
    """Load a reboiler CSV, split it, and train/evaluate a small dense classifier.

    Column layout assumed by ``split_data``: the first column is not a feature
    and is dropped, the last column is the label, and everything in between is
    fed to the network (``FEATURES_NUM`` columns are expected by ``create_model``).
    """

    # Number of feature columns the input layer expects.
    FEATURES_NUM = 8
    # Number of classes, i.e. the width of the softmax output layer.
    OUTPUT_NUM = 2

    def __init__(self, filepath):
        """Remember the path of the CSV file; nothing is read until ``load_data``.

        Args:
            filepath: path of the CSV file handed to ``pandas.read_csv``.
        """
        self.filepath = filepath

    def load_data(self):
        """Read the CSV at ``self.filepath`` and return it after ``process``."""
        data = pd.read_csv(self.filepath)
        return self.process(data)

    def process(self, pandas_df):
        """Replace NaNs in every numeric column with that column's mean.

        Non-numeric columns (for example a timestamp column read as strings)
        are left untouched, because a mean cannot be computed for them.

        Args:
            pandas_df: the DataFrame to clean. It is modified in place.

        Returns:
            The same DataFrame object, for call chaining.
        """
        for col in pandas_df:
            column = pandas_df[col]
            if not pd.api.types.is_numeric_dtype(column):
                continue
            if column.isnull().any():
                # Assign the filled column back to the frame: an in-place
                # fillna on the selected column is not guaranteed to reach the
                # parent frame (it is a no-op under pandas Copy-on-Write).
                pandas_df[col] = column.fillna(column.mean())

        return pandas_df

    def split_data(self, data, val_size=0.2, test_size=0.2, random_state=42):
        """Split ``data`` into train, validation and test sets.

        Args:
            data: DataFrame whose first column is ignored and whose last
                column is the label.
            val_size: fraction of the rows remaining *after* the test split
                that goes to the validation set.
            test_size: fraction of all rows that goes to the test set.
            random_state: seed passed to both ``train_test_split`` calls.

        Returns:
            ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
        """
        features = data.iloc[:, 1:-1]  # all rows, all columns except first and last
        labels = data.iloc[:, -1]  # all rows, last column (label)

        # Carve off the test set first, then take the validation set from the rest.
        X_rest, X_test, y_rest, y_test = train_test_split(
            features, labels, test_size=test_size, random_state=random_state
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_rest, y_rest, test_size=val_size, random_state=random_state
        )
        return X_train, y_train, X_val, y_val, X_test, y_test

    def create_model(self, print_summary=True):
        """Build and compile the network.

        The model is one 32-unit sigmoid hidden layer followed by a softmax
        layer with one output per class, compiled with Adam,
        ``sparse_categorical_crossentropy`` loss and an accuracy metric.
        ``train_model`` decodes predictions with ``argmax``, which relies on
        this one-output-per-class layout.

        Args:
            print_summary: when true, print ``model.summary()``.

        Returns:
            The compiled Keras model.
        """
        model = keras.models.Sequential([
            keras.layers.Dense(32, input_shape=(self.FEATURES_NUM,), activation='sigmoid'),
            keras.layers.Dense(self.OUTPUT_NUM, activation="softmax"),
        ])

        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer="adam",
            metrics=["accuracy"],
        )
        if print_summary:
            model.summary()

        return model

    def train_model(self, model, X_train, y_train, X_val, y_val, X_test, y_test, epochs):
        """Fit ``model``, then print its accuracy on the test set.

        TensorBoard event files for the run are written to the directory
        returned by ``get_run_logdir``.

        Args:
            model: compiled Keras model, e.g. from ``create_model``.
            X_train, y_train: training features and labels.
            X_val, y_val: validation data evaluated after every epoch.
            X_test, y_test: held-out data used only for the final accuracy.
            epochs: number of training epochs.

        Returns:
            None. The test accuracy is printed.
        """
        run_logdir = self.get_run_logdir()
        tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)  # create event files within run_logdir
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            validation_data=(X_val, y_val),
            callbacks=[tensorboard_cb],
        )

        # Each prediction row holds one probability per class; take the most likely class.
        y_prob = model.predict(X_test)
        y_pred = y_prob.argmax(axis=-1)
        accuracy = accuracy_score(y_test, y_pred)
        print('accuracy = {}'.format(accuracy))

    @staticmethod
    def get_run_logdir():
        """Return a timestamped log directory path under ``./logs``.

        The path has the form ``./logs/run_YYYY_MM_DD-HH_MM_SS`` based on the
        current local time. The directory itself is not created here.
        """
        root_logdir = os.path.join(os.curdir, "logs")
        run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
        return os.path.join(root_logdir, run_id)


In [14]:
# Bind the classifier to the raw reboiler CSV (path is relative to the working directory).
r = ReboilerBinaryClassifier(filepath="Reboiler_Raw Data_With Timestamps.csv")

In [15]:
# Read the CSV into a DataFrame; load_data also runs the NaN mean-imputation step (process).
r_pandas = r.load_data()

In [16]:
# Preview the first five rows to sanity-check the column layout.
r_pandas.head(n=5)

Unnamed: 0,Fecha,VAL356CI8017-Conductividad,VAL356M003-Carga Motor,VAL356M014-Carga Motor,VAL356M015-Carga Motor,VAL356PI8026-Ind.Presión,VAL356PIC8025-Ind.Presión,VAL356TI8015-Ind.Temperatura,VAL356TIC8014-Ind.Temperatura,Estado
0,2006-10-24 23:38:00,12.3,68.026459,45.670056,30.343173,21.492553,20.9,71.883617,103.414846,0
1,2006-10-25 01:38:00,18.9,68.827098,45.924569,30.126654,20.954956,20.6,72.301453,103.487449,0
2,2006-10-25 03:38:00,25.9,68.871664,49.477196,29.701442,21.193045,20.3,71.775536,103.582936,0
3,2006-10-25 05:38:00,27.2,65.658285,51.649763,28.820497,21.021897,20.6,70.589861,103.6978,0
4,2006-10-25 07:38:00,22.0,64.166553,46.851182,27.760388,20.97588,20.4,71.762377,103.680362,0


In [20]:
# Split into train / validation / test sets using the classifier's default fractions.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = r.split_data(data=r_pandas)

In [21]:
# Report the shape of every split, one "<name> <shape>" line each.
print(
    *(
        f'{name} {part.shape}'
        for name, part in (
            ('X_train', X_train),
            ('y_train', y_train),
            ('X_val', X_val),
            ('y_val', y_val),
            ('X_test', X_test),
            ('y_test', y_test),
        )
    ),
    sep='\n',
)

X_train (27011, 8)
y_train (27011,)
X_val (6753, 8)
y_val (6753,)
X_test (8442, 8)
y_test (8442,)


In [22]:
# Build and compile the network; the layer summary is printed as a side effect.
model = r.create_model(print_summary=True)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 32)                288       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 66        
Total params: 354
Trainable params: 354
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Train for 30 epochs with per-epoch validation, then print accuracy on the test set.
r.train_model(
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test,
    epochs=30,
)

Train on 27011 samples, validate on 6753 samples
Epoch 1/30

2023-01-10 22:15:14.219401: I tensorflow/core/profiler/lib/profiler_session.cc:184] Profiler session started.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
accuracy = 0.9898128405591092


In [12]:
# Load (or reload) the TensorBoard notebook extension so the %tensorboard magic is available.
%reload_ext tensorboard
# %load_ext tensorboard
# Launch TensorBoard on port 6006 over ./logs, the root directory that
# ReboilerBinaryClassifier.get_run_logdir() places each run's event files under.
%tensorboard --logdir=./logs --port=6006