In [None]:
import os

# Silence TensorFlow's native (C++) log output. This has to be set BEFORE
# TensorFlow is imported: the native logger picks the value up when it is
# first used, which normally happens during the import itself.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
# Axes3D is not referenced by name; the import is kept for its side effect
# (older Matplotlib releases need it to register the '3d' projection).
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from tensorflow import keras
from tensorflow.keras import layers

## https://www.tensorflow.org/tutorials/keras/regression?hl=ko

###### Preprocessing Data
# Reading Data

# One CSV log per recording; all four live in the same Drive folder.
data1 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/043_200421_Santafe.csv'
data2 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/045_200422_Santafe.csv'
data3 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/046_200423_Santafe.csv'
data4 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/047_200424_Santafe.csv'

# Read each file and stack the rows into a single frame; ignore_index=True
# discards the per-file row numbers and assigns one continuous index.
dataFrame_raw = pd.concat(
    [pd.read_csv(csv_path) for csv_path in (data1, data2, data3, data4)],
    ignore_index=True,
)

# Target (model output) column.
output_variable_names = [
    'CAL_CO2Flowrate_gphr'
    #'CAL_NOxFlowrateLNTOutletValid_gps'
]

# Columns kept for modelling: the active (uncommented) entries are the two
# input features plus the target. The commented-out names are candidate
# columns that are currently not selected (presumably other channels present
# in the logs -- verify against the CSV headers before enabling one).
variable_names_to_extract = [
#   'AUX_NOxLNInlet_ppm',
#   'AUX_NOxLNTOutlet_ppm',
#   'CAL_Time_sec',
#   'AUX_RTCDate_YYYYMMDD',
    'OBD_CalEngLoad_perc',
#   'OBD_EngCoolantTemp_degC',
    'OBD_EngineSpeed_rpm',
#   'OBD_VehicleSpeed_kmph',
#   'OBD_ActEngPerTorque_perc',
#   'OBD_EngRefTorque_Nm',
#   'OBD_MAFSensor_gps',
#   'OBD_EGT11_degC',
#   'OBD_EGT12_degC',
#   'OBD_DPFDiffPress_kPa',
#   'AUX_NOxSCROutlet_ppm',
#   'AUX_lambdaLNTInlet_none',
#   'AUX_lambdaLNTOutlet_none',
#   'AUX_lambdaSCROutlet_none',
#   'AUX_O2FracLNTInlet_volPerc',
#   'AUX_O2FracLNTOutlet_volPerc',
#   'AUX_O2FracSCROutlet_volPerc',
#   'AUX_SensAmbTemp_degC',
#   'AUX_SensAmbRH_perc',
#   'AUX_SensBaroPress_kPa',
#   'AUX_SensTempTurbinOutlet_degC',
#   'AUX_SensTempInManiInlet_degC',
#   'AUX_SensTempTurbinInlet_degC',
#   'AUX_SensTempLNTOutlet_degC',
#   'AUX_SensTempSCROutlet_degC',
#   'AUX_SensTempCompressorOutlet_degC',
#   'AUX_SensPressEGRCoolerInlet_absBar',
#   'AUX_SensDiffPressDPF_kPa',
#   'AUX_SensDiffPressSCR_kPa',
#   'CAL_ExhFlowrate_gps',
    'CAL_CO2Flowrate_gphr',
#   'CAL_NOxFlowrateLNTInletValid_gps',
#   'CAL_NOxFlowrateLNTOutletValid_gps',
#   'CAL_NOxFlowrateSCROutletValid_gps'
]

# Select the columns from the single list above rather than repeating the
# names in a second hard-coded list, so the two can never drift apart.
# Column order is (engine load, engine speed, CO2 flow rate); later
# positional indexing into the feature array relies on that order.
dataFrame = dataFrame_raw[variable_names_to_extract]

#print(dataFrame.head())

###### Cleaning data
RPM_min = 100                      # rows with a lower engine speed are treated as engine-stop and dropped
NOx_max = 1649                     # upper bound for unclipped NOx readings (the sensor maxed out at 1650 ppm); unused while the NOx filter is disabled

# Keep engine-running rows only, then discard every row that still has a
# missing value.
# The NOx-clipping filter is currently disabled. To re-enable it, 'AUX_NOxLNInlet_ppm'
# must be selected, the mask extended with
# `& (dataFrame['AUX_NOxLNInlet_ppm'] <= NOx_max)`, and that column dropped afterwards.
dataFrame = dataFrame[dataFrame['OBD_EngineSpeed_rpm'].ge(RPM_min)].dropna()

###### Split train set & test set
# Random 80/20 split; the fixed random_state pins which rows are sampled, and
# the test set is every row whose index was not drawn for training.
train_dataset = dataFrame.sample(random_state=0, frac=0.8)
test_dataset = dataFrame.drop(index=train_dataset.index)


###### Separate the target column from the features
# pop() returns the column and removes it from the frame in place, so
# train_dataset / test_dataset hold only the input features afterwards.
train_labels = train_dataset.pop('CAL_CO2Flowrate_gphr')
test_labels = test_dataset.pop('CAL_CO2Flowrate_gphr')

## Check stats
# Per-feature summary, transposed so that each feature is a row and
# 'mean' / 'std' are columns.
train_stats = train_dataset.describe().T

# Summary of the target: a Series indexed by 'mean', 'std', ...
train_labels_stats = train_labels.describe()

###### Data normalization
def norm(x, stats=None):
  """Standardize feature columns: (x - mean) / std.

  Args:
    x: DataFrame of features. Its columns are aligned by label with the
      index of `stats`.
    stats: Optional table providing 'mean' and 'std' per feature (e.g. a
      transposed `DataFrame.describe()`). When omitted, the module-level
      `train_stats` is used, looked up at call time, which is the original
      behavior.

  Returns:
    A frame of the same shape as `x` holding the standardized values.
  """
  if stats is None:
    stats = train_stats
  return (x - stats['mean']) / stats['std']
# Both splits go through the same norm() call, i.e. the test features are
# scaled with the training-set statistics as well.
normed_train_data, normed_test_data = (
    norm(split) for split in (train_dataset, test_dataset)
)

def norm_label(x, stats=None):
  """Standardize target values: (x - mean) / std.

  Args:
    x: Series (or array-like) of target values.
    stats: Optional mapping/Series providing scalar 'mean' and 'std' entries
      (e.g. `Series.describe()`). When omitted, the module-level
      `train_labels_stats` is used, looked up at call time, which is the
      original behavior.

  Returns:
    The standardized values, same shape as `x`.
  """
  if stats is None:
    stats = train_labels_stats
  return (x - stats['mean']) / stats['std']
# Train and test targets are both scaled through norm_label(), so they share
# the training-label statistics.
normed_train_labels, normed_test_labels = map(norm_label, (train_labels, test_labels))

###### Data Visualization
# Plain NumPy arrays of the normalized training features / target.
normed_train_data_np = normed_train_data.to_numpy()
normed_train_labels_np = normed_train_labels.to_numpy()

# One scatter panel per feature column (0: engine load, 1: engine speed),
# each plotted against the normalized CO2 target, side by side.
for panel, feature_label in enumerate(('Eng_load', 'Eng_speed'), start=1):
  plt.subplot(1, 2, panel)
  plt.scatter(normed_train_data_np[:, panel - 1], normed_train_labels_np)
  plt.xlabel(feature_label)
  plt.ylabel('CO2')
plt.show()


### 3D plot
# Range constants. Only cmin/cmax are used below; the x/y/z limits are
# defined but not applied to the axes.
xmin, xmax, ymin, ymax, zmin, zmax = -5, 5, -5, 10, -10, 20
cmin, cmax = 0, 2

# Point coordinates: the two normalized features and the normalized target.
xs = normed_train_data_np[:,0]
ys = normed_train_data_np[:,1]
zs = normed_train_labels_np

# One random colour value in [cmin, cmax) per plotted point. The count is
# taken from the data instead of a hard-coded row number: scatter() requires
# `c` to have as many entries as xs/ys/zs, so a fixed count breaks as soon as
# the input files or the cleaning thresholds change.
n = len(xs)
color = (cmax - cmin) * np.random.random_sample(n) + cmin

fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs, ys, zs, c=color, marker='o', s=15, cmap='Greens')

plt.show()

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package).
# NOTE(review): the data-loading cell reads its CSV files from
# /content/drive/MyDrive/..., so on a fresh runtime this cell has to be
# executed before that one even though it appears later in the notebook --
# consider moving it to the top.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
## Creating Model
def build_model():
  """Build and compile the regression network.

  Architecture: two Dense hidden layers of 64 ReLU units followed by a single
  Dense output unit without activation. The input width is the number of
  columns in the module-level `train_dataset`.

  The model is compiled with MSE loss and an RMSprop optimizer constructed
  with 0.001, and reports MAE and MSE as metrics.

  Returns:
    The compiled `keras.Sequential` model.
  """
  n_features = len(train_dataset.keys())

  network = keras.Sequential()
  network.add(layers.Dense(64, activation='relu', input_shape=[n_features]))
  network.add(layers.Dense(64, activation='relu'))
  network.add(layers.Dense(1))

  network.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'],
  )
  return network

model = build_model()
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() -- that would emit an extra "None" line after the table.
model.summary()

# Prints a dot (.) at the end of every epoch to show training progress
class PrintDot(keras.callbacks.Callback):
  """Minimal progress indicator: one '.' per epoch, 100 dots per row."""

  def on_epoch_end(self, epoch, logs):
    # Every 100th epoch (including epoch 0) starts a fresh output row.
    starts_new_row = (epoch % 100 == 0)
    if starts_new_row:
      print('')
    print('.', end='')

EPOCHS = 1000

# `patience` is the number of epochs to wait for an improvement of the
# monitored value (validation loss) before training is stopped early
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# 20 % of the training rows are held back for validation; per-epoch output is
# silenced (verbose=0) and replaced by the PrintDot progress dots.
history = model.fit(normed_train_data, normed_train_labels, epochs=EPOCHS,
                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])

# Metrics here are computed against the normalized labels, so `mae` / `mse`
# are expressed in units of the training-label standard deviation.
loss, mae, mse = model.evaluate(normed_test_data, normed_test_labels, verbose=2)

# The labels were scaled as (y - mean) / std, so an absolute error in
# normalized space converts back to the original label unit by multiplying
# with that std. Without this the number printed below would be labelled
# as CO2Flowrate_gphr while actually being in standard deviations.
mae_gphr = mae * train_labels_stats['std']

print("\n 테스트 세트의 평균 절대 오차: {:5.2f} CO2Flowrate_gphr".format(mae_gphr))