In [None]:
import os

# TF_CPP_MIN_LOG_LEVEL must be in the environment *before* TensorFlow is
# imported: the native runtime reads it while loading, so setting it after
# `import tensorflow` does not silence the import-time C++ log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Mount Google Drive into the Colab filesystem so the CSV files referenced
# below (under /content/drive/MyDrive/...) are readable.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Reading Data
# Four CSV recordings stored in the LEV3 folder on the mounted Drive.

data1 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/043_200421_Santafe.csv'
data2 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/045_200422_Santafe.csv'
data3 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/046_200423_Santafe.csv'
data4 = '/content/drive/MyDrive/Colab Notebooks/MLmodel_djlee/LEV3/047_200424_Santafe.csv'

# Read each file lazily and stack them row-wise; ignore_index=True gives the
# combined frame a fresh 0..N-1 index, so row labels are unique across files.
csv_paths = (data1, data2, data3, data4)
dataFrame_raw = pd.concat(
    (pd.read_csv(csv_path) for csv_path in csv_paths),
    ignore_index=True,
)

In [None]:
# Name(s) of the column(s) the model is trained to predict.
output_variable_names = [
    'CAL_CO2Flowrate_gphr'
]

# Every column kept from the raw data: the input features plus the output.
variable_names_to_extract = [
    'OBD_CalEngLoad_perc',
    'OBD_EngineSpeed_rpm',
    'CAL_CO2Flowrate_gphr'
]

# Select the columns through the list above rather than repeating the names,
# so the list and the extracted frame cannot drift apart.
dataFrame = dataFrame_raw[variable_names_to_extract]

In [None]:
# Cleaning data
RPM_min = 100                      # rows below this engine speed count as engine-stop and are excluded
NOx_max = 1649                     # clipping threshold for NOx (sensor maxed out at 1650 ppm); not applied below

# Keep rows whose engine speed is at least RPM_min, then discard any row with
# a missing value. The NOx clipping filter on 'AUX_NOxLNInlet_ppm' is
# currently disabled, so NOx_max is unused in this cell.
engine_running = dataFrame['OBD_EngineSpeed_rpm'] >= RPM_min
dataFrame = dataFrame.loc[engine_running].dropna()

# Random 80/20 split with a fixed seed: the test set is every row that was
# not drawn into the training sample.
train_dataset = dataFrame.sample(frac=0.8, random_state=0)
test_dataset = dataFrame.drop(index=train_dataset.index)

In [None]:
# Separate the target column from the features of each split.
# `pop` removes the column from the frame in place, so after this cell
# train_dataset / test_dataset hold only the input features; re-running the
# cell without re-creating the split raises KeyError.
target_column = 'CAL_CO2Flowrate_gphr'
train_labels = train_dataset.pop(target_column)
test_labels = test_dataset.pop(target_column)

In [None]:
# Check stats
# describe() is transposed so each feature is a row with 'mean'/'std' columns.
train_stats = train_dataset.describe().T
train_labels_stats = train_labels.describe()


# Data normalization
def norm(x):
    """Standardize the feature columns of ``x`` using training-set statistics.

    Subtracts the per-feature training mean and divides by the per-feature
    training standard deviation, both read from the global ``train_stats``.
    """
    feature_mean = train_stats['mean']
    feature_std = train_stats['std']
    centered = x - feature_mean
    return centered / feature_std


def norm_label(x):
    """Standardize label values ``x`` using the training-label mean and std.

    The statistics are read from the global ``train_labels_stats``.
    """
    label_mean = train_labels_stats['mean']
    label_std = train_labels_stats['std']
    centered = x - label_mean
    return centered / label_std


# Both splits are scaled with the *training* statistics only.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
normed_train_labels = norm_label(train_labels)
normed_test_labels = norm_label(test_labels)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.ensemble import BaggingRegressor

# Bagging ensemble of Lasso regressors.
# The base estimator is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed in
# 1.4), and the positional form works on both sides of that rename.
# random_state pins the bootstrap resampling so the scores are reproducible,
# matching the fixed seed used for the train/test split.
# NOTE(review): features and labels are standardized, so Lasso's default
# alpha=1.0 likely shrinks every coefficient to (near) zero and the scores
# land around 0 — confirm and consider tuning alpha.
model = BaggingRegressor(Lasso(), random_state=0)
model.fit(normed_train_data, normed_train_labels)

# R^2 on the training split, then on the held-out test split.
print(model.score(normed_train_data, normed_train_labels))
print(model.score(normed_test_data, normed_test_labels))