In [None]:
import numpy as np
import pandas as pd
import time
import tensorflow
from tensorflow.keras import Sequential
from tensorflow.keras.backend import set_floatx
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from tensorflow.keras.layers import InputLayer, BatchNormalization, LSTM, Dropout, Dense
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from keras import optimizers
import matplotlib.pyplot as plt


# Notebook-wide configuration read by the cells below.
arg_num = 7          # width of the model's input layer (see create_model)
class_num = 5        # length of each one-hot target / width of the softmax output
_dtype = 'float32'   # dtype of the feature and target arrays built in preprocess

In [None]:
# Read the raw CSV into a DataFrame; later cells use its `time` and `label` columns.
df = pd.read_csv(
    filepath_or_buffer='../input/commercial-vehicles-sensor-data-set/Terra-D2-multi-labeled-interpolated.csv',
)

In [None]:
# Every row except the last 10 (what a negative head count means);
# the notebook truncates the rendered table.
df.iloc[:-10]

In [None]:
def preprocess(df: pd.DataFrame, test_fraction: float = 0.2, seed=None,
               num_classes=None, dtype=None):
    """Turn the labelled frame into shuffled train/validation arrays.

    Rows whose ``label`` is zero or not a whole number are discarded, the
    ``time`` column is dropped, every remaining column except ``label`` becomes
    a feature, and each label ``k`` is one-hot encoded at index ``k - 1``.
    The per-class row counts are printed as a side effect.

    Args:
        df: Frame with a ``time`` column, a numeric ``label`` column and the
            feature columns. It is not modified.
        test_fraction: Share of the rows (0..1) placed in the validation split.
        seed: Optional seed for the shuffle. ``None`` keeps using NumPy's
            global random state.
        num_classes: Width of the one-hot targets. ``None`` falls back to the
            module-level ``class_num``.
        dtype: dtype of the returned arrays. ``None`` falls back to the
            module-level ``_dtype``.

    Returns:
        ``(x_train, x_valid, y_train, y_valid)``; the x arrays have shape
        ``(rows, n_features)`` and the y arrays ``(rows, num_classes)``.

    Raises:
        ValueError: If ``test_fraction`` is outside [0, 1], if a kept label is
            outside ``1..num_classes``, or if no row survives the filtering.
        KeyError: If the ``label`` or ``time`` column is missing.
    """
    n_classes = class_num if num_classes is None else num_classes
    out_dtype = _dtype if dtype is None else dtype
    if not 0 <= test_fraction <= 1:
        raise ValueError(f'test_fraction must be within [0, 1], got {test_fraction!r}')

    # Cleaning up meaningless data: keep whole-number, non-zero labels only.
    # The vectorised modulo also works for integer label columns and treats
    # NaN/inf as "not an integer".
    labels = df['label']
    keep = (labels % 1 == 0) & (labels != 0)
    cleaned = df.loc[keep].drop(columns=['time'])

    # A label below 1 would silently wrap around in the one-hot index and a
    # label above num_classes would overflow it, so reject both up front.
    label_ids = cleaned['label'].astype(int)
    out_of_range = (label_ids < 1) | (label_ids > n_classes)
    if out_of_range.any():
        raise ValueError(
            f'labels must lie in 1..{n_classes}, '
            f'found {sorted(label_ids[out_of_range].unique().tolist())}'
        )

    # Clustering by classes, then by input/output.
    features = []
    targets = []
    print('_number of class instances:')
    for label, data in cleaned.groupby('label'):
        data = data.drop(columns=['label'])
        print(data.shape, label)
        features.append(data.to_numpy(copy=True, dtype=out_dtype))
        one_hot = np.zeros((data.shape[0], n_classes), dtype=out_dtype)
        one_hot[:, int(label) - 1] = 1
        targets.append(one_hot)

    if not features:
        raise ValueError('no rows with a non-zero integer label were found')

    # Concatenate the per-class blocks directly: wrapping the list in
    # np.asarray first would build a ragged array whenever the classes have
    # different sizes. The blocks are already 2-D, so no reshape is needed.
    x = np.concatenate(features)
    y = np.concatenate(targets)

    # Clustering by train/validate: one shared permutation keeps x and y aligned.
    rng = np.random if seed is None else np.random.RandomState(seed)
    order = rng.permutation(x.shape[0])
    split_idx = int(x.shape[0] * (1 - test_fraction))
    train_idx, valid_idx = order[:split_idx], order[split_idx:]

    return x[train_idx], x[valid_idx], y[train_idx], y[valid_idx]

In [None]:
# Build the split before displaying it, so the notebook also runs top to
# bottom on a fresh kernel.
# df.sample raises when asked for more rows than the frame has, hence the cap.
sample_size = min(1497743, len(df))
x_train, x_valid, y_train, y_valid = preprocess(df.sample(sample_size))
x_train

In [None]:
def create_model():
    """Assemble the uncompiled dense classifier.

    The stack is built in order: a Dense layer of width ``arg_num`` that also
    declares the ``(arg_num,)`` input shape, batch normalisation, a 256-unit
    ReLU layer with 20% dropout, batch normalisation, a 64-unit ReLU layer
    with 20% dropout, and a ``class_num``-way softmax output.

    Returns:
        The ``Sequential`` model; the caller is expected to compile it.
    """
    net = Sequential()

    # Input projection.
    net.add(Dense(arg_num, input_shape=(arg_num,)))
    net.add(BatchNormalization())

    # First hidden block.
    net.add(Dense(256, activation='relu'))
    net.add(Dropout(.2))
    net.add(BatchNormalization())

    # Second hidden block.
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(.2))

    # One probability per class.
    net.add(Dense(class_num, activation='softmax'))

    return net

In [None]:
# Instantiate a fresh, still uncompiled network from the create_model factory.
model = create_model()

In [None]:

# Show the layer-by-layer architecture of the model as a sanity check.
model.summary()

In [None]:
# Plain SGD taken from tensorflow.keras, the same package the model's layers
# come from. `learning_rate` is the supported argument name; the old `lr`
# alias is deprecated and rejected by current Keras releases.
opt = SGD(learning_rate=0.001)

# One-hot targets + softmax output -> categorical cross-entropy / accuracy.
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['categorical_accuracy'])

# `history.history` holds the per-epoch metrics plotted in the cells below.
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_valid, y_valid), batch_size=10)

In [None]:
# Training vs. validation accuracy, one point per epoch.
accuracy_curves = {
    'acc': 'categorical_accuracy',
    'val_acc': 'val_categorical_accuracy',
}
for legend_name, metric_key in accuracy_curves.items():
    plt.plot(history.history[metric_key], label=legend_name)
plt.ylim((0, 1))  # accuracy is a fraction, so show the full 0..1 range
plt.legend()

In [None]:
# Training vs. validation loss, one point per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
# Cross-entropy is not bounded above by 1, so only the lower limit is pinned;
# a fixed (0, 1) window would hide every epoch whose loss exceeds 1.
plt.ylim(bottom=0)
plt.xlabel('epoch')
plt.ylabel('categorical cross-entropy')
plt.title('Training vs. validation loss')
plt.legend();