# Commercial Vehicles Sensor Classification

## Import Necessary Libraries

In [16]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
import pickle
from urllib.parse import quote
from sklearn.preprocessing import MinMaxScaler

## Import necessary API
import sys
sys.path.append('../../../../')
from api.v2.util.data_load import data_load
from api.v2.model.Transformer import Transformer_Encoder

## Import libraries for the model
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Create the output directory for training artifacts (no error if it already exists)
import os
os.makedirs('./result', exist_ok=True)

## Pick the compute device: CUDA when PyTorch reports it as available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    """Seed every random number generator this notebook relies on.

    Applies ``seed_val`` to Python's ``random`` module, NumPy's global RNG,
    and PyTorch's CPU and CUDA generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed_val)

# Seed all generators with a fixed value
seed_val = 77
set_seed(seed_val)

cuda


## Base Parameter Setting
* Set parameters based on the information identified during EDA (Exploratory Data Analysis).

In [17]:
# Connection and query parameters for the commercial-vehicle sensor data

# Base URL of the Machbase Neo server
URL = 'http://127.0.0.1:5654'
# Tag table to read from
table = 'commercial_vehicles'
# Tag names to fetch -> available names can be listed with 'show_column(URL, table)'
tags = ['gFx', 'gFy', 'gFz', 'label', 'speed', 'wx', 'wy', 'wz']
# Wrap each tag in single quotes, then join them with commas
quoted_tags = [f"'{tag}'" for tag in tags]
tags_ = ",".join(quoted_tags)
# URL-encode the tag list, leaving ':' and '/' unescaped
name = quote(tags_, safe=":/")
# Resample option -> D(day), H(hour), T(minute), S(second); None is used here
resample_freq = None
# Start of the query window
start_time = '2025-01-01 00:00:00'
# End of the query window
end_time = '2025-01-01 04:44:00'
# Time format -> 'default' or a URL-encoded layout string (six fractional-second digits here)
timeformat = quote('2006-01-02 15:04:05.000000')

## Commercial Vehicles Sensor Data Load

In [3]:
# Load the commercial-vehicle sensor data using the query parameters set earlier
df = data_load(URL, table, name, start_time, end_time, timeformat, resample_freq)

# Move the 'label' column to the last position
df = df.reindex(columns=[col for col in df.columns if col != 'label'] + ['label'])

# Convert the 'label' column to integer type
df['label'] = df['label'].astype(int)

# Remove labels 0 and 5, as they have no meaning as labels.
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignment below modifies it directly instead of being a chained
# assignment on a slice (the pattern behind pandas' SettingWithCopyWarning).
df = df[~df['label'].isin([0, 5])].copy()

# Adjust the label values to start from 0
df['label'] = df['label'] - 1

In [4]:
df

NAME,gFx,gFy,gFz,speed,wx,wy,wz,label
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-01-01 00:00:00.000000,-0.272700,0.044200,0.938200,0.000000,-0.0053,-0.0077,-0.0331,0
2025-01-01 00:00:00.009999,-0.272700,0.044200,0.938200,0.000000,-0.0053,-0.0077,-0.0331,0
2025-01-01 00:00:00.020000,-0.289500,0.046200,0.993400,0.000000,-0.0010,0.0070,-0.0312,0
2025-01-01 00:00:00.029999,-0.289500,0.046200,0.993400,0.000000,-0.0010,0.0070,-0.0312,0
2025-01-01 00:00:00.040000,-0.289500,0.046200,0.993400,0.000000,-0.0010,0.0070,-0.0312,0
...,...,...,...,...,...,...,...,...
2025-01-01 04:43:19.792028,-0.236300,0.020500,0.950400,3.246584,0.0223,0.0100,0.0157,1
2025-01-01 04:43:19.802028,-0.236300,0.020500,0.950400,3.246584,0.0223,0.0100,0.0157,1
2025-01-01 04:43:19.812028,-0.241854,-0.017826,0.966228,3.246584,0.0144,0.0216,0.0157,1
2025-01-01 04:43:19.822029,-0.227540,0.000489,0.910911,3.246584,0.0144,0.0216,0.0157,1


In [5]:
# Split by the hour of the timestamp index:
# rows with hour < 4 form the training set, rows with hour >= 4 form the test set
hour_of_day = df.index.hour
train = df[hour_of_day < 4]
test = df[hour_of_day >= 4]

## Data Preprocessing
* Step 1: Min-Max scaling (the scaler is fitted on the training data and then applied to the test data)

In [6]:
# Fit a Min-Max scaler on the training features only (every column except the
# trailing 'label'), then apply the same fitted scaling to the test features
scaler = MinMaxScaler()
train_ = scaler.fit_transform(train.iloc[:, :-1].values)
test_ = scaler.transform(test.iloc[:, :-1].values)

# Rebuild DataFrames from the scaled arrays and re-attach the labels as the last column
train_scaled = pd.DataFrame(train_, columns=train.columns[:-1]).assign(label=train['label'].values)
test_scaled = pd.DataFrame(test_, columns=test.columns[:-1]).assign(label=test['label'].values)

# Persist the fitted scaler
with open('./result/trans_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

## Dataset & Loader Setup

In [7]:
class Vehicles_Dataset(Dataset):
    """Map-style dataset over a DataFrame whose last column is the class label.

    Every column except the last is treated as a feature. Each item is a
    ``(features, label)`` pair: ``features`` is a float32 tensor of shape
    ``(1, n_features)`` and ``label`` is a length-1 NumPy array holding the
    value of the last column.
    """

    def __init__(self, df):
        # Kept as a DataFrame so code reading this attribute keeps working
        self.freq_data = df.iloc[:, :-1]
        self.label = df.iloc[:, -1:].values
        # Convert the feature matrix to a tensor once. Doing a pandas row
        # lookup plus a Series -> Tensor conversion on every access is slow
        # and depends on integer-position indexing of a label-indexed Series.
        self._features = torch.tensor(self.freq_data.to_numpy(dtype=np.float32))

    def __len__(self):
        return len(self.freq_data)

    def __getitem__(self, index):
        # Add a leading length-1 axis so each sample has shape (1, n_features)
        input_time_data = self._features[index].unsqueeze(0)
        label = self.label[index]

        return input_time_data, label

In [8]:
# Wrap the scaled train and test frames as datasets
train_ = Vehicles_Dataset(train_scaled)
test_ = Vehicles_Dataset(test_scaled)

# Build the loaders: training batches are shuffled, test batches keep their order
batch_size = 64
train_dataloader = DataLoader(dataset=train_, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_, batch_size=batch_size, shuffle=False)

In [9]:
# Verify DataLoader application and check the shape of the input data.
# Only the first batch is fetched: list(train_dataloader) would materialize
# every batch of the training set just to read one shape.
first_inputs, first_labels = next(iter(train_dataloader))
print(first_inputs.shape)

torch.Size([64, 1, 7])


## Model Configuration
* A Transformer encoder model is used for classification.

In [18]:
# Hyperparameter settings
input_dim = 7         # Number of features in the time series
model_dim = 64        # Dimension of the model
num_heads = 2         # Number of attention heads
num_layers = 3        # Number of Transformer layers
# NOTE(review): rows labelled 0 and 5 are dropped and labels shifted down by one
# earlier in this notebook, and the test classification report lists only
# classes 0-3 -- confirm whether 6 output classes are intended here.
output_dim = 6        # Dimension of the predicted labels
dropout = 0.1         # Dropout rate

# Model configuration: build the Transformer encoder and move it to the selected device
model = Transformer_Encoder(input_dim, model_dim, num_heads, num_layers, output_dim, dropout).to(device)

# Loss function and optimizer configuration:
# cross-entropy loss, Adam over all model parameters with learning rate 0.001
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Show the model architecture
print(model)

Transformer_Encoder(
  (input_embedding): Linear(in_features=7, out_features=64, bias=True)
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
    )
    (linear1): Linear(in_features=64, out_features=512, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=512, out_features=64, bias=True)
    (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=512

## Model Training

* During training, the model is saved whenever its macro F1 score on the training data improves on the best score so far.

In [12]:
# Per-epoch history of the mean training loss
train_loss = []
# Per-epoch history of the training accuracy (percent)
train_acc = []
# Number of batches per epoch
total_step = len(train_dataloader)
# Set number of epochs
epoch_in = trange(20, desc='training')
# Best macro F1 score seen so far (measured on the training data)
best_f1 = 0

# Start model training
for epoch in epoch_in:

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    preds_ = []
    targets_ = []

    # NOTE: train_loss is intentionally NOT reset here, so it keeps one entry
    # per epoch instead of being wiped at the start of every epoch.

    for batch_idx, (data, target) in enumerate(train_dataloader):

        data = data.to(device).float()
        # Flatten labels to shape (batch,). A bare squeeze() would also drop
        # the batch axis when the final batch holds a single sample.
        target = target.to(device).long().reshape(-1)

        optimizer.zero_grad()

        # Input to the model
        outputs = model(data)
        # Collapse the singleton axes while always keeping the batch axis,
        # giving one row of class scores per sample
        outputs = outputs.reshape(target.size(0), -1)

        # Calculate loss
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Set label predictions: index of the highest score in each row
        _, pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred == target).item()
        total += target.size(0)

        # Keep per-batch results on the CPU so they do not accumulate on the device
        preds_.append(pred.detach().cpu())
        targets_.append(target.detach().cpu())

    epoch_loss = running_loss / total_step
    epoch_acc = 100 * correct / total
    train_acc.append(epoch_acc)
    train_loss.append(epoch_loss)
    print(f'\ntrain loss: {epoch_loss}, train acc: {epoch_acc:.4f}')

    # Combine predictions and labels collected from all batches
    preds_ = torch.cat(preds_).numpy()
    targets_ = torch.cat(targets_).numpy()

    f1score = f1_score(targets_, preds_, average='macro')
    if best_f1 < f1score:
        best_f1 = f1score
        # Append this epoch's classification report to the log file
        with open("./result/Commercial_Vehicles_Sensor_Full.txt", "a") as text_file:
            print('epoch=====', epoch, file=text_file)
            print(classification_report(targets_, preds_, digits=4), file=text_file)
        # Save the best model (the whole model object, not just its state_dict)
        print('model save')
        torch.save(model, './result/Commercial_Vehicles_Sensor_Full.pt')
    epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")

training:   0%|          | 0/20 [00:00<?, ?it/s]

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)



train loss: 0.4242396907582764, train acc: 83.9889


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


model save

train loss: 0.3439836549059704, train acc: 87.0349
model save

train loss: 0.3206093940851281, train acc: 87.9573
model save

train loss: 0.3035449644920581, train acc: 88.6187
model save

train loss: 0.28597104062252504, train acc: 89.3761
model save

train loss: 0.27451117475890346, train acc: 89.8258
model save

train loss: 0.2636892217366246, train acc: 90.2828
model save

train loss: 0.2555057024740027, train acc: 90.6163
model save

train loss: 0.24909691200559783, train acc: 90.8777
model save

train loss: 0.24171262188817935, train acc: 91.1744
model save

train loss: 0.23805520613573375, train acc: 91.3430
model save

train loss: 0.23428755970865556, train acc: 91.5018
model save

train loss: 0.23041820415659878, train acc: 91.6270
model save

train loss: 0.22614723819252996, train acc: 91.8035
model save

train loss: 0.224583660335315, train acc: 91.8746
model save

train loss: 0.22180835534133383, train acc: 91.9784
model save

train loss: 0.21966864783682505, tr

## Model Testing

In [13]:
# Load the best model saved during training.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is needed because the file holds the whole pickled model
# object rather than a state_dict; unpickling can execute arbitrary code, so
# only load checkpoint files you trust.
model_ = torch.load('./result/Commercial_Vehicles_Sensor_Full.pt', map_location=device, weights_only=False)

In [14]:
# Model testing: collect predictions and true labels over the whole test loader
preds_test = []
target_test = []
# Switch to evaluation mode
model_.eval()
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_dataloader):
        data = data.to(device).float()
        # Flatten labels to shape (batch,). A bare squeeze() would also drop
        # the batch axis when the final batch holds a single sample.
        target = target.to(device).long().reshape(-1)

        outputs_t = model_(data)
        # Collapse the singleton axes while always keeping the batch axis,
        # giving one row of class scores per sample
        outputs_t = outputs_t.reshape(target.size(0), -1)

        # Predicted class = index of the highest score in each row
        _, pred_t = torch.max(outputs_t, dim=1)

        preds_test.append(pred_t.cpu())
        target_test.append(target.cpu())

# Combine predictions and labels collected from all batches
preds_test = torch.cat(preds_test).numpy()
target_test = torch.cat(target_test).numpy()

## Model Performance Evaluation

In [15]:
# Print per-class precision, recall, and F1 for the test predictions
test_report = classification_report(target_test, preds_test)
print(test_report)

              precision    recall  f1-score   support

           0       0.30      0.31      0.30     25611
           1       0.87      0.95      0.91    127886
           2       0.75      0.58      0.65     85496
           3       0.40      0.53      0.46     20991

    accuracy                           0.73    259984
   macro avg       0.58      0.59      0.58    259984
weighted avg       0.73      0.73      0.73    259984

