# Driving Behavior Classification

## Import Necessary Libraries

In [1]:

## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote
from datetime import timedelta, datetime

## Import necessary API
import sys
sys.path.append('../../../../')
from api.v2.util.data_load import data_load
from api.v2.Preprocessing.Make_Time_Feature import TimeFeatureGenerator
from api.v2.util.set_minmax import set_minmax_value
from api.v2.Preprocessing.MinMaxScaler import MinMaxScaler
from api.v2.model.TCN import TCN

## Import libraries for the model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Set path for saving model training results 
import os
os.makedirs('./result', exist_ok=True)

## Set Cuda for computation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

# Set seed
seed_val = 77
set_seed(seed_val)

cuda


## Base Parameter Setting
* Set parameters based on the information identified during EDA (Exploratory Data Analysis).

In [2]:
# Set Driving Behavior Data Base Parameter

# Set MachBase Neo URL address
URL = 'http://127.0.0.1:5654'
# Set Tag Table Name
table = 'driving_behavior'
# Select Tag Name -> Can Check Tag Names Using command 'show_column(URL, table)'
tags = ['AccX', 'AccY', 'AccZ', 'Class', 'GyroX', 'GyroY', 'GyroZ']
# Wrap each item in the list with single quotes and separate with commas
tags_ = ",".join(f"'{tag}'" for tag in tags)
# Set Tag Name
name = quote(tags_, safe=":/")
# Set resample Option -> D(day), H(hour), T(minute), S(second)
resample_freq = None
# Set Start time
start_time = '2025-07-18 00:00:00'
# Set End time 
end_time = '2025-07-18 01:52:07'
# Set TimeFormat - > 'default' or quote('2006-01-02 15:04:05.000000')(Divided down to the nanosecond)
timeformat = 'default'

## Model Configuration
* Using Temporal Convolutional Network(TCN) model.

In [3]:
# Model configuration parameters
input_channels = 16
out_channels = 3
hidden_channels = [32, 64] 
kernel_size = 1
dropout = 0.05

# Learning rate
learning_rate = 0.001

# Initialize the model, loss, and optimizer
model = TCN(num_inputs=input_channels, num_channels=hidden_channels,out_channel=out_channels, kernel_size=kernel_size, dropout=dropout).to(device)

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Check the model architecture
print(model)

TCN(
  (network): Sequential(
    (0): TemporalBlock(
      (conv1): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
      (relu1): ReLU()
      (dropout1): Dropout(p=0.05, inplace=False)
      (conv2): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
      (relu2): ReLU()
      (dropout2): Dropout(p=0.05, inplace=False)
      (net): Sequential(
        (0): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
        (1): ReLU()
        (2): Dropout(p=0.05, inplace=False)
        (3): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
        (4): ReLU()
        (5): Dropout(p=0.05, inplace=False)
      )
      (downsample): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
    )
    (1): TemporalBlock(
      (conv1): Conv1d(32, 64, kernel_size=(1,), stride=(1,), dilation=(2,))
      (relu1): ReLU()
      (dropout1): Dropout(p=0.05, inplace=False)
      (conv2): Conv1d(64, 64, kernel_size=(1,), stride=(1,), dilation=(2,))
      (relu2): ReLU()
      (dropout2): Dropout(p=0.05, inplace

## Dataset & Loader Setup

In [4]:
class Driving_Dataset(Dataset):
    def __init__(self, data, target_column, seq_length=10):
        self.data = data
        self.target_column = target_column
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length + 1

    def __getitem__(self, idx):
        # Input data: Values of multiple variables over seq_length - 1
        x = self.data[idx:idx + self.seq_length].drop(columns=[self.target_column]).values.reshape(-1, self.seq_length)  # Excluding target column
        # Target data: Target variable value on the seq_length 
        y = self.data[self.target_column].iloc[idx + self.seq_length - 1]
        return x, y

## Model Training

In [5]:
# Model training function
def train(epochs, start_time_train, end_time_train, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size):
    
    # Initialize training loss
    train_loss = []
    # Initialize training accuracy
    train_acc = []
    
    # Initialize best F1 Score value
    best_f1= 0
    
    # Start model training
    for epoch in epochs:
        
        # Set the model to training mode
        model.train()
        correct = 0
        total=0
        preds_ = []
        targets_ = []
        
        # Initialize loss and total step
        running_loss = 0.0
        total_step = 0
        
        # Set initial Time
        args = {unit: Fetch_size}
        start_time = start_time_train
        end_time = str(datetime.strptime(start_time_train, "%Y-%m-%d %H:%M:%S") + timedelta(**args))
        end_time_train_ = str(datetime.strptime(end_time_train, "%Y-%m-%d %H:%M:%S") + timedelta(seconds=1))
        
        # Set flag
        flag = False

        while end_time < end_time_train_:
            
            # Load batch data
            data = data_load(URL, table, name, start_time, end_time, timeformat, resample_freq)

            # Rename the 'Class' column to 'label'
            data.rename(columns={'Class': 'label'}, inplace=True)

            # Move the 'label' column to the last position
            data = data.reindex(columns=[col for col in data.columns if col != 'label'] + ['label'])

            # Convert the 'label' column to integer type
            data['label'] = data['label'].astype(int)

            # Apply MinMaxscaler
            data_scaled = scaler.fit_transform(data.iloc[:,:-1].values, Min.drop(columns=[0]).values, Max.drop(columns=[0]).values)
            
            # Set up the DataFrame
            data_ = pd.DataFrame(data_scaled, index=data.index)
            data_['label'] = data['label'].values
            
            # Set TimeFeatureGenerator
            feature_generator = TimeFeatureGenerator()  
            
            # Make Time Featrue
            time_feature = feature_generator.generate_features(data.index)
            
            # concat origin dataset
            data_ = pd.concat([data_, time_feature], axis=1)
            
            # Drop NaN values
            data = data_.dropna()

            # Set up dataset & Loader
            train_ = Driving_Dataset(data, target_column='label', seq_length=2)

            train_dataloader = DataLoader(train_, batch_size, shuffle=False)

            # Print if the loaded data is empty
            if len(data) != 0:
                
                for batch_idx, (data, target) in enumerate(train_dataloader):
                    
                    # Check total batch count
                    total_step += 1
                    
                    data = data.to(device).float()
                    target = target.to(device).long().squeeze()
                    
                    optimizer.zero_grad()
                
                    # Input to the model
                    outputs = model(data)
                    outputs = outputs.squeeze()
                    
                    # Calculate loss
                    loss = criterion(outputs, target)
                    loss.backward()
                    optimizer.step()
                    
                    running_loss += loss.item()
                    
                    # Set label predictions 
                    _,pred = torch.max(outputs, dim=1)
                    target_ = target.view_as(pred)
                    correct += torch.sum(pred==target).item()
                    total += target.size(0)
                    
                    preds_.append(pred)
                    targets_.append(target_)
                    
            # Update start_time and end_time for next batch
            start_time = end_time
            end_time = str(datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S") + timedelta(**args))
            
            # Select the remaining portions at the end
            if end_time >= end_time_train and not flag:
                
                end_time = end_time_train
                flag = True   

        train_acc.append(100 * correct / total)
        train_loss.append(running_loss/total_step)
        print(f'\ntrain loss: {np.mean(train_loss)}, train acc: {(100 * correct / total):.4f}')

        # Combine predictions and labels collected from all batches
        preds_ = torch.cat(preds_).detach().cpu().numpy()
        targets_ = torch.cat(targets_).detach().cpu().numpy()
        
        f1score = f1_score(targets_, preds_,  average='macro')
        if best_f1 < f1score:
            best_f1 = f1score
            # Save the best model 
            with open("./result/Driving_Behavior_Buffered_Fetch.txt", "a") as text_file:
                print('epoch=====',epoch, file=text_file)
                print(classification_report(targets_, preds_, digits=4), file=text_file)
            print('model save')
            torch.save(model, f'./result/Driving_Behavior_Buffered_Fetch.pt') 
        epochs.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")
     
    return model

In [6]:
# Set number of epochs
epochs = trange(20, desc='training')
# Unit of time ('days', 'hours', 'minutes', 'seconds')
unit = 'minutes'
Fetch_size = 30
# Set Batch Size
batch_size = 64
# Set Train Time
start_time_train = '2025-07-18 00:00:00'
end_time_train = '2025-07-18 01:02:07'
# Set up scalers
scaler = MinMaxScaler()
# Set Min, Max value
Min, Max = set_minmax_value(URL, table, name, start_time_train, end_time_train)

#################################################################Training#############################################################################################
train(epochs, start_time_train, end_time_train, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size)

training:   0%|          | 0/20 [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



train loss: 1.2198508590459824, train acc: 28.3069
model save

train loss: 1.1598082239429157, train acc: 26.2678
model save

train loss: 1.107899581723743, train acc: 35.2562
model save

train loss: 1.0398582958383487, train acc: 46.1765
model save

train loss: 0.9808812577128994, train acc: 46.0961

train loss: 0.939815077694382, train acc: 58.1701
model save

train loss: 0.8971152202351647, train acc: 68.6880
model save

train loss: 0.8542854312003783, train acc: 71.7467
model save

train loss: 0.8334455578039692, train acc: 70.3247

train loss: 0.8017001299720323, train acc: 78.2935
model save

train loss: 0.7874872069343012, train acc: 63.5900

train loss: 0.7601574423671537, train acc: 79.8229
model save

train loss: 0.7474860537818434, train acc: 77.5154

train loss: 0.7221747141092021, train acc: 84.2501
model save

train loss: 0.7121277198212597, train acc: 81.9157

train loss: 0.6894723783857291, train acc: 86.2087
model save

train loss: 0.6800033957081704, train acc: 83.79

TCN(
  (network): Sequential(
    (0): TemporalBlock(
      (conv1): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
      (relu1): ReLU()
      (dropout1): Dropout(p=0.05, inplace=False)
      (conv2): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
      (relu2): ReLU()
      (dropout2): Dropout(p=0.05, inplace=False)
      (net): Sequential(
        (0): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
        (1): ReLU()
        (2): Dropout(p=0.05, inplace=False)
        (3): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
        (4): ReLU()
        (5): Dropout(p=0.05, inplace=False)
      )
      (downsample): Conv1d(16, 32, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
    )
    (1): TemporalBlock(
      (conv1): Conv1d(32, 64, kernel_size=(1,), stride=(1,), dilation=(2,))
      (relu1): ReLU()
      (dropout1): Dropout(p=0.05, inplace=False)
      (conv2): Conv1d(64, 64, kernel_size=(1,), stride=(1,), dilation=(2,))
      (relu2): ReLU()
      (dropout2): Dropout(p=0.05, inplace

## Model Test

In [7]:
def test(model, start_time_test, end_time_test, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size):
    
    with torch.no_grad():
                
        model.eval()
        
        # Initial settings 
        output_test = []
        output_target = []
        
        # Set initial Time
        args = {unit: Fetch_size}
        start_time = start_time_test
        end_time = str(datetime.strptime(start_time_test, "%Y-%m-%d %H:%M:%S") + timedelta(**args))
        end_time_test_ = str(datetime.strptime(end_time_test, "%Y-%m-%d %H:%M:%S") + timedelta(minutes=1))
        
        # Set flag
        flag = False
        
        while end_time < end_time_test_:
            
            # Load batch data
            data = data_load(URL, table, name, start_time, end_time, timeformat, resample_freq)

            # Rename the 'Class' column to 'label'
            data.rename(columns={'Class': 'label'}, inplace=True)

            # Move the 'label' column to the last position
            data = data.reindex(columns=[col for col in data.columns if col != 'label'] + ['label'])

            # Convert the 'label' column to integer type
            data['label'] = data['label'].astype(int)

            # Apply MinMaxscaler
            data_scaled = scaler.fit_transform(data.iloc[:,:-1].values, Min.drop(columns=[0]).values, Max.drop(columns=[0]).values)
            
            # Set up the DataFrame
            data_ = pd.DataFrame(data_scaled, index=data.index)
            data_['label'] = data['label'].values
            
            # Set TimeFeatureGenerator
            feature_generator = TimeFeatureGenerator()  
            
            # Make Time Featrue
            time_feature = feature_generator.generate_features(data.index)
            
            # concat origin dataset
            data_ = pd.concat([data_, time_feature], axis=1)
            
            # Drop NaN values
            data = data_.dropna()

            # Set up dataset & Loader
            test_ = Driving_Dataset(data, target_column='label', seq_length=2)

            test_dataloader = DataLoader(test_, batch_size, shuffle=False)
            
            # Print if the loaded data is empty
            if len(data) != 0:
                
                for batch_idx, (data, target) in enumerate(test_dataloader):
                    
                    data = data.to(device).float()
                    target = target.to(device).long().squeeze()
                
                    # Input to the model
                    outputs = model(data)
                    outputs = outputs.squeeze()
                    
                    # Set label predictions 
                    _,pred = torch.max(outputs, dim=1)
                    target_ = target.view_as(pred)
                    
                    output_test.append(pred)
                    output_target.append(target_)
        
            # Update start_time and end_time for next batch
            start_time = end_time
            end_time = str(datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S") + timedelta(**args))
            
            # Select the remaining portions at the end
            if end_time >= end_time_test and not flag:
                
                end_time = end_time_test
                flag = True   
 
        # Combine tensors into one
        combined_tensor_target = torch.cat(output_target, dim=0)
        combined_tensor_pred = torch.cat(output_test, dim=0)

        # Change to NumPy format
        real_values = combined_tensor_target.cpu().numpy()
        real_pred_values = combined_tensor_pred.cpu().numpy()

    return real_values, real_pred_values  

In [10]:
# Load the best model
model_ = torch.load(f'./result/Driving_Behavior_Buffered_Fetch.pt')

# Unit of time ('days', 'hours', 'minutes', 'seconds')
unit = 'minutes'
Fetch_size = 30
# Set Test Time
start_time_test = '2025-07-18 01:02:07'
end_time_test = '2025-07-18 01:52:07'
##################################################################################Test############################################################################################################
real_values, real_pred_values = test(model_, start_time_test, end_time_test, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size)

  return F.conv1d(input, weight, bias, self.stride,


## Model Performance Evaluation

In [11]:
print(classification_report(real_values, real_pred_values))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       997
           1       1.00      0.89      0.94      1273
           2       0.18      0.34      0.23       730

    accuracy                           0.46      3000
   macro avg       0.39      0.41      0.39      3000
weighted avg       0.47      0.46      0.46      3000

