## Import Necessary Libraries

In [1]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from scipy.fftpack import fft
from sklearn.decomposition import PCA

## Import libraries for the model
import torch
import torch.nn as nn
import xgboost as xgb
from sklearn.metrics import classification_report

## Make sure the folder that receives the trained models exists
import os
os.makedirs('./result', exist_ok=True)

## Compute on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    """Seed every random number generator the notebook relies on.

    Applies `seed_val` to Python's `random`, NumPy's legacy global generator,
    PyTorch's default generator and the generators of all CUDA devices.

    Args:
        seed_val: Integer seed handed unchanged to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_val)

# Fix the seed used for the rest of the notebook
seed_val = 77
set_seed(seed_val)

cuda


## Selecting Data Columns
* Tag names are loaded in sequential order.
* The required tag names are then selected from this tag name list.

In [2]:
# Function to display tag names
def show_column(URL):
    """Read a CSV of tag names and return all of its cells as one flat list.

    Args:
        URL: Location of the CSV; passed straight to `pd.read_csv`.

    Returns:
        list: Every cell of the parsed table, flattened row by row.
    """
    tag_table = pd.read_csv(URL)
    return tag_table.to_numpy().ravel().tolist()

In [3]:
## Set parameters for displaying tag names
# Tag table whose tag names are requested
table = 'vibe_unbal'

# Endpoint on the local server (127.0.0.1:5654) queried for the tag names of `table`
NAME_URL = f'http://127.0.0.1:5654/db/tql/datahub/api/v1/get_tag_names.tql?table={table}'

## Generate tag name list
# `name` holds the flat list returned by show_column
name = show_column(NAME_URL)

In [4]:
# Display the tag names; the slices taken from this list later rely on its order
name

['0_Measured_RPM',
 '0_V_in',
 '0_Vibration_1',
 '0_Vibration_2',
 '0_Vibration_3',
 '0_unbalance_Factor',
 '1_Measured_RPM',
 '1_V_in',
 '1_Vibration_1',
 '1_Vibration_2',
 '1_Vibration_3',
 '1_unbalance_Factor',
 '2_Measured_RPM',
 '2_V_in',
 '2_Vibration_1',
 '2_Vibration_2',
 '2_Vibration_3',
 '2_unbalance_Factor',
 '3_Measured_RPM',
 '3_V_in',
 '3_Vibration_1',
 '3_Vibration_2',
 '3_Vibration_3',
 '3_unbalance_Factor',
 '4_Measured_RPM',
 '4_V_in',
 '4_Vibration_1',
 '4_Vibration_2',
 '4_Vibration_3',
 '4_unbalance_Factor',
 '5_Measured_RPM',
 '5_V_in',
 '5_Vibration_1',
 '5_Vibration_2',
 '5_Vibration_3',
 '5_unbalance_Factor',
 '6_Measured_RPM',
 '6_V_in',
 '6_Vibration_1',
 '6_Vibration_2',
 '6_Vibration_3',
 '6_unbalance_Factor',
 '7_Measured_RPM',
 '7_V_in',
 '7_Vibration_1',
 '7_Vibration_2',
 '7_Vibration_3',
 '7_unbalance_Factor',
 '8_Measured_RPM',
 '8_V_in',
 '8_Vibration_1',
 '8_Vibration_2',
 '8_Vibration_3',
 '8_unbalance_Factor',
 '9_Measured_RPM',
 '9_V_in',
 '9_Vib

## Converting TAG Name Format
* After checking all the Tag Names from the Vibration Unbalance dataset in the previous step, extract only the columns to be used and convert them into parameter format.
* Use the tag names prefixed with `0_` and `1_` (the first twelve entries of the list) for classification.

In [5]:
# The first six tag names form group 0 and the next six form group 1.
# Each group becomes one string of single-quoted names separated by commas.
tags_0, tags_1 = (
    ",".join(f"'{tag}'" for tag in tag_group)
    for tag_group in (name[:6], name[6:12])
)

# Check the selected tag names
print(tags_0, tags_1, sep="\n")

'0_Measured_RPM','0_V_in','0_Vibration_1','0_Vibration_2','0_Vibration_3','0_unbalance_Factor'
'1_Measured_RPM','1_V_in','1_Vibration_1','1_Vibration_2','1_Vibration_3','1_unbalance_Factor'


## Load Vibration Unbalance Dataset
* Load the data using the Tag Names.

In [6]:
# Data loading parameter settings
# Every value is percent-encoded with urllib's quote() because data_load
# interpolates it directly into the query string of the request URL.

# Set the tag table name
table = 'vibe_unbal'
# Set the tag names
# safe=":/" leaves ':' and '/' unescaped; quotes and commas are percent-encoded
name_normal = quote(tags_0, safe=":/")
name_abnomal = quote(tags_1, safe=":/")
# Set the time format
# NOTE(review): presumably a reference-time layout understood by the server,
# asking for microsecond timestamps - confirm against the API documentation
timeformat = quote('2006-01-02 15:04:05.000000')
# Set the data start time
start_time = quote('2024-10-07 00:00:00')
# Set the data end time
end_time = quote('2024-10-07 02:00:00')

In [7]:
# Data loading function
# Preprocess for each vibration
# Rotation speed, voltage, and unbalance factor are combined into each vibration DataFrame
def data_load(table, name, start_time, end_time, timeformat):
    """Load one tag group and build one per-second feature frame per vibration column.

    The raw long-format records (TIME, NAME, VALUE) are pivoted to one column
    per tag. Columns 1, 2 and the last column of the pivot (rotation speed,
    voltage and unbalance factor for the tag sets used in this notebook) are
    averaged per second. Pivot columns 3, 4 and 5 (the vibration signals) are
    laid out as one row per second with one column per sample.

    Only seconds holding the most common number of records are kept, so every
    row has the same number of samples. The per-second averages are joined to
    the vibration rows on the second's timestamp, so a second that is dropped
    or missing can never shift the remaining rows against each other.

    Args:
        table: Tag table name (already URL-encoded).
        name: Comma-separated, quoted tag names (already URL-encoded).
        start_time: Start of the time range (already URL-encoded).
        end_time: End of the time range (already URL-encoded).
        timeformat: Timestamp layout requested from the server (already
            URL-encoded); the response must parse with '%Y-%m-%d %H:%M:%S.%f'.

    Returns:
        list[pd.DataFrame]: Three frames, one per vibration column. Each frame
        holds the two averaged operating columns (leading "<digits>_" prefix
        removed), the vibration samples of the second in columns 0..k-1, and a
        final 'label' column that is 1 when the averaged unbalance factor is
        non-zero and 0 otherwise.
    """
    # Load data 
    df = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rawdata.tql?table={table}&name={name}&start={start_time}&end={end_time}&timeformat={timeformat}')

    # Convert to data grouped by time
    df = df.pivot_table(index='TIME', columns='NAME', values='VALUE', aggfunc='first').reset_index()

    # Parse the timestamps once; every later step uses the same per-second buckets.
    # The lowercase 's' alias is used because 'S' is deprecated in recent pandas.
    timestamps = pd.to_datetime(df['TIME'], format='%Y-%m-%d %H:%M:%S.%f')
    second_of_row = timestamps.dt.floor('s')

    # Select rotation speed, voltage, and unbalance factor
    df_non_vibe = df.iloc[:, [1, 2, -1]].copy()
    df_non_vibe['TIME'] = timestamps

    # Resample to 1-second means; TIME stays a column because it is the join key below
    df_non_vibe = df_non_vibe.set_index('TIME').resample('1s').mean().reset_index()

    # Remove numbers and underscores from the beginning of the column names
    df_non_vibe.columns = df_non_vibe.columns.str.replace(r'^\d+_', '', regex=True)

    # Count the records per second and keep only the seconds with the most common
    # count. This does not depend on the vibration column, so it is done once.
    rows_per_second = second_of_row.groupby(second_of_row).size()
    most_common_count = rows_per_second.mode()[0]
    full_seconds = rows_per_second[rows_per_second == most_common_count].index
    is_full = second_of_row.isin(full_seconds)

    # Set up a list for vibration data 
    vibe = []

    # Process each vibration column 
    for i in range(3):

        vibe_col = df.columns[3 + i]

        # Collect the samples of every complete second into one list per second
        grouped = df.loc[is_full, vibe_col].groupby(second_of_row[is_full]).apply(list)

        # Split the lists into individual sample columns and keep the second as key
        df_vibe_wide = pd.DataFrame(grouped.tolist())
        df_vibe_wide.insert(0, 'TIME', grouped.index)

        # Join on TIME (not on row position) so each row of samples is paired with
        # the operating values and unbalance factor of the very same second
        result_df = df_non_vibe.merge(df_vibe_wide, on='TIME', how='inner')

        # Remove rows with missing values, then drop the join key
        result_df = result_df.dropna().drop(columns=['TIME']).reset_index(drop=True)

        # set label 
        result_df['label'] = (result_df['unbalance_Factor'] != 0.0).astype(int)

        # drop unbalance_Factor column
        result_df = result_df.drop(columns=['unbalance_Factor'])

        # Save to the list
        vibe.append(result_df)

    return vibe

In [8]:
# Load data
# Each call returns a list of three DataFrames, one per vibration column (see data_load);
# name_normal selects the 0_* tags and name_abnomal the 1_* tags
df_nomal_list = data_load(table, name_normal, start_time, end_time, timeformat)
df_abnomal_list = data_load(table, name_abnomal, start_time, end_time, timeformat)

In [20]:
# For every vibration channel: stack the rows of both tag groups, shuffle them
# with a fixed random_state and renumber the rows from zero
df_vibe_1, df_vibe_2, df_vibe_3 = (
    pd.concat([df_nomal_list[channel], df_abnomal_list[channel]], axis=0)
    .sample(frac=1, random_state=77)
    .reset_index(drop=True)
    for channel in range(3)
)

In [21]:
# Hold out the last 20% of each (already shuffled) frame as its test set.
# shuffle=False keeps the row order, and both parts are renumbered from zero.
train_1, test_1 = (
    part.reset_index(drop=True)
    for part in train_test_split(df_vibe_1, test_size=0.2, shuffle=False)
)

train_2, test_2 = (
    part.reset_index(drop=True)
    for part in train_test_split(df_vibe_2, test_size=0.2, shuffle=False)
)

train_3, test_3 = (
    part.reset_index(drop=True)
    for part in train_test_split(df_vibe_3, test_size=0.2, shuffle=False)
)

## Data Preprocessing

* 1 hanning window
* 2 FFT
* 3 MinMax Scaling
* 4 PCA

### 1. Applying Hanning Window

In [22]:
# Hanning window function setup 
def set_hanning_window(sample_rate, df):
    """Weight every row of `df` with a Hanning window.

    Args:
        sample_rate: Number of points of the window; it has to equal the
            number of columns of `df`.
        df: DataFrame holding one signal per row.

    Returns:
        pd.DataFrame: Same shape and labels as `df`, each row multiplied
        element-wise by `np.hanning(sample_rate)`.
    """
    return df.multiply(np.hanning(sample_rate), axis='columns')

In [23]:
# Sampling period -> Number of data points per second
window_length = 4096

# Window only the vibration samples: the two leading columns and the trailing
# label column are left out by the 2:-1 slice
train_1_, test_1_ = (
    set_hanning_window(window_length, part.iloc[:, 2:-1]) for part in (train_1, test_1)
)

train_2_, test_2_ = (
    set_hanning_window(window_length, part.iloc[:, 2:-1]) for part in (train_2, test_2)
)

train_3_, test_3_ = (
    set_hanning_window(window_length, part.iloc[:, 2:-1]) for part in (train_3, test_3)
)

### 2. Applying FFT (Fast Fourier Transform)

In [24]:
# FFT transformation function
def change_fft(sample_rate, df):
    """Compute the one-sided amplitude spectrum of every row of `df`.

    All rows are transformed in a single FFT call along the column axis
    instead of one Python-level FFT call per row.

    Args:
        sample_rate: Number of samples N per signal. It determines the
            2/N scaling and the number of bins kept (N // 2 + 1).
        df: DataFrame holding one signal per row.

    Returns:
        pd.DataFrame: One row per input row with N // 2 + 1 columns holding
        2/N * |FFT| for the first N // 2 + 1 bins.

    Raises:
        ValueError: If the rows hold fewer than N // 2 + 1 samples, so the
            requested bins do not exist.
    """
    # Total number of samples in the signal
    N = sample_rate
    n_bins = N // 2 + 1

    signals = df.to_numpy()

    # Nothing to transform: keep the documented output width with zero rows
    if signals.shape[0] == 0:
        return pd.DataFrame(np.zeros((0, n_bins), dtype=float))

    if signals.shape[1] < n_bins:
        raise ValueError(
            f'each row needs at least {n_bins} samples for sample_rate={N}, got {signals.shape[1]}'
        )

    # Transform all rows at once along the sample axis
    spectrum = fft(signals, axis=1)

    # Compute the absolute value of the FFT results and normalize (only the meaningful part)
    fft_results = 2.0 / N * np.abs(spectrum[:, :n_bins])

    # Convert FFT results to a DataFrame
    return pd.DataFrame(fft_results)

In [25]:
# Sampling period -> Number of data points per second
sampling_rate = 4096

# Turn the windowed signals of every channel into amplitude spectra
train_FFT_1, test_FFT_1 = (change_fft(sampling_rate, part) for part in (train_1_, test_1_))

train_FFT_2, test_FFT_2 = (change_fft(sampling_rate, part) for part in (train_2_, test_2_))

train_FFT_3, test_FFT_3 = (change_fft(sampling_rate, part) for part in (train_3_, test_3_))

### 3. Applying MinMaxScaler

In [26]:
# One independent scaler per vibration channel
scaler1, scaler2, scaler3 = MinMaxScaler(), MinMaxScaler(), MinMaxScaler()

# The feature matrix is the two leading columns of the split frame followed by
# the FFT bins. Each scaler is fitted on the training part only and then
# re-used unchanged for the matching test part.
train_s1 = scaler1.fit_transform(pd.concat([train_1.iloc[:, :2], train_FFT_1], axis=1).to_numpy())
test_s1 = scaler1.transform(pd.concat([test_1.iloc[:, :2], test_FFT_1], axis=1).to_numpy())

train_s2 = scaler2.fit_transform(pd.concat([train_2.iloc[:, :2], train_FFT_2], axis=1).to_numpy())
test_s2 = scaler2.transform(pd.concat([test_2.iloc[:, :2], test_FFT_2], axis=1).to_numpy())

train_s3 = scaler3.fit_transform(pd.concat([train_3.iloc[:, :2], train_FFT_3], axis=1).to_numpy())
test_s3 = scaler3.transform(pd.concat([test_3.iloc[:, :2], test_FFT_3], axis=1).to_numpy())

# Wrap the scaled arrays in DataFrames again
train_scaled_1, test_scaled_1 = pd.DataFrame(train_s1), pd.DataFrame(test_s1)

train_scaled_2, test_scaled_2 = pd.DataFrame(train_s2), pd.DataFrame(test_s2)

train_scaled_3, test_scaled_3 = pd.DataFrame(train_s3), pd.DataFrame(test_s3)

### 4. Applying PCA (Principal Component Analysis)

In [27]:
## Applying each PCA
# One PCA per channel, each keeping the components that explain 95% of the variance
pca1, pca2, pca3 = (PCA(n_components=0.95) for _ in range(3))

# Fit every PCA on its training part, project both parts, and append the
# label of the corresponding split frame as the last column
train_PCA_1 = pd.DataFrame(pca1.fit_transform(train_scaled_1)).assign(label=train_1['label'].to_numpy())
test_PCA_1 = pd.DataFrame(pca1.transform(test_scaled_1)).assign(label=test_1['label'].to_numpy())

train_PCA_2 = pd.DataFrame(pca2.fit_transform(train_scaled_2)).assign(label=train_2['label'].to_numpy())
test_PCA_2 = pd.DataFrame(pca2.transform(test_scaled_2)).assign(label=test_2['label'].to_numpy())

train_PCA_3 = pd.DataFrame(pca3.fit_transform(train_scaled_3)).assign(label=train_3['label'].to_numpy())
test_PCA_3 = pd.DataFrame(pca3.transform(test_scaled_3)).assign(label=test_3['label'].to_numpy())

## Model Configuration
* Using XGBoost model
* Train three XGBoost models on three vibration datasets, then ensemble the results to make a final prediction

In [28]:
# One XGBoost classifier with default settings per vibration channel
model1, model2, model3 = (xgb.XGBClassifier() for _ in range(3))

## Model Training

In [29]:
# Fit every model on the PCA features of its own channel; the label is the last column
model1.fit(train_PCA_1.iloc[:, :-1].to_numpy(), train_PCA_1.iloc[:, -1:].to_numpy())
model2.fit(train_PCA_2.iloc[:, :-1].to_numpy(), train_PCA_2.iloc[:, -1:].to_numpy())
model3.fit(train_PCA_3.iloc[:, :-1].to_numpy(), train_PCA_3.iloc[:, -1:].to_numpy())

# Persist the fitted models as JSON in the result folder
model1.save_model('./result/vibe_unval_XGBoost_General_1.json')
model2.save_model('./result/vibe_unval_XGBoost_General_2.json')
model3.save_model('./result/vibe_unval_XGBoost_General_3.json')

## Model Performance Evaluation

In [30]:
# The class probabilities of the three models are averaged row by row and scored
# against one label vector, so the three test sets must describe the same samples
# in the same order. Check that explicitly instead of assuming it.
y_true = test_PCA_3['label'].values
assert (
    np.array_equal(test_PCA_1['label'].values, y_true)
    and np.array_equal(test_PCA_2['label'].values, y_true)
), 'Test labels differ between the vibration channels; the test sets are not row-aligned'

# Make each predictions probas
y_pred1 = model1.predict_proba(test_PCA_1.iloc[:,:-1].values)
y_pred2 = model2.predict_proba(test_PCA_2.iloc[:,:-1].values)
y_pred3 = model3.predict_proba(test_PCA_3.iloc[:,:-1].values)

# Average the predicted probabilities
final_pred_probs = (y_pred1 + y_pred2 + y_pred3) / 3

# Make final predictions based on the averaged probabilities
final_predictions = final_pred_probs.argmax(axis=1)

# Evaluate the ensemble model
report = classification_report(y_true, final_predictions)

print('Ensemble Classification Report:')
print(report)

Ensemble Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1264
           1       1.00      1.00      1.00      1298

    accuracy                           1.00      2562
   macro avg       1.00      1.00      1.00      2562
weighted avg       1.00      1.00      1.00      2562

