## Import Necessary Libraries

In [1]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from scipy.fft import fft

## Import libraries for the model
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Set path for saving model training results  
import os
os.makedirs('./result', exist_ok=True)

## Set Cuda for computation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

# Set seed
seed_val = 77
set_seed(seed_val)

cuda


## Selecting Data Columns
* Tag names are loaded in sequential order.
* The process of selecting the required tag names from the tag name list.

In [2]:
# Function to display tag names
def show_column(URL):
    
    # Load tag name data
    df = pd.read_csv(URL)
    
    # Convert to list format
    df = df.values.reshape(-1)
    
    return df.tolist()

In [4]:
## Set parameters for displaying tag names
table = 'bci1'

NAME_URL = f'http://127.0.0.1:5654/db/tql/datahub/api/v1/get_tag_names.tql?table={table}'

## Generate tag name list
tag_name = show_column(NAME_URL)

In [5]:
tag_name

['test-s-0',
 'test-s-1',
 'test-s-10',
 'test-s-11',
 'test-s-12',
 'test-s-13',
 'test-s-14',
 'test-s-15',
 'test-s-16',
 'test-s-17',
 'test-s-18',
 'test-s-19',
 'test-s-2',
 'test-s-20',
 'test-s-21',
 'test-s-22',
 'test-s-23',
 'test-s-24',
 'test-s-25',
 'test-s-26',
 'test-s-27',
 'test-s-28',
 'test-s-29',
 'test-s-3',
 'test-s-30',
 'test-s-31',
 'test-s-32',
 'test-s-33',
 'test-s-34',
 'test-s-35',
 'test-s-36',
 'test-s-37',
 'test-s-38',
 'test-s-39',
 'test-s-4',
 'test-s-40',
 'test-s-41',
 'test-s-42',
 'test-s-43',
 'test-s-44',
 'test-s-45',
 'test-s-46',
 'test-s-47',
 'test-s-48',
 'test-s-49',
 'test-s-5',
 'test-s-50',
 'test-s-51',
 'test-s-52',
 'test-s-53',
 'test-s-54',
 'test-s-55',
 'test-s-56',
 'test-s-57',
 'test-s-58',
 'test-s-59',
 'test-s-6',
 'test-s-60',
 'test-s-61',
 'test-s-62',
 'test-s-63',
 'test-s-7',
 'test-s-8',
 'test-s-9',
 'train-s-0',
 'train-s-1',
 'train-s-10',
 'train-s-11',
 'train-s-12',
 'train-s-13',
 'train-s-14',
 'train-s-1

## Converting TAG Name Format
* After checking all the Tag Names from the BCI1 dataset in the previous step, extract only the columns to be used and convert them into parameter format.
* Select all training data.

In [6]:
# Set the desired tag names
tags = tag_name[64:]

# Wrap each item in the list with single quotes and separate with commas
tags_= ",".join(f"'{tag}'" for tag in tags)

# Check the selected tag names
print(tags_)

'train-s-0','train-s-1','train-s-10','train-s-11','train-s-12','train-s-13','train-s-14','train-s-15','train-s-16','train-s-17','train-s-18','train-s-19','train-s-2','train-s-20','train-s-21','train-s-22','train-s-23','train-s-24','train-s-25','train-s-26','train-s-27','train-s-28','train-s-29','train-s-3','train-s-30','train-s-31','train-s-32','train-s-33','train-s-34','train-s-35','train-s-36','train-s-37','train-s-38','train-s-39','train-s-4','train-s-40','train-s-41','train-s-42','train-s-43','train-s-44','train-s-45','train-s-46','train-s-47','train-s-48','train-s-49','train-s-5','train-s-50','train-s-51','train-s-52','train-s-53','train-s-54','train-s-55','train-s-56','train-s-57','train-s-58','train-s-59','train-s-6','train-s-60','train-s-61','train-s-62','train-s-63','train-s-7','train-s-8','train-s-9','train-s-answer'


## Load BCI1 Dataset
* Load the data using the Tag Names.

In [7]:
# Data loading parameter settings

# Set the tag table name
table = 'bci1'
# Set the tag names
name = quote(tags_, safe=":/")
# Set the time format
timeformat = quote('2006-01-02 15:04:05.000000')
# Set the data start time
start_time = quote('2024-01-01 00:00:00')
# Set the data end time
end_time = quote('2024-01-01 04:37:03')

In [8]:
# Data loading function
def data_load(table, tag_name, name, start_time, end_time, timeformat):
    
    # List to store the results
    result_dfs = []
    
    # Load data
    df = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rawdata.tql?table={table}&name={name}&start={start_time}&end={end_time}&timeformat={timeformat}')

    # Convert to data grouped by the time
    df = df.pivot_table(index='TIME', columns='NAME', values='VALUE', aggfunc='first').reset_index()

    # Separate target values
    df_label = df.iloc[:, -1:].dropna()

    # Remove target column
    df = df.iloc[:, :-1]

    for col_name in tag_name[64:-1]:

        # Set TIME column
        df['TIME'] = pd.to_datetime(df['TIME'], format='%Y-%m-%d %H:%M:%S.%f')

        # Group by 3-second intervals and count the number of records
        df_counts = df.groupby(df['TIME'].dt.floor('3S')).size().reset_index(name='count')

        # Filter only groups with the most common count
        most_common_count = df_counts['count'].mode()[0]
        filtered_df_counts = df_counts[df_counts['count'] == most_common_count]

        # Convert filtered time values to a list
        filtered_times = filtered_df_counts['TIME'].tolist()

        # Select only the filtered time values from the original DataFrame
        filtered_data = df[df['TIME'].dt.floor('3S').isin(filtered_times)]

        # Group by TIME (rounded to 3-second intervals)
        filtered_data_ = filtered_data.copy()
        filtered_data_['TIME'] = filtered_data_['TIME'].dt.floor('3S')
        grouped = filtered_data_.groupby('TIME')[col_name].apply(list).reset_index()

        # Split the list into individual columns
        result_df = pd.DataFrame(grouped[col_name].tolist())

        # Add result to the list
        result_dfs.append(result_df)
        
    # Initialize the list to store results
    data_list = []
    k = 0

    for k in result_dfs:
        
        # Convert to array format
        data = k.values
        data_list.append(data)

    # Convert the list to a NumPy array
    data_array = np.array(data_list)

    # Reshape to the required format
    # Transform to the shape (number of data, 64, 3000)
    reshaped_array = np.transpose(data_array, (1, 0, 2)) 
    
    # Modify 'train-s-answer'
    df_label.loc[df_label['train-s-answer'] == -1.0, 'train-s-answer'] = 0
    df_label['train-s-answer'] = df_label['train-s-answer'].astype(int)

    return reshaped_array, df_label

In [9]:
# Data loading

# Load training data
train, train_label = data_load(table, tag_name, name, start_time, end_time, timeformat)

# Split data -> train, validation, test
train, test, train_label, test_label = train_test_split(train, train_label, test_size=0.2, random_state=77)
test, valid, test_label, valid_label = train_test_split(test, test_label, test_size=0.5, random_state=77)

train_label = train_label.reset_index(drop=True)
valid_label = valid_label.reset_index(drop=True)

## Data Preprocessing
   * 1 Hanning Window
   * 2 FFT 
   * 3 PCA

### 1. Applying Hanning Window

In [10]:
# Hanning window function setup 
def hanning_window(length):
    return 0.5 * (1 - np.cos(2 * np.pi * np.arange(length) / (length - 1)))

In [11]:
# Parameter Setup
window_length = 3000

# Applying Hanning Window
train_ = train * hanning_window(3000)
valid_ = valid * hanning_window(3000)
test_ = test * hanning_window(3000)

### 2. Applying FFT (Fast Fourier Transform)

In [12]:
# FFT transformation function
def change_fft(sample_rate, data):
    # Total number of samples in the signal
    N = sample_rate
    
    # Initialize an array to store FFT results for each channel
    fft_results = np.zeros((data.shape[0], data.shape[1], N // 2 + 1), dtype=float)
    
    # Apply FFT to the entire dataset
    for i in range(data.shape[0]):  # For each sample
        for j in range(data.shape[1]):  # For each channel
            yf = fft(data[i, j], n=N)  # Calculate FFT
            # Compute the absolute value of the FFT result and normalize (only the meaningful part)
            fft_results[i, j] = 2.0 / N * np.abs(yf[:N // 2 + 1])
    
    return fft_results

In [13]:
# Sampling period -> Number of data points per second
sampling_rate = 3000

# Apply FFT transformation
train_ = change_fft(sampling_rate, train_)
valid_ = change_fft(sampling_rate, valid_)
test_ = change_fft(sampling_rate, test_)

### 3. Applying PCA (Principal Component Analysis)

In [14]:
# PCA application function
def apply_pca(train_data, valid_data, test_data, n_components=0.95):
    
    # Reshape to 2D
    train_reshaped = train_data.reshape(-1, train_data.shape[2])
    valid_reshaped = valid_data.reshape(-1, valid_data.shape[2])
    test_reshaped = test_data.reshape(-1, test_data.shape[2]) 

    # Fit PCA on training data
    pca = PCA(n_components=n_components)
    train_pca = pca.fit_transform(train_reshaped)
    
    # Apply PCA to validation and test data
    valid_pca = pca.transform(valid_reshaped)
    test_pca = pca.transform(test_reshaped)

    # Reshape back to original 3D format
    train_pca_reshaped = train_pca.reshape(train_data.shape[0], train_data.shape[1], -1)
    valid_pca_reshaped = valid_pca.reshape(valid_data.shape[0], valid_data.shape[1], -1)
    test_pca_reshaped = test_pca.reshape(test_data.shape[0], test_data.shape[1], -1)
    
    return train_pca_reshaped, valid_pca_reshaped, test_pca_reshaped, pca

# Apply PCA to the datasets
train_pca, valid_pca, test_pca, pca_model = apply_pca(train_, valid_, test_, n_components=0.95)

## Dataset & Loader Setup

In [15]:
# Dataset Setup
class EEGDataset(Dataset):
    def __init__(self, data, labels):
        
        self.data = torch.Tensor(data)
        self.labels = torch.Tensor(labels)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        input_time_data = self.data[index]
        label = self.labels[index]

        return input_time_data, label

In [16]:
# Dataset setup 
train_ = EEGDataset(train_pca, train_label.values)
valid_ = EEGDataset(valid_pca, valid_label.values)
test_ = EEGDataset(test_pca, test_label.values)

# Data loader setup
train_dataloader = DataLoader(train_, batch_size=16, shuffle=True)
valid_dataloader = DataLoader(valid_, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_, batch_size=1, shuffle=False)

In [17]:
# Verify DataLoader application and check the shape of the input data
print(list(train_dataloader)[0][0].shape)

torch.Size([16, 64, 14])


## Model Configuration
* Using ResNet1d model.

In [18]:
## ResNet 1D Model Setup 
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        
        # Identity mapping
        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            self.shortcut = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64 

        
        self.conv1 = nn.Conv1d(64, 64, kernel_size=7, stride=2, padding=3) 
        self.bn1 = nn.BatchNorm1d(64)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1])
        self.layer3 = self._make_layer(block, 256, layers[2])
        self.layer4 = self._make_layer(block, 512, layers[3])
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks):
        layers = []
        layers.append(block(self.in_channels, out_channels))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [19]:
# Model configuration parameters
# Learning rate
learning_rate = 0.01

# Model configuration
model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=2).to(device)

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Check the model architecture
print(model)

ResNet(
  (conv1): Conv1d(64, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
  )
  (layer2): Sequ

## Model Training
* Save the model with the highest F1 score based on the validation data during training.

In [20]:
# Initialize training loss
train_loss = []
# Initialize training accuracy
train_acc = []
# Initialize total step
total_step = len(train_dataloader)
# Set number of epochs
epoch_in = trange(100, desc='training')
# Initialize best F1 Score value
best_f1= 0

# Start model training
for epoch in epoch_in:
    model.train()
    running_loss = 0.0
    correct = 0
    total=0

    preds_ = []
    targets_ = []

    for batch_idx, train_data in enumerate(train_dataloader):

        inputs = train_data[0].to(device).float()
        labels = train_data[1].to(device).long().squeeze()
        
        optimizer.zero_grad()
        
        # Input to the model
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Set label predictions
        _, pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred == labels).item()
        total += labels.size(0)
        
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss / total_step)
    print(f'\ntrain loss: {np.mean(train_loss)}, train acc: {(100 * correct / total):.4f}')
    
    # Perform validation at the end of each epoch and save the model with the best performance
    with torch.no_grad():
        model.eval()
        
        for batch_idx, valid_data in enumerate(valid_dataloader):

            inputs_v = valid_data[0].to(device).float()
            labels_v = valid_data[1].to(device).long().squeeze() 
            
            outputs_v = model(inputs_v)
            
            # Set label predictions
            _, pred_v = torch.max(outputs_v, dim=1)
            target_v = labels_v.view_as(pred_v)
            
            preds_.append(pred_v)
            targets_.append(target_v)
            
        # Combine predictions and labels collected from all batches
        preds_ = torch.cat(preds_).detach().cpu().numpy()
        targets_ = torch.cat(targets_).detach().cpu().numpy()
        
        f1score = f1_score(targets_, preds_, average='macro')
        if best_f1 < f1score:
            best_f1 = f1score
            # Save the best model
            with open("./result/BCI1_ResNet1d_General.txt", "a") as text_file:
                print('epoch=====',epoch, file=text_file)
                print(classification_report(targets_, preds_, digits=4), file=text_file)
            torch.save(model, f'./result/BCI1_ResNet1d_General.pt') 
        epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")

training:   0%|          | 0/100 [00:00<?, ?it/s]

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass



train loss: 1.4153182591710771, train acc: 49.0991

train loss: 1.1911047611917769, train acc: 51.3514

train loss: 1.1260899901390076, train acc: 49.0991

train loss: 1.0170632281473704, train acc: 56.3063

train loss: 0.9465377867221832, train acc: 58.1081

train loss: 0.8920519987032528, train acc: 63.9640

train loss: 0.8320432107667534, train acc: 78.8288

train loss: 0.7660457894339094, train acc: 90.0901

train loss: 0.712477548668782, train acc: 89.1892

train loss: 0.6751021089098816, train acc: 88.2883

train loss: 0.6252017873339356, train acc: 96.3964

train loss: 0.5772606652428208, train acc: 98.6486

train loss: 0.5492561949963253, train acc: 90.5405

train loss: 0.5180750247465248, train acc: 95.4955

train loss: 0.4896357618787859, train acc: 94.5946

train loss: 0.4717200311637758, train acc: 92.7928

train loss: 0.4527906350847216, train acc: 94.1441

train loss: 0.4353300654685073, train acc: 93.2432

train loss: 0.41912484790515364, train acc: 95.0450

train loss:

## Model Testing

In [21]:
# Load the best model
model_ = torch.load(f'./result/BCI1_ResNet1d_General.pt').to(device)

In [22]:
# Model testing
preds_test = []
target_test = []
with torch.no_grad():
    model_.eval()
    for batch_idx, test_data in enumerate(test_dataloader):
        inputs_t = test_data[0].to(device).float()
        labels_t = test_data[1].to(device).long().squeeze()
        
        outputs_t = model_(inputs_t)
        
        _, pred_t = torch.max(outputs_t, dim=1)

        preds_test.append(pred_t)
        target_test.append(labels_t.item())
        
    # Combine predictions and labels collected from all batches
    preds_test = torch.cat(preds_test).detach().cpu().numpy()

# Create a DataFrame for test results
final = pd.DataFrame(target_test, columns=['label'])
final['pred'] = preds_test

## Model Performance Evaluation

In [23]:
print(classification_report(final['label'].values, final['pred'].values))

              precision    recall  f1-score   support

           0       0.87      0.81      0.84        16
           1       0.77      0.83      0.80        12

    accuracy                           0.82        28
   macro avg       0.82      0.82      0.82        28
weighted avg       0.82      0.82      0.82        28

