In [158]:
import numpy as np
import pandas as pd
import os
import scipy.io
from scipy.signal import butter, filtfilt, iirnotch, cheby2
from einops import rearrange
import matplotlib.pyplot as plt
import seaborn as sns
# import pywt
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from lightning import Fabric
import yaml
import lightning
from pytorch_lightning.utilities.model_summary import ModelSummary
from einops import repeat
from typing import Tuple
from torch.nn import functional as F

In [159]:
import sys

sys.path.append("../../")

from classification.loaders import EEGDataset,load_data
from models.unet.eeg_unets import Unet,UnetConfig, BottleNeckClassifier, Unet1D
from classification.classifiers import DeepClassifier
from classification.loaders import subject_dataset, CSP_subject_dataset
from classification.open_bci_loaders import OpenBCIDataset,OpenBCISubject,load_files
from ntd.networks import LongConv
from ntd.diffusion_model import Diffusion
from ntd.utils.kernels_and_diffusion_utils import WhiteNoiseProcess
import json

In [160]:
torch.set_float32_matmul_precision("medium")

In [161]:
FS = 250
sns.set_style("darkgrid")
DATA_PATH = "../../data/2b_iv"
SAVE_PATH = "../../saved_models/raw_eeg"
if not os.path.isdir(SAVE_PATH):
	os.makedirs(SAVE_PATH)
CONF_PATH = "../diffusion/conf"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

cuda


In [162]:
with open(os.path.join(CONF_PATH, "train.yaml"), "r") as f:
    train_yaml = yaml.safe_load(f)
    
with open(os.path.join(CONF_PATH, "classifier.yaml"), "r") as f:
    classifier_yaml = yaml.safe_load(f)
    
with open(os.path.join(CONF_PATH, "network.yaml"), "r") as f:
    network_yaml = yaml.safe_load(f)
    
with open(os.path.join(CONF_PATH, "diffusion.yaml"), "r") as f:
    diffusion_yaml = yaml.safe_load(f)


In [163]:
with open(r"results/params/best_tiny.json","r") as f:
	best_params = json.load(f)

In [7]:
best_params

{'hidden_channel': 64, 'kernel_size': 65, 'num_scales': 2}

In [165]:
dataset = {}
for i in range(1,10):
    mat_train,mat_test = load_data("../../data/2b_iv",i)
    dataset[f"subject_{i}"] = {"train":mat_train,"test":mat_test}

REAL_DATA = "../../data/2b_iv/raw"

TRAIN_SPLIT = 6*[["train","test"]] + 3*[["train"]]
TEST_SPLIT = 6*[[]] + 3* [["test"]]

CHANNELS = [0,2]

train_dataset = EEGDataset(subject_splits=TRAIN_SPLIT,
                    dataset=None,
                    save_paths=[REAL_DATA],
                    subject_dataset_type=subject_dataset,
                    channels=CHANNELS,
                    sanity_check=False,
                    length=2.05)

test_dataset = EEGDataset(subject_splits=TEST_SPLIT,
                    dataset=None,
                    save_paths=[REAL_DATA],
                    channels=CHANNELS,
                    sanity_check=False,
                    length=2.05)

print(train_dataset.data[0].shape)
network_yaml["signal_length"] = train_dataset.data[0].shape[-1]
network_yaml["signal_channel"] = train_dataset.data[0].shape[1]
print(network_yaml["signal_length"])

(4560, 2, 512)
(4560,)
final data shape: (4560, 2, 512)
(707, 2, 512)
(707,)
final data shape: (707, 2, 512)
(4560, 2, 512)
512


In [214]:
print(f"path {os.getcwd()}")
files = load_files("../../data/collected_data/")
train_split = 2*[["train"]]
test_split = 2*[["test"]]
save_path = os.path.join("processed","raw")
csp_save_path = os.path.join("processed","data/collected_data/csp")

train_csp_dataset = OpenBCIDataset(
	subject_splits=train_split,
	dataset=files,
	save_paths=[save_path],
	fake_data=None,
	dataset_type=OpenBCISubject,
	channels=np.arange(0,2),
	subject_channels=["ch2","ch5"],
	stride=25,
	epoch_length=512
)

test_csp_dataset = OpenBCIDataset(
	subject_splits=test_split,
	save_paths=[save_path],
	fake_data=None,
	dataset_type=OpenBCISubject,
	channels=np.arange(0,2),
	subject_channels=["ch2","ch5"],
	stride=25,
	epoch_length=512
)

path d:\Machine learning\MI SSL\motor-imagery-classification-2024\models\diffusion
Saving new data
(1984, 2, 512)
(1984,)
final data shape: (1984, 2, 512)
Loading saved data
(992, 2, 512)
(992,)
final data shape: (992, 2, 512)


In [215]:
lr = 6E-4
num_epochs = 250
time_dim = 12
hidden_channel = best_params["hidden_channel"]
kernel_size = best_params["kernel_size"]
num_scales = best_params["num_scales"]
decay_min = 2
decay_max = 2
activation_type = "leaky_relu"
use_fft_conv = kernel_size * (2 ** (num_scales - 1)) >= 100
num_timesteps = 1000
schedule = "linear"
# If the schedule is not cosine, we need to test the end_beta
start_beta = 0.0001
end_beta = 0.08

In [216]:
network = LongConv(
			signal_length=network_yaml["signal_length"],
			signal_channel=2, # The CSP classifier components
			time_dim=time_dim,
			cond_channel=network_yaml["cond_channel"], # The cond channel will contain the cue (0 or 1)
			hidden_channel=hidden_channel,
			in_kernel_size=kernel_size,
			out_kernel_size=kernel_size,
			slconv_kernel_size=kernel_size,
			num_scales=num_scales,
			decay_min=decay_min,
			decay_max=decay_max,
			heads=network_yaml["heads"],
			activation_type=activation_type,
			use_fft_conv=use_fft_conv,
		)

noise_sampler = WhiteNoiseProcess(1.0, network_yaml["signal_length"])

diffusion_model = Diffusion(
	network=network,
	diffusion_time_steps=num_timesteps,
	noise_sampler=noise_sampler,
	mal_dist_computer=noise_sampler,
	schedule=schedule,
	start_beta=start_beta,
	end_beta=end_beta,
)

In [217]:
FABRIC = Fabric(accelerator="cuda",precision="bf16-mixed")

Using bfloat16 Automatic Mixed Precision (AMP)


In [218]:

train_loader = DataLoader(
	train_dataset,
	train_yaml["batch_size"]
)

loss_per_epoch = []

stop_counter = 0
min_delta = 0.05
tolerance = 30

In [221]:
train_yaml["batch_size"]

64

In [219]:
def train(fabric,diffusion_model,train_loader):
	optimizer = optim.AdamW(
        network.parameters(),
        lr=lr,
    )

	diffusion_model,optimizer = fabric.setup(diffusion_model,optimizer)

	train_loader = fabric.setup_dataloaders(train_loader)
	for i in range(num_epochs):
			
			epoch_loss = []
			for batch in train_loader:
				
				with fabric.autocast():

					signal,cue = batch
					signal = signal.to(torch.bfloat16)
					cue = cue.to(torch.bfloat16)
					cond = cue.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, network_yaml["signal_length"]).to(DEVICE)
					loss = diffusion_model.train_batch(signal.to(DEVICE),
										 cond=cond)
				loss = torch.mean(loss)
				
				epoch_loss.append(loss.item())
				
				fabric.backward(loss)
				# loss.backward()
				optimizer.step()
				optimizer.zero_grad()
				
			epoch_loss = np.mean(epoch_loss)
			loss_per_epoch.append(epoch_loss)

			print(f"Epoch {i} loss: {epoch_loss}")

			print(f"diff: {epoch_loss - min(loss_per_epoch)}")

In [220]:
fabric_train = lambda model,loader : train(FABRIC,model,loader)

In [196]:
fabric_train(diffusion_model,train_loader)

In [197]:
diffusion_model.load_state_dict(torch.load("results/saved_models/tiny_slc_2_channels.pt"))

<All keys matched successfully>

In [198]:
class ClassificationHead(lightning.LightningModule):

	def __init__(self,
			  channels: Tuple[int],
			  pool=None) -> None:
		super().__init__()

		self.mlp = nn.ModuleList()
		for i in range(len(channels)-1):
			self.mlp.append(nn.Linear(channels[i],channels[i+1]))
			self.mlp.append(nn.ReLU())
		self.mlp.append(nn.Linear(channels[-1],2))
		self.pool = pool

	def forward(self,x):
		x = x[...,-1]
		for i in self.mlp:
			x = i(x)
		return x

In [199]:
class EEGNetHead(lightning.LightningModule):
	def __init__(self,c_in,d_out):
		super().__init__()
		self.T = 120
		
		# Layer 1
		self.conv1 = nn.Conv2d(1, 16, (c_in, 1), padding = 0)
		self.norm1 = nn.BatchNorm2d(16, False)
		
		# Layer 2
		self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
		self.conv2 = nn.Conv2d(1, 4, (2, 32))
		self.norm2 = nn.BatchNorm2d(4, False)
		self.pooling2 = nn.MaxPool2d(2, 4)
		
		# Layer 3
		self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
		self.conv3 = nn.Conv2d(4, 4, (8, 4))
		self.norm3 = nn.BatchNorm2d(4, False)
		self.pooling3 = nn.MaxPool2d((2, 4))
		self.out_proj = nn.Linear(d_out,2)

	def forward(self, x):

		x = rearrange(x,"b d t -> b 1 d t")
		
		# Layer 1
		x = F.elu(self.conv1(x))
		x = self.norm1(x)
		x = F.dropout(x, 0.25)
		x = rearrange(x,"b cond h w ->b h cond w")

		# Layer 2
		x = self.padding1(x)
		x = F.elu(self.conv2(x))
		x = self.norm2(x)
		x = F.dropout(x, 0.25)
		x = self.pooling2(x)
		
		
		# Layer 3
		x = self.padding2(x)
		x = F.elu(self.conv3(x))
		x = self.norm3(x)
		x = F.dropout(x, 0.25)
		x = self.pooling3(x)

		x = rearrange(x,"b d1 d2 t -> b (d1 d2 t)")
		x = self.out_proj(x)
		return x

In [200]:
eegnet = EEGNetHead(128,256)

In [201]:
ModelSummary(eegnet)

   | Name     | Type        | Params
------------------------------------------
0  | conv1    | Conv2d      | 2.1 K 
1  | norm1    | BatchNorm2d | 32    
2  | padding1 | ZeroPad2d   | 0     
3  | conv2    | Conv2d      | 260   
4  | norm2    | BatchNorm2d | 8     
5  | pooling2 | MaxPool2d   | 0     
6  | padding2 | ZeroPad2d   | 0     
7  | conv3    | Conv2d      | 516   
8  | norm3    | BatchNorm2d | 8     
9  | pooling3 | MaxPool2d   | 0     
10 | out_proj | Linear      | 514   
------------------------------------------
3.4 K     Trainable params
0         Non-trainable params
3.4 K     Total params
0.014     Total estimated model params size (MB)

In [202]:
class DiffusionClf(lightning.LightningModule):

	def __init__(self,
			  model,
			  clf,
			  freeze=True):

		super().__init__()
		self.model = model.network
		if freeze:
			for param in self.model.parameters():
				param.requires_grad = False
		self.clf = clf

	def forward(self,
			 x):

		cond = torch.ones((x.shape[0],1,x.shape[-1]),device=self.device)
		x = torch.cat([x,cond],1)
		t = torch.zeros(len(x),device=self.device)
		time_embed = self.model.time_embbeder(t)
		time_embed = repeat(time_embed,"b t -> b t l",l=x.shape[-1])
		x = torch.cat([x,time_embed],1)
		x = self.model.conv_pool[0:-1](x)
		return x
	
	def classify(self,x):

		x = self.forward(x)
		x = self.clf(x)
		return x

In [203]:
head = ClassificationHead([128])
eegnet_head = EEGNetHead(128,256)

In [204]:
clf = DiffusionClf(diffusion_model,eegnet_head,freeze=False)

In [205]:
ModelSummary(clf)

  | Name  | Type       | Params
-------------------------------------
0 | model | LongConv   | 443 K 
1 | clf   | EEGNetHead | 3.4 K 
-------------------------------------
446 K     Trainable params
0         Non-trainable params
446 K     Total params
1.786     Total estimated model params size (MB)

In [206]:
clf.to("cuda")

DiffusionClf(
  (model): LongConv(
    (time_embbeder): SinusoidalPosEmb()
    (conv_pool): Sequential(
      (0): EfficientMaskedConv1d(
        (layer): Conv1d(15, 128, kernel_size=(65,), stride=(1,), padding=same, bias=False)
      )
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
      (3): SLConv(
        (kernel_list): ParameterList(
            (0): Parameter containing: [torch.float32 of size 3x128x65 (GPU 0)]
            (1): Parameter containing: [torch.float32 of size 3x128x65 (GPU 0)]
        )
      )
      (4): BatchNorm1d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): LeakyReLU(negative_slope=0.01)
      (6): EfficientMaskedConv1d(
        (layer): Conv1d(384, 128, kernel_size=(1,), stride=(1,), padding=same, bias=False)
      )
      (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): LeakyReLU(negative_slope

In [207]:
x = torch.rand((16,2,512),device=clf.device)

In [208]:
clf(x).shape

torch.Size([16, 128, 512])

In [209]:
clf.classify(x)

tensor([[ 0.2626,  2.3540],
        [ 1.7146,  2.0596],
        [ 0.1287,  1.9347],
        [ 1.2821,  1.9066],
        [ 0.3995,  1.9177],
        [ 0.8560,  2.3023],
        [ 0.7173,  1.8159],
        [ 1.2211,  1.5279],
        [-0.1140, -0.0468],
        [-0.1153,  1.6670],
        [ 0.9307,  1.6229],
        [ 1.4220,  1.7189],
        [-0.3206,  1.9613],
        [ 0.3001,  1.4987],
        [ 1.2071,  1.2802],
        [ 1.1532,  0.6494]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [210]:
ones = "../saved_models/raw_eeg/generated_ones.npy"
zeros = "../saved_models/raw_eeg/generated_zeros.npy"
fake_paths = (ones,zeros)

In [224]:
# slc_clf = DeepClassifier(
# 	model=clf.to(DEVICE),
# 	save_paths=["../../data/2b_iv/raw/"],
# 	fake_data=None,
# 	train_split=TRAIN_SPLIT,
# 	test_split=TEST_SPLIT,
# 	dataset=None,
# 	subject_dataset_type=subject_dataset,
# 	length=2.05,
# 	index_cutoff=512
# )

slc_clf = DeepClassifier(
	model=clf.to(DEVICE),
	save_paths=[csp_save_path],
	fake_data=None,
	train_split=train_split,
	test_split=test_split,
	dataset=None,
	dataset_type=OpenBCIDataset,
	subject_dataset_type=OpenBCISubject,
	channels=np.arange(0,2),
	subject_channels=["ch2","ch5"],
	length=2.0,
	epoch_length=512,
	index_cutoff=512
	)

Loading saved data
(416, 2, 512)
(416,)
final data shape: (416, 2, 512)
Loading saved data
(208, 2, 512)
(208,)
final data shape: (208, 2, 512)


In [212]:
optimizer = optim.AdamW([
	{"params":slc_clf.model.model.parameters(),"lr":2E-5,"weight_decay":1E-4},
	{"params":slc_clf.model.clf.parameters(),"lr":1E-3,"weight_decay":1E-4}
])

In [213]:
slc_clf.fit(fabric=FABRIC,
			 num_epochs=25,
			 lr=1E-3,
			 weight_decay=1E-4,
			 verbose=True,
			 optimizer=optimizer)

using specified optimizer
checkpointing
Epoch [1/25], Training Loss: 0.707, Training Accuracy: 56.56%, Validation Loss: 0.588, Validation Accuracy: 68.84%
Min loss: 0.5877278645833334 vs 0.58984375
Epoch [2/25], Training Loss: 0.647, Training Accuracy: 62.24%, Validation Loss: 0.590, Validation Accuracy: 69.69%
Min loss: 0.5877278645833334 vs 0.60205078125
Epoch [3/25], Training Loss: 0.614, Training Accuracy: 65.79%, Validation Loss: 0.602, Validation Accuracy: 66.57%
Min loss: 0.5877278645833334 vs 0.6100260416666666
Epoch [4/25], Training Loss: 0.579, Training Accuracy: 69.28%, Validation Loss: 0.610, Validation Accuracy: 74.22%
checkpointing
Epoch [5/25], Training Loss: 0.559, Training Accuracy: 71.64%, Validation Loss: 0.495, Validation Accuracy: 73.65%
Min loss: 0.4951171875 vs 0.4990234375
Epoch [6/25], Training Loss: 0.532, Training Accuracy: 73.29%, Validation Loss: 0.499, Validation Accuracy: 75.64%
Min loss: 0.4951171875 vs 0.5245768229166666
Epoch [7/25], Training Loss: 0.5

82.15297450424929

- 73.7 test when only using the last token and 1 second
- 83.85 for EEGNet classification head on 2 seconds and 3 channels
- 82.15 for EEGNet classification head on 2 seconds and 2 channels