In [None]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import Dataset, DataLoader
import math

class MultimodalAuthenticationDataset(Dataset):
    """Dataset of paired (signature, EEG) sequences labelled by subject.

    For every subject the i-th signature sequence is paired with the i-th EEG
    sequence. Each item is a dict with the two float32 tensors, an all-True
    validity mask per modality, and the subject's label.
    """

    def __init__(self, signature_data, eeg_data, labels, max_sig_len, max_eeg_len):
        """
        signature_data: Dictionary {subject_id: List of signature sequences}
            (each seq: [seq_len, n_features]; earlier notes mention [256, 5] --
            TODO confirm against the actual feature extractor)
        eeg_data: Dictionary {subject_id: List of EEG feature sequences}
            (each seq: [n_windows, n_features]; earlier notes mention 30 features)
        labels: Dictionary {subject_id: label}
        max_sig_len: Maximum length of signature sequences. Stored only; no
            truncation or padding is applied here (padding happens in the
            collate function).
        max_eeg_len: Maximum number of EEG windows across all subjects. Stored only.

        Raises:
            KeyError: if a subject in signature_data is missing from eeg_data
                or labels.
        """
        import warnings  # local import keeps this class self-contained

        self.signature_data = signature_data
        self.eeg_data = eeg_data
        self.labels = labels
        self.max_sig_len = max_sig_len
        self.max_eeg_len = max_eeg_len

        # Flat list of samples: one entry per (signature, EEG) pair.
        self.samples = []
        for subject_id, subject_sig in signature_data.items():
            if subject_id not in eeg_data:
                raise KeyError(f"subject {subject_id!r} has signature data but no EEG data")
            if subject_id not in labels:
                raise KeyError(f"subject {subject_id!r} has no label")

            sig_seqs = list(subject_sig)
            eeg_seqs = list(eeg_data[subject_id])
            if len(sig_seqs) != len(eeg_seqs):
                # zip() below pairs by position and drops the surplus; make
                # that visible instead of losing data silently.
                warnings.warn(
                    f"subject {subject_id!r}: {len(sig_seqs)} signature sequences vs "
                    f"{len(eeg_seqs)} EEG sequences; unpaired sequences are dropped"
                )

            label = labels[subject_id]
            for sig_seq, eeg_seq in zip(sig_seqs, eeg_seqs):
                self.samples.append({
                    'signature': sig_seq,
                    'eeg': eeg_seq,
                    'label': label
                })

    def __len__(self):
        """Number of (signature, EEG) pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors plus its label."""
        sample = self.samples[idx]

        # Convert to float32 tensors so numpy arrays / float64 inputs work with
        # pad_sequence and the float32 embedding layers downstream. This is a
        # no-op for inputs that are already float32 tensors.
        sig_data = torch.as_tensor(sample['signature'], dtype=torch.float32)  # [seq_len, n_features]
        sig_mask = torch.ones(sig_data.shape[0], dtype=torch.bool)

        eeg_data = torch.as_tensor(sample['eeg'], dtype=torch.float32)  # [n_windows, n_features]
        eeg_mask = torch.ones(eeg_data.shape[0], dtype=torch.bool)

        return {
            'signature': sig_data,
            'signature_mask': sig_mask,
            'eeg': eeg_data,
            'eeg_mask': eeg_mask,
            'label': sample['label']
        }

def collate_fn(batch):
    """Collate a list of dataset items into one zero-padded batch.

    Each variable-length tensor field is padded (batch-first) to the longest
    sequence in the batch. Masks are padded with 0, so padded positions read
    as False when the masks are boolean. Labels are gathered into one tensor.
    """
    pad_sequence = torch.nn.utils.rnn.pad_sequence

    def _pad_field(field):
        # Collect one field from every item and right-pad with zeros.
        return pad_sequence([entry[field] for entry in batch],
                            batch_first=True, padding_value=0)

    collated = {
        field: _pad_field(field)
        for field in ('signature', 'signature_mask', 'eeg', 'eeg_mask')
    }
    collated['label'] = torch.tensor([entry['label'] for entry in batch])
    return collated

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch-first input, then apply dropout.

    Args:
        d_model: Size of the last (feature) dimension of the input.
        dropout: Dropout probability applied after the encodings are added.
        max_len: Number of positions precomputed; the longest supported sequence.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer cosine column than sine
        # columns, so trim div_term to fit (no-op for even d_model).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return dropout(x + pe) for x of shape [batch_size, seq_len, d_model].

        Raises:
            ValueError: if seq_len exceeds the max_len the table was built for.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds positional encoding max_len {self.pe.size(0)}"
            )
        # pe[:seq_len] is [seq_len, d_model] and broadcasts over the batch dimension.
        x = x + self.pe[:seq_len]
        return self.dropout(x)

In [None]:
class SignatureTransformer(nn.Module):
    """Transformer encoder classifier over signature feature sequences.

    Args:
        input_dim: Number of features per signature time step.
        d_model: Embedding / model width.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        num_classes: Size of the classification output.
        dim_feedforward: Hidden width of each encoder layer's feed-forward block.
        dropout: Dropout probability used by the positional encoding and the
            encoder layers.
    """

    def __init__(self, input_dim=5, d_model=128, nhead=8, num_layers=4, num_classes=70,
                 dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.d_model = d_model

        # Signature embedding
        self.sig_embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout=dropout)

        # Transformer layers (sequence-first layout, see forward)
        encoder_layers = TransformerEncoderLayer(
            d_model, nhead, dim_feedforward=dim_feedforward, dropout=dropout
        )
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, num_classes)
        )

    def forward(self, src, src_mask=None):
        """Classify a batch of signature sequences.

        Args:
            src: [batch_size, seq_len, input_dim] features.
            src_mask: Optional [batch_size, seq_len] mask, truthy at valid
                (non-padded) positions.

        Returns:
            [batch_size, num_classes] logits.
        """
        src = self.sig_embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)

        # The encoder was built without batch_first, so it expects
        # [seq_len, batch_size, d_model].
        src = src.permute(1, 0, 2)

        # src_key_padding_mask marks positions to IGNORE, hence the inversion.
        padding_mask = ~src_mask.bool() if src_mask is not None else None
        output = self.transformer_encoder(src, src_key_padding_mask=padding_mask)

        # No dedicated CLS token is prepended; the encoder output at the first
        # time step serves as the sequence summary.
        first_step = output[0, :, :]  # [batch_size, d_model]

        return self.classifier(first_step)



In [None]:
class EEGTransformer(nn.Module):
    """Transformer encoder classifier over EEG feature-window sequences.

    Args:
        input_dim: Number of features per EEG window.
        d_model: Embedding / model width.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        num_classes: Size of the classification output.
        dim_feedforward: Hidden width of each encoder layer's feed-forward block.
        dropout: Dropout probability used by the positional encoding and the
            encoder layers.
    """

    def __init__(self, input_dim=30, d_model=128, nhead=8, num_layers=4, num_classes=70,
                 dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.d_model = d_model

        # EEG embedding
        self.eeg_embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout=dropout)

        # Transformer layers (sequence-first layout, see forward)
        encoder_layers = TransformerEncoderLayer(
            d_model, nhead, dim_feedforward=dim_feedforward, dropout=dropout
        )
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, num_classes)
        )

    def forward(self, src, src_mask=None):
        """Classify a batch of EEG window sequences.

        Args:
            src: [batch_size, seq_len, input_dim] features.
            src_mask: Optional [batch_size, seq_len] mask, truthy at valid
                (non-padded) positions.

        Returns:
            [batch_size, num_classes] logits.
        """
        src = self.eeg_embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)

        # The encoder was built without batch_first, so it expects
        # [seq_len, batch_size, d_model].
        src = src.permute(1, 0, 2)

        # src_key_padding_mask marks positions to IGNORE, hence the inversion.
        padding_mask = ~src_mask.bool() if src_mask is not None else None
        output = self.transformer_encoder(src, src_key_padding_mask=padding_mask)

        # No dedicated CLS token is prepended; the encoder output at the first
        # window serves as the sequence summary.
        first_step = output[0, :, :]  # [batch_size, d_model]

        return self.classifier(first_step)

In [None]:
class MultimodalTransformer(nn.Module):
    """Late-fusion classifier over a signature branch and an EEG branch.

    Each branch embeds its sequence, adds positional encodings and runs its own
    transformer encoder. The first-position outputs of the two encoders are
    concatenated and passed through a small MLP that produces the class logits.
    The branches' own `classifier` heads are constructed but not used here.
    """

    def __init__(self, sig_input_dim=5, eeg_input_dim=30, d_model=128, nhead=8,
                 num_layers=4, num_classes=70):
        super().__init__()

        # One encoder branch per modality.
        self.sig_transformer = SignatureTransformer(sig_input_dim, d_model, nhead, num_layers, num_classes)
        self.eeg_transformer = EEGTransformer(eeg_input_dim, d_model, nhead, num_layers, num_classes)

        # Fusion MLP: concatenated branch features -> class logits.
        fusion_layers = [
            nn.Linear(2 * d_model, d_model),
            nn.ReLU(),
            nn.Linear(d_model, num_classes),
        ]
        self.fusion = nn.Sequential(*fusion_layers)

    @staticmethod
    def _first_position_features(branch, embedding, inputs, mask):
        """Run one modality branch and return its first-position encoder output."""
        embedded = embedding(inputs) * math.sqrt(branch.d_model)
        # Encoders are sequence-first: [seq_len, batch_size, d_model].
        seq_first = branch.pos_encoder(embedded).permute(1, 0, 2)
        # The incoming mask is truthy at valid positions; the encoder wants
        # padding positions flagged instead.
        padding_mask = None if mask is None else ~mask.bool()
        encoded = branch.transformer_encoder(seq_first, src_key_padding_mask=padding_mask)
        return encoded[0, :, :]  # [batch_size, d_model]

    def forward(self, sig_data, sig_mask, eeg_data, eeg_mask):
        """Return [batch_size, num_classes] logits for paired signature/EEG batches."""
        sig_branch = self.sig_transformer
        sig_summary = self._first_position_features(
            sig_branch, sig_branch.sig_embedding, sig_data, sig_mask
        )

        eeg_branch = self.eeg_transformer
        eeg_summary = self._first_position_features(
            eeg_branch, eeg_branch.eeg_embedding, eeg_data, eeg_mask
        )

        fused = torch.cat([sig_summary, eeg_summary], dim=1)
        return self.fusion(fused)

In [None]:
def train_model(model, dataloader, criterion, optimizer, device, num_epochs=10):
    """Train `model` on `dataloader` and print per-epoch loss and accuracy.

    Args:
        model: Module called as model(sig_data, sig_mask, eeg_data, eeg_mask)
            that returns [batch_size, num_classes] logits.
        dataloader: Re-iterable of batches; each batch is a dict with keys
            'signature', 'signature_mask', 'eeg', 'eeg_mask' and 'label'.
            It does not need to implement __len__.
        criterion: Loss callable taking (outputs, labels).
        optimizer: Optimizer over the model's parameters.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over the dataloader.

    Returns:
        A list with one dict per epoch: {'epoch', 'loss', 'accuracy'}, where
        loss is the mean of the per-batch losses and accuracy is a percentage.

    Raises:
        ValueError: if an epoch yields no batches or no samples.
    """
    model.train()
    model.to(device)

    history = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # counted by hand so loaders without __len__ work

        for batch in dataloader:
            # Move data to device
            sig_data = batch['signature'].to(device)
            sig_mask = batch['signature_mask'].to(device)
            eeg_data = batch['eeg'].to(device)
            eeg_mask = batch['eeg_mask'].to(device)
            labels = batch['label'].to(device)

            optimizer.zero_grad()

            outputs = model(sig_data, sig_mask, eeg_data, eeg_mask)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if num_batches == 0 or total == 0:
            # Previously surfaced as a ZeroDivisionError in the averages below.
            raise ValueError("dataloader produced no samples; cannot compute epoch metrics")

        epoch_loss = running_loss / num_batches
        epoch_acc = 100 * correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
        history.append({'epoch': epoch + 1, 'loss': epoch_loss, 'accuracy': epoch_acc})

    return history



In [None]:
from auth_script_main import get_feature_vectors_for_all_users, get_list_of_user_ids

In [None]:
# sign_features_final, eeg_features_final = get_feature_vectors_for_all_users()
# 

In [None]:
# user_ids = get_list_of_user_ids()
# user_ids

In [None]:
# len(sign_features_final['002108410300042'][0][1][0])

def get_aggregated_feature_data_per_user(sign_features_final, eeg_features_final, user_ids):
    """Flatten per-user feature structures into per-user lists and assign labels.

    Args:
        sign_features_final: Mapping user_id -> iterable of trials, where each
            trial is an iterable of batches and each batch is indexable with
            the feature data at index 0 (index 1 holds an attention mask).
        eeg_features_final: Mapping user_id -> iterable of trials, where each
            trial is indexable with the feature data at index 0 (index 1 holds
            an attention mask).
        user_ids: Sequence of user ids to aggregate, in label order.

    Returns:
        (signature_features_aggregated, eeg_features_aggregated, user_id_mapping):
        - signature_features_aggregated: {user_id: feature data of every batch
          of every trial, flattened in order}
        - eeg_features_aggregated: {user_id: feature data of every trial}
        - user_id_mapping: {user_id: integer label, its position in user_ids}

    Note:
        Attention masks are not returned. Signature entries are flattened over
        trials x batches while EEG has one entry per trial, so the two lists
        for a user can differ in length -- NOTE(review): confirm that
        downstream positional pairing of the two lists is intended.
    """
    signature_features_aggregated = {
        user: [batch[0] for trial in sign_features_final[user] for batch in trial]
        for user in user_ids
    }
    eeg_features_aggregated = {
        user: [trial[0] for trial in eeg_features_final[user]]
        for user in user_ids
    }
    # Label = position of the user in user_ids.
    user_id_mapping = {user_id: label for label, user_id in enumerate(user_ids)}

    return signature_features_aggregated, eeg_features_aggregated, user_id_mapping



In [None]:
# sign_features_final['000000000200894']
# eeg_features_final['000000000200894'][0][0].shape

In [None]:
# signature_data, eeg_data, labels = get_aggregated_feature_data_per_user()

In [None]:
# len(signature_data['000000001045402'])

In [None]:
if __name__ == "__main__":

    # ---- Hyperparameters ----
    sig_input_dim = 7      # fallback only; overridden below by the feature width found in the data
    eeg_input_dim = 30     # fallback only; overridden below by the feature width found in the data
    d_model = 128
    nhead = 8
    num_layers = 4
    num_classes = 70       # fallback only; overridden below by the number of labels
    batch_size = 32
    learning_rate = 0.001
    num_epochs = 20
    seed = 42

    # Seed weight initialisation, shuffling and dropout so runs are repeatable.
    torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # signature_data: Dict {subject_id: List of signature sequences}
    # eeg_data: Dict {subject_id: List of EEG feature sequences}
    # labels: Dict {subject_id: label (0 .. n_subjects-1)}

    # Feature extraction is presumably expensive, so reuse the features when
    # they are already in kernel memory. Delete the two variables (or restart
    # the kernel) to force a fresh extraction.
    if 'sign_features_final' not in globals() or 'eeg_features_final' not in globals():
        sign_features_final, eeg_features_final = get_feature_vectors_for_all_users()
    user_ids = get_list_of_user_ids()

    signature_data, eeg_data, labels = get_aggregated_feature_data_per_user(sign_features_final, eeg_features_final, user_ids)

    # Derive model sizes from the data instead of trusting hard-coded values:
    # the embedding layers require the exact per-step feature width, and the
    # loss requires every label to be < num_classes.
    first_sig = next((seq for seqs in signature_data.values() for seq in seqs), None)
    if first_sig is not None:
        sig_input_dim = first_sig.shape[-1]
    first_eeg = next((seq for seqs in eeg_data.values() for seq in seqs), None)
    if first_eeg is not None:
        eeg_input_dim = first_eeg.shape[-1]
    if labels:
        num_classes = max(labels.values()) + 1

    # Create dataset and dataloader
    max_sig_len = 256
    # default=100 also covers subjects whose EEG list is empty.
    max_eeg_len = max((seq.shape[0] for seqs in eeg_data.values() for seq in seqs), default=100)

    dataset = MultimodalAuthenticationDataset(signature_data, eeg_data, labels, max_sig_len, max_eeg_len)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    # Initialize model, loss, optimizer
    model = MultimodalTransformer(
        sig_input_dim=sig_input_dim,
        eeg_input_dim=eeg_input_dim,
        d_model=d_model,
        nhead=nhead,
        num_layers=num_layers,
        num_classes=num_classes
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    train_model(model, dataloader, criterion, optimizer, device, num_epochs=num_epochs)

In [None]:
# Sanity check: show the CUDA version string exposed by the installed PyTorch build.
print(torch.version.cuda)