In [2]:
import pandas as pd
from utils.data import Data

dataset_name = "one_track"

graph_df = pd.read_csv(f"out/{dataset_name}.csv")
sources = graph_df.u.values
destinations = graph_df.i.values
edge_idxs = graph_df.idx.values
labels = graph_df.label.values
timestamps = graph_df.ts.values

data = Data(sources, destinations, timestamps, edge_idxs, labels)

from utils import RandEdgeSampler, get_neighbor_finder

neighbor_finder = get_neighbor_finder(data, uniform=False)
random_sampler = RandEdgeSampler(sources, destinations)
size = len(sources)
_, negatives = random_sampler.sample(size)
print(size)

568


In [3]:
import torch
from models.tgn import TGN
import numpy as np


device = ("mps" if torch.backends.mps.is_available() else "cpu")

node_features = np.load(f"out/{dataset_name}_node.npy")
edge_features = np.load(f"out/{dataset_name}_edge.npy")

tgn = TGN(neighbor_finder=neighbor_finder, node_features=node_features,
          edge_features=edge_features, device=device,
          n_layers=1, n_heads=2, dropout=0.1, use_memory=True,
          message_dimension=100, memory_dimension=172,
          memory_update_at_start=False,
          embedding_module_type="attention",
          message_function="identity",
          aggregator_type="last",
          memory_updater_type="gru",
          n_neighbors=10,
          use_destination_embedding_in_message=False,
          use_source_embedding_in_message=False,
          dyrep=False)
tgn.load_state_dict(torch.load("out/tgn.pth"))
tgn.to(device)

TGN(
  (time_encoder): TimeEncoder(
    (w): Linear(in_features=1, out_features=172, bias=True)
  )
  (memory): Memory()
  (message_aggregator): LastMessageAggregator()
  (message_function): IdentityMessageFunction()
  (memory_updater): GRUMemoryUpdater(
    (memory): Memory()
    (layer_norm): LayerNorm((172,), eps=1e-05, elementwise_affine=True)
    (memory_updater): GRUCell(517, 172)
  )
  (embedding_module): AttentionEmbedding(
    (time_encoder): TimeEncoder(
      (w): Linear(in_features=1, out_features=172, bias=True)
    )
    (attention_models): ModuleList(
      (0): TemporalAttentionLayer(
        (merger): MergeLayer(
          (fc1): Linear(in_features=516, out_features=172, bias=True)
          (fc2): Linear(in_features=172, out_features=172, bias=True)
          (activation): ReLU()
        )
        (multi_head_target): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=344, out_features=344, bias=True)
        )
      )
    )
  )
  (a

In [18]:
src_embedding, dst_embedding, neg_embedding = tgn.compute_temporal_embeddings(
    source_nodes=sources,
    destination_nodes=destinations,
    negative_nodes=negatives,
    edge_times=timestamps,
    edge_idxs=edge_idxs,
)

In [21]:
from utils.edges import create_temporal_edges_with_ix, save_temporal_edges

parsed = np.load("data/parsed.npy", allow_pickle=True).item()
eeg_array = parsed["eeg_array"]
track_size = eeg_array.shape[-1]
sep_time = 2000
t_total = 4742624  # latest timestamp
for i in range(len(eeg_array)):
    edges = create_temporal_edges_with_ix(eeg_array, i, n_splits=12, corr_threshold=0.9, t_gap=t_total)
    save_temporal_edges(f"eeg_data_{i}", edges)
    t_total += track_size + sep_time

  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


In [22]:
from utils.data import process

for i in range(len(eeg_array)):
    process(f"eeg_data_{i}")

In [26]:
for i in range(len(eeg_array)):
    dataset_name = f"eeg_data_{i}"
    
    graph_df = pd.read_csv(f"out/{dataset_name}.csv")
    sources = graph_df.u.values
    destinations = graph_df.i.values
    edge_idxs = graph_df.idx.values
    labels = graph_df.label.values
    timestamps = graph_df.ts.values
    
    data = Data(sources, destinations, timestamps, edge_idxs, labels)
    
    neighbor_finder = get_neighbor_finder(data, uniform=False)
    random_sampler = RandEdgeSampler(sources, destinations)
    size = len(sources)
    _, negatives = random_sampler.sample(size)
    
    node_features = np.load(f"out/{dataset_name}_node.npy")
    edge_features = np.load(f"out/{dataset_name}_edge.npy")
    
    src_embedding, dst_embedding, neg_embedding = tgn.compute_temporal_embeddings(
        source_nodes=sources,
        destination_nodes=destinations,
        negative_nodes=negatives,
        edge_times=timestamps,
        edge_idxs=edge_idxs,
    )
    
    save_dict = {
        "src_embedding": src_embedding,
        "dst_embedding": dst_embedding,
        "neg_embedding": neg_embedding,
    }
    
    np.save(f"out/{dataset_name}_embedding.npy", save_dict)

In [5]:
loaded_dict = np.load("out/eeg_data_0_embedding.npy", allow_pickle=True).item()
loaded_dict["src_embedding"].shape

torch.Size([84, 172])

In [21]:
def decode_embedding(path):
    loaded_dict = np.load(path, allow_pickle=True).item()
    n_samples = len(loaded_dict["src_embedding"])
    return tgn.affinity_score(torch.cat([loaded_dict["src_embedding"], loaded_dict["src_embedding"]], dim=0), torch.cat([loaded_dict["dst_embedding"], loaded_dict["neg_embedding"]])).squeeze(dim=0)[:n_samples]

# Training

In [45]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, csv_file, max_sequence_length, indices=None):
        self.tabular_data = pd.read_csv(csv_file)
        if indices is not None:
            self.tabular_data = self.tabular_data.iloc[indices]
        self.max_sequence_length = max_sequence_length

    def __len__(self):
        return len(self.tabular_data)

    def __getitem__(self, idx):
        row = self.tabular_data.iloc[idx, 2:].to_numpy().astype(np.float32)
        features = torch.tensor(row, dtype=torch.float32)
        
        sequence = decode_embedding(f"out/eeg_data_{idx}_embedding.npy").detach().cpu()[0]
        padded_sequence = np.zeros(self.max_sequence_length)
        padded_sequence[:len(sequence)] = sequence[:self.max_sequence_length]
        padded_sequence = torch.tensor(padded_sequence, dtype=torch.float32)
        
        label = self.tabular_data.iloc[idx, 1]
        label = torch.tensor(label, dtype=torch.float32)
        
        return features, padded_sequence, label

In [46]:
from sklearn.model_selection import train_test_split

N = len(pd.read_csv("data/features.csv"))
indices = list(range(N))
train_indices, test_indices, _, _ = train_test_split(indices, indices, test_size=0.2, random_state=42)

train_dataset = CustomDataset(
    csv_file="data/features.csv",
    max_sequence_length=10000,
    indices=train_indices
)

test_dataset = CustomDataset(
    csv_file="data/features.csv",
    max_sequence_length=10000,
    indices=test_indices
)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [47]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Regressor(nn.Module):
    def __init__(self, input_size):
        super(HybridModel, self).__init__()
        self.sequence_conv = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.sequence_pool = nn.AdaptiveMaxPool1d(output_size=1)
        
        self.fc1 = nn.Linear(input_size + 32, 128)  # 32 from sequence processing + features
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        
    def forward(self, x_tabular, x_sequence):
        x_sequence = x_sequence.unsqueeze(1)
        x_sequence = F.relu(self.sequence_conv(x_sequence))
        x_sequence = self.sequence_pool(x_sequence)
        x_sequence = x_sequence.view(x_sequence.size(0), -1)
        
        x_combined = torch.cat((x_tabular, x_sequence), dim=1)
        
        x = self.relu(self.fc1(x_combined))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [65]:
def train(model, criterion, optimizer, train_loader, epochs=100):
    model.train()
    losses = []
    for epoch in range(epochs):
        for batch_idx, (features, sequences, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            
            # Model forward pass
            output = model(features, sequences)
            
            loss = criterion(output.squeeze(), labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')
        losses.append(loss.item())
    return losses

model = Regressor(6)  # 6 is number of feature fields
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
losses = train(model, criterion, optimizer, train_loader, epochs=100)

Epoch 1, Loss: 2994.8486328125
Epoch 2, Loss: 1324.1185302734375
Epoch 3, Loss: 1067.29296875
Epoch 4, Loss: 613.8297729492188
Epoch 5, Loss: 598.9256591796875
Epoch 6, Loss: 825.1688232421875
Epoch 7, Loss: 405.662841796875
Epoch 8, Loss: 429.1819152832031
Epoch 9, Loss: 703.5924682617188
Epoch 10, Loss: 564.7113647460938
Epoch 11, Loss: 373.25811767578125
Epoch 12, Loss: 520.2412719726562
Epoch 13, Loss: 645.7669067382812
Epoch 14, Loss: 750.3186645507812
Epoch 15, Loss: 458.4432678222656
Epoch 16, Loss: 504.0113525390625
Epoch 17, Loss: 400.9752502441406
Epoch 18, Loss: 451.33294677734375
Epoch 19, Loss: 643.7055053710938
Epoch 20, Loss: 530.3818969726562
Epoch 21, Loss: 305.5623474121094
Epoch 22, Loss: 547.8779907226562
Epoch 23, Loss: 421.8311462402344
Epoch 24, Loss: 696.3097534179688
Epoch 25, Loss: 824.0570068359375
Epoch 26, Loss: 416.90814208984375
Epoch 27, Loss: 506.1241149902344
Epoch 28, Loss: 493.1675109863281
Epoch 29, Loss: 340.216552734375
Epoch 30, Loss: 249.2373046

In [66]:
def evaluate(model, criterion, test_loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch_idx, (features, sequences, labels) in enumerate(test_loader):
            output = model(features, sequences)
            loss = criterion(output.squeeze(), labels)
            total_loss += loss.item()

    average_loss = total_loss / len(test_loader)
    print(f'Test Loss: {average_loss:.4f}')
    return average_loss

criterion = nn.MSELoss()
evaluate(model, criterion, test_loader)

Test Loss: 325.8663


325.8662872314453