In [1]:
from sklearn.metrics import f1_score
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.cluster import KMeans

from torch.utils.data import TensorDataset, DataLoader, Dataset

In [2]:
def _load_split(path):
    """Read one split file and return the Python object stored inside it."""
    # NOTE(review): allow_pickle=True unpickles the file contents, which can run
    # arbitrary code -- only point this at split files from a trusted source.
    return np.load(path, allow_pickle=True).item()


test_data = _load_split('dataset/test-release.npy')
train_data = _load_split('dataset/train_split.npy')
val_data = _load_split('dataset/val_split.npy')

In [3]:
# Shape of the keypoint array of test sequence '3c8a57ba51'; the first axis is
# the frame axis (it is what the later cells use as the number of frames).
example_keypoints = test_data['sequences']['3c8a57ba51']['keypoints']
example_keypoints.shape

(19492, 2, 2, 7)

In [4]:
# Annotator id stored with test sequence '3c8a57ba51'.
example_sequence = test_data['sequences']['3c8a57ba51']
example_sequence['annotator_id']

0

In [5]:
# train data processing
# Flatten every training sequence to one row per frame (all keypoint coordinates of
# a frame side by side) and stack the frames of all sequences. The annotations are
# not needed for clustering, so they are no longer read here.
train_cluster_features = np.concatenate(
    [
        keypoints.reshape(keypoints.shape[0], -1)
        for keypoints in (sequence['keypoints'] for sequence in train_data['sequences'].values())
    ],
    axis=0,
)

print(train_cluster_features.shape)

(405088, 28)


In [6]:
# test data processing
# One row per frame: each test sequence is reshaped to (num_frames, -1) and the
# rows of all sequences are stacked in iteration order.
test_cluster_features = np.concatenate(
    [
        seq['keypoints'].reshape(seq['keypoints'].shape[0], -1)
        for seq in test_data['sequences'].values()
    ],
    axis=0,
)

print(test_cluster_features.shape)

(8168491, 28)


In [7]:
# Stack the train frames followed by the test frames (validation frames are not
# part of this array); the row order here defines the order of the cluster labels.
combined_features = np.vstack((train_cluster_features, test_cluster_features))
print(combined_features.shape)

(8573579, 28)


In [8]:
# Fit k-means with 20 clusters on the stacked frames; every frame gets the index of
# its cluster as a pseudo-label, which is then written to disk.
n_clusters = 20
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(combined_features)
cluster_labels = kmeans.labels_

print(cluster_labels.shape)
np.save('./dataset/cluster_labels.npy', cluster_labels)

(8573579,)


In [9]:
# train data processing
# One sample per annotated frame: the frame itself plus `window_size` frames of
# context on each side (zeros where the window runs past the sequence boundary),
# laid out as (features, time).
train_features = []
train_labels = []
window_size = 99

for sequence in train_data['sequences'].values():
    keypoints = sequence['keypoints']
    annotations = np.asarray(sequence['annotations'])

    num_frames = keypoints.shape[0]
    if num_frames == 0:
        continue  # an empty sequence contributes no samples
    if len(annotations) < num_frames:
        raise ValueError(
            f"sequence has {num_frames} frames but only {len(annotations)} annotations"
        )

    # Flatten each frame, then pad in time. The pad width is taken from the data
    # rather than hard-coding the (2, 2, 7) keypoint layout.
    frames_flat = keypoints.reshape(num_frames, -1)
    pad = np.zeros((window_size, frames_flat.shape[1]))
    padded = np.concatenate((pad, frames_flat, pad), axis=0)

    # Window j spans padded[j : j + 2 * window_size + 1], i.e. it is centred on
    # original frame j. The view has shape (num_frames, features, 2 * window_size + 1)
    # and replaces the per-frame Python loop and the list of small arrays it built.
    windows = np.lib.stride_tricks.sliding_window_view(
        padded, 2 * window_size + 1, axis=0
    )
    train_features.append(windows)
    train_labels.append(annotations[:num_frames])

# Concatenating materialises the views once, already in (samples, features, time) order.
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

print(train_features.shape)
print(train_labels.shape)

(405088, 28, 199)
(405088,)


In [10]:
# validation data processing
# One sample per annotated frame: the frame itself plus `window_size` frames of
# context on each side (zeros where the window runs past the sequence boundary),
# laid out as (features, time).
val_features = []
val_labels = []
window_size = 99

for sequence in val_data['sequences'].values():
    keypoints = sequence['keypoints']
    annotations = np.asarray(sequence['annotations'])

    num_frames = keypoints.shape[0]
    if num_frames == 0:
        continue  # an empty sequence contributes no samples
    if len(annotations) < num_frames:
        raise ValueError(
            f"sequence has {num_frames} frames but only {len(annotations)} annotations"
        )

    # Flatten each frame, then pad in time. The pad width is taken from the data
    # rather than hard-coding the (2, 2, 7) keypoint layout.
    frames_flat = keypoints.reshape(num_frames, -1)
    pad = np.zeros((window_size, frames_flat.shape[1]))
    padded = np.concatenate((pad, frames_flat, pad), axis=0)

    # Window j spans padded[j : j + 2 * window_size + 1], i.e. it is centred on
    # original frame j. The view has shape (num_frames, features, 2 * window_size + 1)
    # and replaces the per-frame Python loop and the list of small arrays it built.
    windows = np.lib.stride_tricks.sliding_window_view(
        padded, 2 * window_size + 1, axis=0
    )
    val_features.append(windows)
    val_labels.append(annotations[:num_frames])

# Concatenating materialises the views once, already in (samples, features, time) order.
val_features = np.concatenate(val_features, axis=0)
val_labels = np.concatenate(val_labels, axis=0)


print(val_features.shape)
print(val_labels.shape)

(102650, 28, 199)
(102650,)


In [11]:
class ClusterLabeledDataset(Dataset):
    """Frame-level dataset: a zero-padded temporal window around a frame plus its label.

    Args:
        data: sequence of 2-D arrays, one per video, each shaped (frames, features).
        labels: flat sequence with one label per frame, ordered like the frames of
            ``data`` concatenated video after video.
        num_frames: nominal window length; ``(num_frames - 1) // 2`` context frames
            are taken on each side of the centre frame, so every sample has
            ``2 * ((num_frames - 1) // 2) + 1`` time steps.

    Raises:
        ValueError: if ``labels`` holds fewer entries than there are frames.
    """

    def __init__(self, data, labels, num_frames):
        super().__init__()
        self.data = data
        self.num_frames = num_frames
        # lengths[i] is the total number of frames in videos 0..i (inclusive).
        self.lengths = np.cumsum([len(video) for video in data], dtype=np.int64)
        total = int(self.lengths[-1]) if len(self.lengths) else 0
        if len(labels) < total:
            raise ValueError(
                f"expected at least {total} labels (one per frame), got {len(labels)}"
            )
        # Split the flat label vector into one slice per video.
        bounds = [0] + self.lengths.tolist()
        self.labels = [labels[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    def __len__(self):
        """Total number of frames over all videos, as a plain ``int``."""
        return int(self.lengths[-1]) if len(self.lengths) else 0

    def __getitem__(self, index):
        """Return ``(window, label)`` for the frame with global index ``index``.

        ``window`` is a tensor shaped (features, time) centred on the frame; positions
        that fall outside the video are zero. Negative indices count from the end.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        index = int(index)
        total = len(self)
        if index < 0:
            index += total
        if not 0 <= index < total:
            raise IndexError(f"index {index} out of range for dataset of size {total}")

        seq_index = self._find_seq_index(index)
        video = self.data[seq_index]
        offset = int(self.lengths[seq_index - 1]) if seq_index > 0 else 0
        frame_index = index - offset

        window_size = (self.num_frames - 1) // 2
        start = frame_index - window_size
        stop = frame_index + window_size + 1
        window = video[max(start, 0):min(stop, len(video))]
        pad_before = max(-start, 0)
        pad_after = max(stop - len(video), 0)
        if pad_before or pad_after:
            # np.pad keeps the video's dtype, so edge and interior samples match.
            window = np.pad(window, ((pad_before, pad_after), (0, 0)))
        else:
            # Copy so the returned tensor never aliases the stored video.
            window = window.copy()

        label = self.labels[seq_index][frame_index]
        return torch.from_numpy(window).permute(1, 0), torch.tensor(label)

    def _find_seq_index(self, index):
        """Return the index of the video that contains global frame ``index``."""
        # First position whose cumulative length exceeds `index`; clamped to the
        # last video for out-of-range input.
        position = int(np.searchsorted(self.lengths, index, side='right'))
        return min(position, len(self.lengths) - 1)

In [12]:
# Read the per-frame cluster labels back from the file written by the k-means cell.
cluster_labels_path = './dataset/cluster_labels.npy'
combined_labels = np.load(cluster_labels_path)

In [13]:
# Train sequences first, then test sequences -- the same order in which the frames
# were stacked before clustering, so frame i lines up with combined_labels[i].
cluster_sequences = [
    seq['keypoints'].reshape(-1, 28)
    for split in (train_data, test_data)
    for seq in split['sequences'].values()
]
cluster_dataset = ClusterLabeledDataset(cluster_sequences, combined_labels, 199)


In [14]:

class BaseModel(nn.Module):
    """Two-layer 1-D CNN over time with a shared trunk and two linear heads.

    The trunk is: batch norm, then two (conv -> batch norm -> ReLU -> dropout ->
    max-pool) stages with 512 and 256 channels, then a mean over the time axis.
    ``fc`` produces the class logits, ``fc_cluster`` the cluster logits.

    Args:
        input_dim: number of input channels (features per time step).
        num_classes: output size of the ``fc`` head (default 4).
        num_clusters: output size of the ``fc_cluster`` head (default 20).
    """

    def __init__(self, input_dim, num_classes=4, num_clusters=20):
        super().__init__()
        self.batch_norm1 = nn.BatchNorm1d(input_dim)

        self.conv1 = nn.Conv1d(input_dim, 512, kernel_size=5, padding=2)
        self.batch_norm2 = nn.BatchNorm1d(512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(512, 256, kernel_size=5, padding=2)
        self.batch_norm3 = nn.BatchNorm1d(256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.maxpool2 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.fc = nn.Linear(256, num_classes)
        self.fc_cluster = nn.Linear(256, num_clusters)

    def forward(self, x, is_cluster=False):
        """Return logits for ``x`` shaped (batch, input_dim, time).

        Args:
            x: input batch.
            is_cluster: when True use the cluster head, otherwise the class head.

        Returns:
            Tensor shaped (batch, num_clusters) or (batch, num_classes).
        """
        x = self.batch_norm1(x)

        x = self.relu1(self.batch_norm2(self.conv1(x)))
        x = self.maxpool1(self.dropout1(x))

        x = self.relu2(self.batch_norm3(self.conv2(x)))
        x = self.maxpool2(self.dropout2(x))

        # Average over time -> (batch, 256); this already removes the time axis,
        # so no further squeeze is needed.
        x = x.mean(dim=2)

        head = self.fc_cluster if is_cluster else self.fc
        return head(x)

In [15]:
def validation(model, val_features, val_labels, device=None):
    """Evaluate ``model`` on a labelled set and return the macro F1 over classes 0-2.

    The model is moved to ``device`` and switched to eval mode; it is left in eval
    mode on return, so callers that keep training must call ``model.train()`` again.

    Args:
        model: module called as ``model(batch)`` that returns per-class logits.
        val_features: array or tensor of inputs, first axis is the sample axis.
        val_labels: array or tensor with one integer class label per sample.
        device: device to run on. ``None`` (default) picks CUDA, then MPS, then CPU.

    Returns:
        The macro-averaged F1 score restricted to labels 0, 1 and 2 (also printed).
    """
    if device is None:
        mps = getattr(torch.backends, 'mps', None)
        if torch.cuda.is_available():
            device = 'cuda'
        elif mps is not None and mps.is_available():
            device = 'mps'
        else:
            device = 'cpu'

    X = torch.as_tensor(val_features, dtype=torch.float32)
    y = torch.as_tensor(val_labels, dtype=torch.long)

    dataset = TensorDataset(X, y)
    # Order does not affect the F1 score, so there is no reason to shuffle.
    dataloader = DataLoader(dataset, batch_size=256, shuffle=False)

    model = model.to(device)
    model.eval()

    labels = []
    total_predicted = []
    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            outputs = model(batch_X.to(device))
            predicted = outputs.argmax(dim=1)
            # extend() keeps accumulation linear in the number of samples.
            labels.extend(batch_y.tolist())
            total_predicted.extend(predicted.tolist())
    F1 = f1_score(labels, total_predicted, average='macro', labels=[0, 1, 2])
    print(f"Val-F1: {F1:.4f}")
    return F1

In [None]:
input_dim = 28
# Prefer CUDA, then Apple MPS, and fall back to CPU so the cell also runs on
# machines that have neither accelerator.
if torch.cuda.is_available():
    device = 'cuda'
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
batch_size = 32
epochs = 30
lr = 0.001

model = BaseModel(input_dim=input_dim)
model = model.to(device)

# as_tensor accepts the NumPy arrays as well as tensors left over from an earlier
# run of this cell, and does not make an extra full copy before the dtype cast,
# so the cell can be re-run safely.
train_features = torch.as_tensor(train_features).to(device, dtype=torch.float32)
train_labels = torch.as_tensor(train_labels).to(device, dtype=torch.long)

train_dataset = TensorDataset(train_features, train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
cluster_dataloader = DataLoader(cluster_dataset, batch_size=batch_size, shuffle=True)


optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(epochs):
    model.train()
    total_loss = 0
    total_predictions = 0
    correct_predictions = 0
    labels = []
    total_predicted = []

    # Supervised pass: class head on the annotated training windows.
    for batch_X, batch_y in train_dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_X, is_cluster=False)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        predicted = outputs.detach().argmax(dim=1)
        total_predictions += batch_y.size(0)
        correct_predictions += (predicted == batch_y).sum().item()
        total_loss += loss.item() * batch_X.size(0)
        # extend() keeps metric bookkeeping linear; `labels = labels + ...` re-copied
        # the whole list on every batch.
        labels.extend(batch_y.tolist())
        total_predicted.extend(predicted.tolist())

    # Auxiliary pass: cluster head on the k-means pseudo-labelled frames.
    for batch_X, batch_y in cluster_dataloader:
        batch_X, batch_y = batch_X.to(device, dtype=torch.float32), batch_y.to(device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(batch_X, is_cluster=True)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(train_dataset)
    accuracy = 100 * correct_predictions / total_predictions
    F1 = f1_score(labels, total_predicted, average='macro', labels=[0, 1, 2])
    print(f"Epoch {epoch+1}/{epochs} - Train-Loss: {avg_loss:.4f} - Train-Accuracy: {accuracy:.2f}% - Train-F1: {F1:.4f}")
    # validation() leaves the model in eval mode; model.train() above restores it.
    validation(model, val_features, val_labels, device=device)
print("Training complete.")

In [None]:
# train_f1 = [
#     0.5643, 0.6331, 0.6779, 0.6972, 0.7092, 0.7175, 0.7242, 0.7288, 0.7350, 0.7353,
#     0.7401, 0.7405, 0.7461, 0.7493, 0.7512, 0.7530, 0.7554, 0.7582, 0.7610, 0.7631,
#     0.7653, 0.7670, 0.7695, 0.7713, 0.7734, 0.7752, 0.7770, 0.7784, 0.7800, 0.7823
# ]
# val_f1 = [
#     0.1121, 0.2059, 0.2052, 0.2830, 0.2153, 0.2716, 0.2600, 0.2490, 0.2837, 0.3076,
#     0.3064, 0.3018, 0.3261, 0.3254, 0.3368, 0.3401, 0.3423, 0.3395, 0.3478, 0.3522,
#     0.3451, 0.3486, 0.3507, 0.3559, 0.3604, 0.3590, 0.3638, 0.3612, 0.3647, 0.3681
# ]

In [None]:
'''
Epoch 1/30 - Train-Loss: 0.4760 - Train-Accuracy: 80.49% - Train-F1: 0.5643
Val-F1: 0.1121
Epoch 2/30 - Train-Loss: 0.4232 - Train-Accuracy: 82.62% - Train-F1: 0.6331
Val-F1: 0.2059
Epoch 3/30 - Train-Loss: 0.3917 - Train-Accuracy: 83.89% - Train-F1: 0.6779
Val-F1: 0.2052
Epoch 4/30 - Train-Loss: 0.3780 - Train-Accuracy: 84.40% - Train-F1: 0.6972
Val-F1: 0.2830
Epoch 5/30 - Train-Loss: 0.3695 - Train-Accuracy: 84.72% - Train-F1: 0.7092
Val-F1: 0.2153
Epoch 6/30 - Train-Loss: 0.3621 - Train-Accuracy: 85.04% - Train-F1: 0.7175
Val-F1: 0.2716
Epoch 7/30 - Train-Loss: 0.3578 - Train-Accuracy: 85.27% - Train-F1: 0.7242
Val-F1: 0.2600
Epoch 8/30 - Train-Loss: 0.3540 - Train-Accuracy: 85.35% - Train-F1: 0.7288
Val-F1: 0.2490
Epoch 9/30 - Train-Loss: 0.3506 - Train-Accuracy: 85.52% - Train-F1: 0.7350
Val-F1: 0.2837
Epoch 10/30 - Train-Loss: 0.3484 - Train-Accuracy: 85.56% - Train-F1: 0.7353
Val-F1: 0.3076
Epoch 11/30 - Train-Loss: 0.3451 - Train-Accuracy: 85.74% - Train-F1: 0.7401
Val-F1: 0.3064
Epoch 12/30 - Train-Loss: 0.3432 - Train-Accuracy: 85.79% - Train-F1: 0.7405
Val-F1: 0.3018
Epoch 13/30 - Train-Loss: 0.3409 - Train-Accuracy: 85.94% - Train-F1: 0.7461
Val-F1: 0.3261
Epoch 14/30 - Train-Loss: 0.3391 - Train-Accuracy: 86.02% - Train-F1: 0.7493
Val-F1: 0.3254
Epoch 15/30 - Train-Loss: 0.3375 - Train-Accuracy: 86.10% - Train-F1: 0.7512
Val-F1: 0.3368
Epoch 16/30 - Train-Loss: 0.3358 - Train-Accuracy: 86.19% - Train-F1: 0.7530
Val-F1: 0.3401
Epoch 17/30 - Train-Loss: 0.3346 - Train-Accuracy: 86.28% - Train-F1: 0.7554
Val-F1: 0.3423
Epoch 18/30 - Train-Loss: 0.3329 - Train-Accuracy: 86.35% - Train-F1: 0.7582
Val-F1: 0.3395
Epoch 19/30 - Train-Loss: 0.3314 - Train-Accuracy: 86.45% - Train-F1: 0.7610
Val-F1: 0.3478
Epoch 20/30 - Train-Loss: 0.3297 - Train-Accuracy: 86.54% - Train-F1: 0.7631
Val-F1: 0.3522
Epoch 21/30 - Train-Loss: 0.3282 - Train-Accuracy: 86.62% - Train-F1: 0.7653
Val-F1: 0.3451
Epoch 22/30 - Train-Loss: 0.3271 - Train-Accuracy: 86.68% - Train-F1: 0.7670
Val-F1: 0.3486
Epoch 23/30 - Train-Loss: 0.3256 - Train-Accuracy: 86.75% - Train-F1: 0.7695
Val-F1: 0.3507
Epoch 24/30 - Train-Loss: 0.3243 - Train-Accuracy: 86.83% - Train-F1: 0.7713
Val-F1: 0.3559
Epoch 25/30 - Train-Loss: 0.3228 - Train-Accuracy: 86.90% - Train-F1: 0.7734
Val-F1: 0.3604
Epoch 26/30 - Train-Loss: 0.3216 - Train-Accuracy: 86.98% - Train-F1: 0.7752
Val-F1: 0.3590
Epoch 27/30 - Train-Loss: 0.3201 - Train-Accuracy: 87.03% - Train-F1: 0.7770
Val-F1: 0.3638
Epoch 28/30 - Train-Loss: 0.3190 - Train-Accuracy: 87.09% - Train-F1: 0.7784
Val-F1: 0.3612
Epoch 29/30 - Train-Loss: 0.3175 - Train-Accuracy: 87.14% - Train-F1: 0.7800
Val-F1: 0.3647
Epoch 30/30 - Train-Loss: 0.3162 - Train-Accuracy: 87.20% - Train-F1: 0.7823
Val-F1: 0.3681
'''