In [3]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Configuration
# NOTE(review): SEQUENCE_LENGTH, BATCH_SIZE and NORMALIZATION_RANGE are not
# referenced by any other cell visible in this notebook; later cells define
# their own seq_len / batch_size. Confirm whether they are still needed.
SEQUENCE_LENGTH = 30
BATCH_SIZE = 16
NORMALIZATION_RANGE = (0, 1)

# Training split: the 'gma_id' column of train.csv, kept as a pandas Series.
# NOTE(review): `value in TRAIN_IDS` on a Series tests the index labels, not the
# stored ids, so membership tests should go through `.values` or a set.
TRAIN_IDS = pd.read_csv('./data/Infant Pose Data/gma_score_prediction/train.csv')
TRAIN_IDS = TRAIN_IDS['gma_id']


# Custom Dataset Class
class TimeSeriesDataset(Dataset):
    """Map-style dataset that serves pre-built sequences as float32 tensors.

    Args:
        sequences: Any indexable, sized container (list, ndarray, ...) whose
            items can be converted by ``torch.tensor``.
    """

    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        """Return how many sequences the dataset holds."""
        count = len(self.sequences)
        return count

    def __getitem__(self, idx):
        """Return the sequence stored at ``idx`` as a new float32 tensor."""
        item = self.sequences[idx]
        return torch.tensor(item, dtype=torch.float32)

# Function to normalize data
def normalize(data, min_val=0, max_val=1):
    """Min-max scale ``data`` column-wise (along axis 0) into ``[min_val, max_val]``.

    Args:
        data: Array-like whose first axis indexes samples.
        min_val: Lower bound of the target range (default 0).
        max_val: Upper bound of the target range (default 1).

    Returns:
        The rescaled data. A column whose values are all identical has no
        spread to scale, so it maps to ``min_val`` instead of NaN/inf.
    """
    col_min = np.min(data, axis=0)
    col_range = np.ptp(data, axis=0)
    # Dividing by a zero range would yield NaN/inf; divide by 1 there instead,
    # which leaves the (all-zero) numerator untouched.
    safe_range = np.where(col_range == 0, 1, col_range)
    unit = (data - col_min) / safe_range
    # With the defaults this is `unit * 1 + 0`, i.e. the plain [0, 1] scaling.
    return unit * (max_val - min_val) + min_val

# Function to create fixed-length sequences
def create_fixed_length_sequences(data, seq_length):
    """Cut ``data`` into every contiguous window of ``seq_length`` items (step 1).

    Args:
        data: Sliceable, sized container of samples.
        seq_length: Number of consecutive samples per window.

    Returns:
        ``np.ndarray`` stacking the windows in start order; empty when ``data``
        holds fewer than ``seq_length`` samples.
    """
    window_count = len(data) - seq_length + 1
    windows = []
    for start in range(window_count):
        stop = start + seq_length
        windows.append(data[start:stop])
    return np.array(windows)

# Function to process a single file
def process_file(file_path):
    """Load one pose-estimate CSV and return its frames as min-max scaled rows.

    The CSV must provide the columns ``video_number``, ``frame``, ``part_idx``,
    ``x`` and ``y``. Each (video_number, frame) group becomes one row holding
    the keypoints ordered by ``part_idx`` and flattened as x, y, x, y, ...

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        2-D float ``np.ndarray`` (one row per frame) in which every column is
        scaled to [0, 1] using that file's own min/max, or ``None`` when the
        frames cannot be stacked (no frames, or frames with differing numbers
        of keypoints). A column that is constant within the file maps to 0.
    """
    # Load CSV
    data = pd.read_csv(file_path)

    # One flat keypoint vector per frame; groupby iterates groups in sorted key order.
    frames = [
        group.sort_values('part_idx')[['x', 'y']].to_numpy(dtype=float).ravel()
        for _, group in data.groupby(['video_number', 'frame'])
    ]

    try:
        # np.stack rejects an empty list and frames of unequal length.
        sequences = np.stack(frames)
    except ValueError as e:
        print(f"Error normalizing sequences: {e}")
        return None

    # Normalize the sequences column-wise.
    col_min = sequences.min(axis=0)
    col_range = sequences.max(axis=0) - col_min
    # A zero range would turn the whole column into NaN (numpy only warns, it
    # does not raise), so divide by 1 there and keep the column at 0.
    safe_range = np.where(col_range == 0, 1.0, col_range)
    return (sequences - col_min) / safe_range
    

# Function to process all files in a directory and save iteratively
def process_directory(directory, output_file):
    """Normalise every training pose CSV in ``directory`` into one CSV text file.

    Only files named ``<gma_id>.csv`` whose id occurs in ``TRAIN_IDS`` are
    processed. Every frame returned by ``process_file`` becomes one row of
    ``output_file``; a header (x1,y1,x2,y2,...) is written once before the
    first data row.

    Args:
        directory: Folder containing the per-video pose-estimate CSV files.
        output_file: Path of the text file to (over)write.
    """
    # `value in Series` tests the index labels, not the stored ids, so the
    # ids are materialised into a set for a correct (and O(1)) membership test.
    train_ids = set(TRAIN_IDS)
    header_written = False  # Flag to write the header only once

    with open(output_file, 'w') as f:
        # os.listdir order is arbitrary; sorting makes the row order reproducible.
        for file_name in tqdm(sorted(os.listdir(directory))):
            if not file_name.endswith(".csv"):
                continue

            stem = file_name.split(".")[0]
            try:
                file_id = int(stem)
            except ValueError:
                # A non-numeric name cannot be a gma_id; skip instead of crashing.
                print(f"Skipping file: {stem}")
                continue
            if file_id not in train_ids:
                print(f"Skipping file: {stem}")
                continue

            sequences = process_file(os.path.join(directory, file_name))
            if sequences is None:
                # process_file has already reported why it gave up on this file.
                print(f"Error writing sequences to file: {stem}")
                continue

            # Write sequences to the file; a failure on one video must not
            # abort the whole run, but the cause is reported.
            try:
                for sequence in sequences:
                    if not header_written:
                        # Write header (feature names like x1, y1, x2, y2, ...)
                        header = ','.join([f"x{i//2+1}" if i % 2 == 0 else f"y{i//2+1}" for i in range(sequence.shape[0])])
                        f.write(header + '\n')
                        header_written = True
                    f.write(','.join(map(str, sequence)) + '\n')
            except Exception as e:
                print(f"Error writing sequences to file: {stem}: {e}")

    print(f"Processed data saved to {output_file}")




In [4]:
# Folder holding one pose-estimate CSV per video.
input_directory = "pose_estimates/gma_score_prediction_pose_estimates"  # Replace with your folder path
# NOTE(review): despite the .npy extension this file is written as comma-separated
# text by process_directory and read back with pd.read_csv; consider renaming it.
output_file = "processed_data.npy"  # Save processed data for reuse


In [5]:
import gc

def process_directory(directory, output_file):
    """Normalise every training pose CSV in ``directory`` into one CSV text file.

    Only files named ``<gma_id>.csv`` whose id occurs in ``TRAIN_IDS`` are
    processed. Every frame returned by ``process_file`` becomes one row of
    ``output_file``; a header (x1,y1,x2,y2,...) is written once before the
    first data row. Each video's array is released before the next file is
    read to keep peak memory low.

    Args:
        directory: Folder containing the per-video pose-estimate CSV files.
        output_file: Path of the text file to (over)write.
    """
    # `value in Series` tests the index labels, not the stored ids, so the
    # ids are materialised into a set for a correct (and O(1)) membership test.
    train_ids = set(TRAIN_IDS)
    header_written = False  # Flag to write the header only once

    with open(output_file, 'w') as f:
        # os.listdir order is arbitrary; sorting makes the row order reproducible.
        for file_name in tqdm(sorted(os.listdir(directory))):
            if not file_name.endswith(".csv"):
                continue

            stem = file_name.split(".")[0]
            try:
                file_id = int(stem)
            except ValueError:
                # A non-numeric name cannot be a gma_id; skip instead of crashing.
                print(f"Skipping file: {stem}")
                continue
            # Check if file is in TRAIN_IDS
            if file_id not in train_ids:
                print(f"Skipping file: {stem}")
                continue

            # Process the file
            sequences = process_file(os.path.join(directory, file_name))
            if sequences is None:
                # process_file has already reported why it gave up on this file.
                print(f"Error writing sequences to file: {stem}")
                continue

            # Write sequences to the file; a failure on one video must not
            # abort the whole run, but the cause is reported.
            try:
                for sequence in sequences:
                    if not header_written:
                        # Write header
                        header = ','.join([f"x{i//2+1}" if i % 2 == 0 else f"y{i//2+1}" for i in range(sequence.shape[0])])
                        f.write(header + '\n')
                        header_written = True
                    f.write(','.join(map(str, sequence)) + '\n')
            except Exception as e:
                print(f"Error writing sequences to file: {stem}: {e}")

            # Explicitly free up memory
            del sequences
            gc.collect()


In [None]:
# Process all files: writes every selected video's normalised frames to output_file.
process_directory(input_directory, output_file)

In [None]:
# Load processed data
# The file holds comma-separated text (one normalised frame per row, header on
# the first line), so it is read with read_csv whatever its extension says.
data = pd.read_csv(output_file)
data = data.to_numpy()

seq_len = 60
stride = 30  # Hop between window starts; consecutive windows share seq_len - stride frames
sequences = []
# NOTE(review): rows of all videos are concatenated in the file, so windows that
# straddle the end of one video and the start of the next mix both videos.
# The "+ 1" keeps the last full window (start == len(data) - seq_len), which the
# exclusive upper bound of range() would otherwise drop.
for i in range(0, len(data) - seq_len + 1, stride):
    sequences.append(data[i:i+seq_len])
sequences = np.array(sequences)  # Shape: (num_sequences, seq_len, num_features)

sequences.shape


In [21]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every feature column of `data` to [0, 1] over the whole data set.
# NOTE(review): in notebook order `sequences` has already been cut from the
# unscaled `data`, and fit_transform returns a new array, so this rescaling does
# not reach `sequences`/`tensor_data` unless the windowing is re-run afterwards
# (the execution count, In [21], also shows it was run out of order). Confirm
# whether this cell is still wanted.
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

In [6]:
import torch
# Copy the windowed array into a float32 tensor; this is what the training
# cell wraps in a TensorDataset.
tensor_data = torch.tensor(sequences, dtype=torch.float32)

In [7]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Positional Encoding
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch-first sequence.

    Even feature columns carry ``sin(pos * w_k)`` and odd columns
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / embed_dim)``. The table is
    registered as a buffer, so it follows ``.to(device)`` and is stored in the
    state dict, but it is not trained.

    Args:
        embed_dim: Size of the feature dimension (even or odd).
        max_seq_len: Longest sequence length the table can serve.
    """

    def __init__(self, embed_dim, max_seq_len):
        super().__init__()
        position = torch.arange(0, max_seq_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_dim, 2) * (-torch.log(torch.tensor(10000.0)) / embed_dim))
        pe = torch.zeros(max_seq_len, embed_dim)
        pe[:, 0::2] = torch.sin(position * div_term)
        # For an odd embed_dim there is one cosine column fewer than sine
        # columns, so trim div_term to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: embed_dim // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encoding of its first ``x.size(1)`` positions.

        Args:
            x: Tensor of shape (batch, seq_len, embed_dim); ``seq_len`` must
                not exceed ``max_seq_len``.
        """
        return x + self.pe[:, :x.size(1)]

# Masked Time Series Transformer
class MaskedTimeSeriesTransformer(nn.Module):
    """Transformer that reconstructs a (partly masked) multivariate time series.

    Pipeline: linear embedding -> sinusoidal positional encoding ->
    ``nn.Transformer`` (the embedded sequence is fed as both source and
    target) -> linear projection back to ``input_dim``.

    Args:
        input_dim: Number of features per time step.
        embed_dim: Model width. Must be divisible by ``num_heads``. The default
            is 32 because the previous default of 18 is not divisible by the
            default 8 heads and therefore could never be constructed.
        num_heads: Number of attention heads.
        num_layers: Number of encoder layers. NOTE(review): the decoder depth
            is left at the ``nn.Transformer`` default; confirm this is intended.
        max_seq_len: Longest sequence the positional encoding supports.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_dim=36, embed_dim=32, num_heads=8, num_layers=4, max_seq_len=500):
        super().__init__()
        if embed_dim % num_heads != 0:
            # Multi-head attention splits embed_dim evenly across the heads.
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by num_heads ({num_heads})"
            )
        self.embedding = nn.Linear(input_dim, embed_dim)
        self.positional_encoding = PositionalEncoding(embed_dim, max_seq_len)
        self.transformer = nn.Transformer(
            d_model=embed_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            dim_feedforward=512,
            batch_first=True  # Ensures (batch, seq, embed_dim) compatibility
        )
        self.output_layer = nn.Linear(embed_dim, input_dim)

    def forward(self, x, mask=None):
        """Reconstruct ``x``.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).
            mask: Optional source key-padding mask of shape (batch, seq_len).
                It is applied to the encoder input only; the decoder side
                receives no padding mask.

        Returns:
            Tensor of shape (batch, seq_len, input_dim).
        """
        # Apply input embedding and positional encoding
        x = self.embedding(x)
        x = self.positional_encoding(x)
        # Apply transformer with source key padding mask
        x = self.transformer(x, x, src_key_padding_mask=mask)
        # Output layer to reconstruct original dimensions
        return self.output_layer(x)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Example parameters
batch_size = 32  # Windows per optimisation step
seq_len = 60  # Frames per window (the windowing cell sets the same value)
input_dim = 36  # Features per frame, passed to the model as input_dim
mask_prob = 0.20  # Probability of masking each feature
epochs = 100  # Full passes over the data; one checkpoint is written per epoch
learning_rate = 0.001  # Adam step size

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


In [None]:

# Initialize model, loss function, and optimizer
model = MaskedTimeSeriesTransformer(input_dim=input_dim, embed_dim=32, num_heads=8, num_layers=4, max_seq_len=500)
model = model.to(device)  # Move model to GPU

criterion = nn.MSELoss()  # Reconstruction loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create Dataset and DataLoader
dataset = TensorDataset(tensor_data)
# Use the configured batch_size rather than a second hard-coded copy of it.
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Mean training loss of every finished epoch, in order.
losses = []

# Function to create masked inputs
def create_masked_inputs(x, mask_prob):
    """Randomly zero out individual values of ``x`` for masked-reconstruction training.

    Args:
        x: Input tensor of any shape; it is not modified.
        mask_prob: Probability in [0, 1] that any single element is masked.

    Returns:
        Tuple ``(x_masked, mask)``: a copy of ``x`` with the masked elements set
        to 0, and the boolean mask (``True`` = masked) with the same shape and
        on the same device as ``x``.
    """
    # Draw the mask on x's own device so that indexing with it never has to
    # move data between host and accelerator.
    mask = torch.rand(x.shape, device=x.device) < mask_prob
    x_masked = x.masked_fill(mask, 0.0)  # out-of-place: x itself stays intact
    return x_masked, mask

# Training loop: masked-reconstruction objective, one checkpoint per epoch.
# NOTE(review): `epoch` stays defined after the loop and later cells use it to
# build the checkpoint file name, so those cells depend on this one having run.
for epoch in range(epochs):
    model.train()
    epoch_loss = 0

    for batch in dataloader:
        # TensorDataset yields 1-tuples; the window tensor is the first element.
        x = batch[0].to(device)  # Extract batch data

        # Step 1: Create masked inputs and mask
        x_masked, mask = create_masked_inputs(x, mask_prob)

        # Step 2: Forward pass
        output = model(x_masked)

        # Step 3: Compute loss (only for masked positions)
        loss = criterion(output[mask], x[mask])

        # Step 4: Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Step 5: Update weights
        optimizer.step()

        # Track loss for the epoch
        epoch_loss += loss.item()

    # Print epoch loss (mean of the per-batch losses)
    avg_loss = epoch_loss / len(dataloader)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")
    # append losses to a list
    losses.append(avg_loss)
    # Checkpoints are numbered from 1, i.e. model_epoch_1.pth ... model_epoch_<epochs>.pth
    torch.save(model.state_dict(), f"model_epoch_{epoch + 1}.pth")


In [None]:
# plot the learning curve
import matplotlib.pyplot as plt

# Explicit figure/axes with labels so the plot is readable on its own.
fig, ax = plt.subplots()
ax.plot(range(1, len(losses) + 1), losses)  # x axis counts epochs from 1, like the log lines
ax.set_xlabel("Epoch")
ax.set_ylabel("Mean masked-reconstruction MSE")
ax.set_title("Training loss")
plt.show()

In [None]:
# Test split metadata: video ids and their assessment labels.
TEST_IDS = pd.read_csv('./data/Infant Pose Data/gma_score_prediction/test.csv')
TEST_CLASS = TEST_IDS['final_assessment_score_2']
TEST_IDS = TEST_IDS['gma_id']

# randomly select a video from the TEST_IDS
import random
# Draw from a plain list: random.choice indexes by integer position, which a
# pandas Series would interpret as an index label.
video_id = random.choice(TEST_IDS.tolist())
label = TEST_CLASS[TEST_IDS == video_id].values[0]

# Load the video data
sequences = process_file(f"pose_estimates/gma_score_prediction_pose_estimates/{video_id}.csv")

# Load pre-trained model
# embed_dim has to match the value used for training (32); any other width
# makes load_state_dict fail with a size mismatch.
model = MaskedTimeSeriesTransformer(input_dim=36, embed_dim=32, num_heads=8, num_layers=4, max_seq_len=500)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(review): `epoch` comes from the training loop, so that cell must have run.
model.load_state_dict(torch.load(f"model_epoch_{epoch + 1}.pth", map_location=device))  # Load trained model weights
model.to(device)
model.eval()

# Per-frame latent = output of the model's linear embedding layer only (the
# transformer layers are not applied). No gradients are needed for inference.
with torch.no_grad():
    latent = model.embedding(torch.tensor(sequences, dtype=torch.float32).to(device))

print(latent.shape, label)

In [None]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load test metadata
TEST_IDS = pd.read_csv('./data/Infant Pose Data/gma_score_prediction/test.csv')
TEST_CLASS = TEST_IDS['final_assessment_score_2']  # Labels
TEST_IDS = TEST_IDS['gma_id']  # Video IDs

# Load pre-trained model (same architecture arguments as used for training)
model = MaskedTimeSeriesTransformer(input_dim=36, embed_dim=32, num_heads=8, num_layers=4, max_seq_len=500)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(review): `epoch` comes from the training loop, so that cell must have run.
model.load_state_dict(torch.load(f"model_epoch_{epoch + 1}.pth", map_location=device))  # Load trained model weights
model.to(device)
model.eval()

# Function to extract latent embeddings for a video
def extract_video_latents(video_id):
    """
    Process a single video and extract its latent embeddings.

    The latent of a frame is the output of ``model.embedding`` (the linear
    input projection); the transformer layers are not applied.

    Args:
        video_id: ID of the video to process; its pose file is expected at
            pose_estimates/gma_score_prediction_pose_estimates/<video_id>.csv.
    Returns:
        latent_embeddings: Tensor with one embedding row per frame of the
            video, located on ``device``.
    Raises:
        ValueError: If ``process_file`` could not produce data for the video.
    """
    # Load the video data
    frames = process_file(f"pose_estimates/gma_score_prediction_pose_estimates/{video_id}.csv")
    if frames is None:
        # Fail with a clear message instead of an obscure tensor-conversion error.
        raise ValueError(f"Could not load pose data for video {video_id}")

    # Convert once; wrapping an existing tensor in torch.tensor() again only
    # copies it and triggers a UserWarning.
    frames = torch.tensor(frames, dtype=torch.float32).to(device)

    with torch.no_grad():
        latent = model.embedding(frames)

    return latent

# Extract latent embeddings for all videos in the test set
# all_latents is a list with one single-entry dict {label: latents} per video.
all_latents = []
all_labels = []

for video_id, label in zip(TEST_IDS, TEST_CLASS):
    latents = extract_video_latents(video_id)  # Get latent embeddings for this video
    # match the embeddings with a label
    all_latents.append({label: latents})  # Store embeddings with label

# Save embeddings and labels for future use (optional)
# Flatten the list of dictionaries: one embedding row per frame, with the
# video's label repeated for each of its frames.
flattened_embeddings = []
flattened_labels = []

for entry in all_latents:
    for label, latents in entry.items():
        flattened_embeddings.append(latents)
        flattened_labels.extend([label] * latents.shape[0])

# Move to the CPU before saving so the file can be loaded in a kernel without a GPU.
flattened_embeddings = torch.cat(flattened_embeddings, dim=0).cpu()
flattened_labels = torch.tensor(flattened_labels, dtype=torch.long)

torch.save({"embeddings": flattened_embeddings, "labels": flattened_labels}, "test_latents.pth")


In [None]:
import torch
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# all_latents is a list of single-entry dicts {label: embeddings}, one per test video.
embeddings_dict = all_latents
# Step 1: Flatten the list of dicts into one list of tensors plus per-row labels
all_embeddings = []
all_labels = []

for entry in embeddings_dict:
    for label, embeddings in entry.items():
        all_embeddings.append(embeddings)  # Append embeddings
        all_labels.extend([label] * embeddings.shape[0])  # Append corresponding labels

# Step 2: Concatenate embeddings and labels
all_embeddings = torch.cat(all_embeddings, dim=0)  # Shape: (total_rows, embed_dim)
all_labels = torch.tensor(all_labels, dtype=torch.float32)  # Shape: (total_rows,)

# Step 3: Perform PCA (keep the first three principal components)
pca = PCA(n_components=3)
pca_result = pca.fit_transform(all_embeddings.cpu().numpy())  # Convert embeddings to numpy for PCA

# Step 4: Visualize the principal components in pairs, with one subplot per pair.
# Both orientations of every pair are drawn (PCi vs PCj and PCj vs PCi); the
# diagonal cells are removed. Points are coloured by label.

fig, axs = plt.subplots(3, 3, figsize=(15, 15))
fig.suptitle("Principal Component Analysis", fontsize=20)

for i in range(3):
    for j in range(3):
        if i != j:
            axs[i, j].scatter(pca_result[:, i], pca_result[:, j], c=all_labels, cmap='viridis', alpha=0.5)
            axs[i, j].set_xlabel(f"PC{i + 1}")
            axs[i, j].set_ylabel(f"PC{j + 1}")
        else:
            fig.delaxes(axs[i, j])  # Remove empty subplots

plt.tight_layout()
plt.show()


In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Perform k-means clustering on the per-row embeddings (two clusters, fixed seed)
kmeans = KMeans(n_clusters=2, random_state=42)
cluster_labels = kmeans.fit_predict(all_embeddings.cpu().numpy())

# Evaluate clustering against ground truth.
# NOTE(review): every row of a video carries that video's label, so rows are not
# independent samples — keep that in mind when reading the score.
ari_score = adjusted_rand_score(all_labels.numpy(), cluster_labels)
print(f"Adjusted Rand Index (ARI): {ari_score:.2f}")



In [None]:
# visualize a reconstruction of the data from the embeddings
# NOTE(review): `video_id` is whatever an earlier cell left behind (the last
# value assigned to it), so the plotted video depends on which cells ran before.

# Load the video data
sequences = process_file(f"pose_estimates/gma_score_prediction_pose_estimates/{video_id}.csv")

# Load pre-trained model
model = MaskedTimeSeriesTransformer(input_dim=36, embed_dim=32, num_heads=8, num_layers=4, max_seq_len=500)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(f"model_epoch_{epoch + 1}.pth", map_location=device))  # Load trained model weights
model.to(device)
model.eval()

# Extract latent embeddings for the video
latent = extract_video_latents(video_id)

# Reconstruct the video data.
# NOTE(review): this decodes the linear embedding directly with output_layer and
# bypasses the transformer, so it is not the output of the full model.
with torch.no_grad():
    reconstructed = model.output_layer(latent)

# Plot the original and reconstructed sequences (one line per feature)
fig, axs = plt.subplots(2, 1, figsize=(15, 10))

# Original sequence
axs[0].plot(sequences[:, :36])
axs[0].set_title("Original Sequence")
axs[0].set_xlabel("Frame")
axs[0].set_ylabel("Normalised coordinate")

# Reconstructed sequence
axs[1].plot(reconstructed[:, :36].cpu().detach().numpy())
axs[1].set_title("Reconstructed Sequence")
axs[1].set_xlabel("Frame")
axs[1].set_ylabel("Model output")

plt.tight_layout()

plt.show()

In [19]:
# use latent features as input to a classifier

import torch

# Load embeddings and labels

# map_location="cpu": the tensors may have been saved from a GPU session, and
# everything downstream converts them to NumPy on the CPU anyway.
learned_data = torch.load("test_latents.pth", map_location="cpu")
embeddings = learned_data["embeddings"]
labels = learned_data["labels"]


In [None]:
# Sanity check: both tensors must have the same number of rows.
embeddings.shape, labels.shape

In [None]:

# Split data into training and validation sets: first 80% of the rows for
# training, the remaining 20% for validation (no shuffling).
# NOTE(review): rows are stored video after video, so a positional split can cut
# through a video and the validation part only contains the last videos —
# consider splitting by video instead.
split = int(0.8 * len(embeddings))
train_embeddings, train_labels = embeddings[:split], labels[:split]
val_embeddings, val_labels = embeddings[split:], labels[split:]

# Shift labels to start at 0
train_labels = train_labels.long() - 1  # Assuming labels are 1 and 2, convert to 0 and 1
val_labels = val_labels.long() - 1

# train a simple classifier using the embeddings


In [None]:
!pip install pandas==1.5.3
# !pip install scikit-learn==1.3.2

import pandas as pd
import pickle
import sklearn
import autosklearn
from autosklearn.experimental.askl2 import AutoSklearn2Classifier
import torch
# Report the library versions in use, then load embeddings and labels

print(f'pandas version: {pd.__version__}')
print(f'autosklearn version: {autosklearn.__version__}')
print(f'sklearn version: {sklearn.__version__}')


# map_location="cpu": the tensors may have been saved from a GPU session, and
# everything downstream converts them to NumPy on the CPU anyway.
learned_data = torch.load("test_latents.pth", map_location="cpu")
embeddings = learned_data["embeddings"]
labels = learned_data["labels"]


In [2]:

# Split data into training and validation sets: first 80% of the rows for
# training, the remaining 20% for validation (no shuffling).
# NOTE(review): rows are stored video after video, so a positional split can cut
# through a video and the validation part only contains the last videos —
# consider splitting by video instead.
split = int(0.8 * len(embeddings))
train_embeddings, train_labels = embeddings[:split], labels[:split]
val_embeddings, val_labels = embeddings[split:], labels[split:]

# Shift labels to start at 0
train_labels = train_labels.long() - 1  # Assuming labels are 1 and 2, convert to 0 and 1
val_labels = val_labels.long() - 1


In [3]:

# Define the classifier: an Auto-sklearn 2.0 search with a 300 s total budget and
# 30 s per candidate run, optimising balanced accuracy.
# NOTE(review): the meaning of the remaining keyword arguments is taken from
# their names only — confirm them against the installed auto-sklearn version.
classifier = AutoSklearn2Classifier(
    ensemble_size=1,
    dataset_compression=False,
    allow_string_features=False,
    time_left_for_this_task=300,
    per_run_time_limit=30,
    metric=autosklearn.metrics.balanced_accuracy,
    delete_tmp_folder_after_terminate=False,
    memory_limit=None,
    disable_evaluator_output=False,
)


In [4]:
# Wrap the training tensors in DataFrames: one row per embedding, and a
# single-column frame for the labels.
X = pd.DataFrame(train_embeddings.cpu().numpy())
y = pd.DataFrame(train_labels.cpu().numpy())

In [None]:
# Fit the classifier on the training embeddings (bounded by the time budget
# configured on the classifier).
classifier.fit(X, y)


In [None]:
# Evaluate the classifier on the held-out validation rows.
# NOTE(review): presumably score() reports the metric the classifier was
# configured with (balanced accuracy) rather than plain accuracy — confirm
# before quoting the printed number as "accuracy".
accuracy = classifier.score(val_embeddings.cpu().numpy(), val_labels.cpu().numpy())
print(f"Validation Accuracy: {accuracy:.2f}")

