In [35]:
import pickle
import sklearn
import pandas as pd
import numpy as np
import torch

In [37]:
with open('/vision/u/eatang/leaky_video/datasets/epic_kitchens/narration_id_to_prev_actions.pkl', 'rb') as f:
    narration_id_to_prev_actions = pickle.load(f)
    
with open('/vision/u/eatang/leaky_video/datasets/epic_kitchens/narration_id_to_prev_actions_val.pkl', 'rb') as f:
    narration_id_to_prev_actions_val = pickle.load(f)

In [78]:
# hyperparameters

num_prev_actions = 5

In [79]:
x_train = np.zeros((len(narration_id_to_prev_actions), num_prev_actions, 3806))
y_train = np.zeros((len(narration_id_to_prev_actions)))
for i, (k, v) in enumerate(narration_id_to_prev_actions.items()):
    for j in range(-num_prev_actions - 1, -1, 1):
        label = v[j]
        x_train[i, j + 1] = torch.nn.functional.one_hot(torch.LongTensor([label]), num_classes=3806).squeeze().numpy()
    y_train[i] = v[-1]
    
x_val = np.zeros((len(narration_id_to_prev_actions_val), num_prev_actions, 3806))
y_val = np.zeros((len(narration_id_to_prev_actions_val)))
for i, (k, v) in enumerate(narration_id_to_prev_actions_val.items()):
    for j in range(-num_prev_actions - 1, -1, 1):
        label = v[j]
        x_val[i, j + 1] = torch.nn.functional.one_hot(torch.LongTensor([label]), num_classes=3806).squeeze().numpy()
    y_val[i] = v[-1]


In [86]:
# more hyperparameters
embedding_dim = 512  # Adjust the embedding dimension as needed
use_embeddings = False
num_epochs = 20
batch_size = 256
lr = 0.001
hidden_dim = 512
weight_decay = 0

In [87]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Assuming x is your input data and y is your target data
# x should be a NumPy array or a PyTorch tensor
# y should be a NumPy array or a PyTorch tensor with the same number of rows as x

x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

if use_embeddings:
    # Convert x_train and x_test to PyTorch tensors if they're not already
    x_train_torch = torch.LongTensor(x_train.argmax(-1))
    x_test_torch = torch.LongTensor(x_test.argmax(-1))
else:
    x_train_torch = torch.FloatTensor(x_train.sum(1))
    x_test_torch = torch.FloatTensor(x_test.sum(1))
    
# Convert y_train and y_test to PyTorch tensors if they're not already
y_train_torch = torch.LongTensor(y_train)  # Assuming y contains class indices, change accordingly if needed
y_test_torch = torch.LongTensor(y_test)

device = 'cuda:0'

# Define your MLP with an embedding layer

if use_embeddings:
    mlp = nn.Sequential(
        nn.Embedding(num_embeddings=3806, embedding_dim=embedding_dim),
        nn.Flatten(),
        nn.Linear(embedding_dim*num_prev_actions, hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, 3806)
    )
else:
    mlp = nn.Sequential(
        nn.Linear(3806, hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, 3806)
    )

mlp.to(device)

# Define your loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(mlp.parameters(), lr=lr, weight_decay=weight_decay)  # Adjust the learning rate as needed

# Training loop
for epoch in range(num_epochs):
#     if epoch % (num_epochs // 2) == 0:
#         optimizer.param_groups[0]["lr"] = optimizer.param_groups[0]["lr"] / 2
    # Training phase
    mlp.train()
    for i in range(0, len(x_train_torch), batch_size):
        x_batch = x_train_torch[i:i+batch_size].to(device)
        y_batch = y_train_torch[i:i+batch_size].to(device)

        outputs = mlp(x_batch)
        loss = criterion(outputs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Testing phase
    mlp.eval()
    with torch.no_grad():
        test_outputs = mlp(x_test_torch.to(device))
        test_loss = criterion(test_outputs, y_test_torch.to(device))
        
        test_outputs = test_outputs.detach().cpu()
        
        # Compute top-k accuracy
        k = 1  # You can adjust k as needed
        _, top_k_indices = test_outputs.topk(k, dim=1)
        correct_predictions = top_k_indices.eq(y_test_torch.view(-1, 1).expand_as(top_k_indices))
        top_k_accuracy = correct_predictions.sum().item() / len(y_test_torch)
    
    # Print the loss at the end of each epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}, Top-{k} Accuracy: {top_k_accuracy:.4f}')

# After training, you can use the trained model for inference
# For example:
# y_pred = mlp(x_test_torch)
# Perform further actions as needed


Epoch [1/20], Train Loss: 6.2414, Test Loss: 6.3295, Top-1 Accuracy: 0.0263
Epoch [2/20], Train Loss: 6.0214, Test Loss: 6.1570, Top-1 Accuracy: 0.0609
Epoch [3/20], Train Loss: 5.6416, Test Loss: 5.8927, Top-1 Accuracy: 0.0896
Epoch [4/20], Train Loss: 5.3211, Test Loss: 5.6954, Top-1 Accuracy: 0.1065
Epoch [5/20], Train Loss: 5.0266, Test Loss: 5.5366, Top-1 Accuracy: 0.1209
Epoch [6/20], Train Loss: 4.8795, Test Loss: 5.4036, Top-1 Accuracy: 0.1310
Epoch [7/20], Train Loss: 4.6885, Test Loss: 5.2988, Top-1 Accuracy: 0.1391
Epoch [8/20], Train Loss: 4.4868, Test Loss: 5.2133, Top-1 Accuracy: 0.1436
Epoch [9/20], Train Loss: 4.3790, Test Loss: 5.1434, Top-1 Accuracy: 0.1496
Epoch [10/20], Train Loss: 4.1840, Test Loss: 5.0942, Top-1 Accuracy: 0.1510
Epoch [11/20], Train Loss: 4.1271, Test Loss: 5.0635, Top-1 Accuracy: 0.1557
Epoch [12/20], Train Loss: 4.0599, Test Loss: 5.0459, Top-1 Accuracy: 0.1589
Epoch [13/20], Train Loss: 3.9583, Test Loss: 5.0358, Top-1 Accuracy: 0.1602
Epoch [1

In [74]:
mlp[0].weight.dtype

torch.float32

In [76]:
x_batch.dtype

torch.float32