<a href="https://www.kaggle.com/code/riturajpradhan/temporal-intro?scriptVersionId=191804164" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import os
import json

from PIL import Image
from tqdm.notebook import tqdm
import gc
import numpy as np
# from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
from torch.utils.tensorboard import SummaryWriter
import torch.nn.init as init

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from timm.scheduler.scheduler import Scheduler

from torchvision import transforms
# from transformers import AutoImageProcessor, ResNetModel
from transformers import ViTFeatureExtractor, ViTForImageClassification
# from transformers.image_processing_base import BatchFeature

2024-08-09 15:01:04.354424: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-09 15:01:04.354620: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-09 15:01:04.558382: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
image_paths = []
labels = []
label_path = '/kaggle/input/abaw7-extracted-features/validation_set_annotations.txt'
data_path = '/kaggle/input/abaw-7-dataset/cropped_aligned'
with open(label_path, 'r') as f:
    f.readline()
    data = f.readlines()

for d in data:
    line = d.split(',')
    image_name = line[0]
    image_label = line[3]
    if int(image_label) == -1:
        continue
    image_path = os.path.join(data_path, image_name)
    image_paths.append(image_path)
    labels.append(image_label)
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
for file in os.listdir('/kaggle/working/SGD_Kaiming'):
    if file != 'transformer_on_ViT_e80.pth':
        os.remove(f'/kaggle/working/SGD_Kaiming/{file}')

In [8]:
# run cell to extract image features and store them
image_preprocessor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
feature_extractor = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', output_hidden_states = True, return_dict = True).to(device)
    
# Batch size for processing images
batch_size = 100

batch_count = 0

# Initialize lists to store extracted features
all_features = []

# Process images in batches
for batch_start in tqdm(range(0, len(image_paths), batch_size), desc='Extracting Features'):
    batch_end = min(batch_start + batch_size, len(image_paths))
    batch_images = [Image.open(image_path) for image_path in image_paths[batch_start:batch_end]]
    batch_labels = torch.tensor([float(label) for label in labels[batch_start:batch_end]]).to(device)
    # Tokenize and extract features
    inputs = image_preprocessor(images = batch_images, return_tensors="pt").to(device)
    with torch.no_grad():
        output = feature_extractor(**inputs)
    batch_features = output.hidden_states[-1][:,0,:].clone().detach()
    t = torch.column_stack([batch_features, batch_labels])

    # Append features to the list
    all_features.extend(t)

# Save features to a file (e.g., as a PyTorch tensor)
    # Save features every 1000 images
    if batch_count%50 == 0:
        output_file = f"./validation_features/image_features_val_{batch_count}.pt"
        torch.save(torch.stack(all_features).cpu(), output_file)
        print(f"Features saved to {output_file}")
        
        # Clear memory by resetting the list
        all_features = []
    batch_count += 1
    with open('counter.json', 'w') as f:
        json.dump({'batch_count' : batch_count}, f)

# Save any remaining features
if all_features:
    output_file = f"./validation_features/image_features_val_{batch_count}.pt"
    torch.save(torch.stack(all_features).cpu(), output_file)
    print(f"Remaining features saved to {output_file}")


Extracting Features:   0%|          | 0/155 [00:00<?, ?it/s]

Features saved to ./validation_features/image_features_val_0.pt
Features saved to ./validation_features/image_features_val_50.pt
Features saved to ./validation_features/image_features_val_100.pt
Features saved to ./validation_features/image_features_val_150.pt
Remaining features saved to ./validation_features/image_features_val_155.pt


In [14]:
path = '/kaggle/working/validation_features'
feat_arr = []
for file in os.listdir(path):
    feat_arr.append(torch.load(os.path.join(path, file)))

training_features = torch.row_stack(feat_arr)
training_features.shape

torch.Size([15440, 769])

In [15]:
torch.save(training_features, 'ABAW_validation_features_ViT.pt')

In [2]:
class ImageTransformer(nn.Module):
    def __init__(self, feature_dim, num_classes, num_heads=4, num_layers=6, dropout=0.1, sequence_length=64):
        super(ImageTransformer, self).__init__()
        self.feature_dim = feature_dim
        self.num_classes = num_classes
        
        # Positional encoding
        self.positional_encoding = nn.Parameter(torch.zeros(1, sequence_length + 1, feature_dim))
        
        # Learnable [CLS] token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, feature_dim))
        
        # Transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(d_model=feature_dim, nhead=num_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        # Classification head
        self.fc1 = nn.Linear(feature_dim, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, num_classes)
        
         # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        # Apply Kaiming initialization to all linear layers
        for m in self.modules():
            if isinstance(m, nn.Linear):
                init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    init.zeros_(m.bias)
            elif isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    init.zeros_(m.bias)

    def forward(self, x):
        # Add the [CLS] token to the input sequence
        batch_size = x.size(0)
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, feature_dim)
        x = torch.cat((cls_tokens, x), dim=1)  # (batch_size, sequence_length + 1, feature_dim)
        
        # Add positional encoding
        x = x + self.positional_encoding[:, :x.size(1), :]
        
        # Pass through the transformer encoder
        x = self.transformer_encoder(x)
        
        # Extract the [CLS] token's output
        cls_output = x[:, 0, :]  # (batch_size, feature_dim)
        
        # Pass through the classification head
        x = self.fc1(cls_output)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        
        return x

class ABAWFeatureDataset(Dataset):
    def __init__(self, features, labels, sequence_length):
        self.features = features
        self.labels = labels
        self.sequence_length = sequence_length
        self.seq_start = 0
        self.seq_end = sequence_length
        self.length = labels.shape[0]

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if idx > (self.length - self.sequence_length):
            feature = self.features[self.length - self.sequence_length:, : ]
            label = self.labels[self.length - 1]
        else:
            feature = self.features[idx:idx + self.sequence_length, :]
            label = self.labels[idx + self.sequence_length - 1]
        return feature, label

In [3]:
batch_size = 128
sequence_length = 32
feature_dim = 768
num_classes = 8
num_epochs = 20
learning_rate = 1e-4

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

val_label_path = '/kaggle/input/abaw7-extracted-features/validation_set_annotations.txt'
val_labels = []
with open(val_label_path, 'r') as f:
    f.readline()
    data = f.readlines()

for d in data:
    line = d.split(',')
    image_label = line[3]
    if int(image_label) == -1:
        continue
    val_labels.append(image_label)
    
train_label_path = '/kaggle/input/abaw7-extracted-features/training_set_annotations.txt'
train_labels = []
with open(train_label_path, 'r') as f:
    f.readline()
    data = f.readlines()

for d in data:
    line = d.split(',')
    image_label = line[3]
    if int(image_label) == -1:
        continue
    train_labels.append(image_label)
    
# Create dataset and dataloader
train_image_features = torch.load('/kaggle/working/ABAW_training_features_ViT.pt')
temp = train_image_features.detach().numpy()
train_image_features = torch.tensor(temp[:,:768])

train_labels = [int(x) for x in train_labels]
train_label_tensor = torch.tensor(train_labels)
train_dataset = ABAWFeatureDataset(train_image_features, train_label_tensor, sequence_length = sequence_length)
data_loader_train = DataLoader(train_dataset, batch_size=batch_size, drop_last = True, pin_memory=True)

train_length = len(train_dataset)
    
val_image_features = torch.load('/kaggle/working/ABAW_validation_features_ViT.pt')
temp = val_image_features.detach().numpy()
val_image_features = torch.tensor(temp[:,:768])
val_labels = [int(x) for x in val_labels]
val_label_tensor = torch.tensor(val_labels)
val_dataset = ABAWFeatureDataset(val_image_features, val_label_tensor, sequence_length = sequence_length)
data_loader_val = DataLoader(val_dataset, batch_size=batch_size, drop_last = True, pin_memory=True)#, collate_fn=custom_collate_fn)

val_length = len(val_dataset)

In [12]:
class LinearLRScheduler(Scheduler):
    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 t_initial: int,
                 lr_min_rate: float,
                 warmup_t=0,
                 warmup_lr_init=0.,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 initialize=True,
                 ) -> None:
        super().__init__(
            optimizer, param_group_field="lr",
            noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed,
            initialize=initialize)

        self.t_initial = t_initial
        self.lr_min_rate = lr_min_rate
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.t_in_epochs = t_in_epochs
        if self.warmup_t:
            self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
            super().update_groups(self.warmup_lr_init)
        else:
            self.warmup_steps = [1 for _ in self.base_values]

    def _get_lr(self, t):
        if t < self.warmup_t:
            lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps]
        else:
            t = t - self.warmup_t
            total_t = self.t_initial - self.warmup_t
            lrs = [v - ((v - v * self.lr_min_rate) * (t / total_t)) for v in self.base_values]
        return lrs

    def get_epoch_values(self, epoch: int):
        if self.t_in_epochs:
            return self._get_lr(epoch)
        else:
            return None

    def get_update_values(self, num_updates: int):
        if not self.t_in_epochs:
            return self._get_lr(num_updates)
        else:
            return None

In [None]:
# Model, loss function, optimizer
model = ImageTransformer(feature_dim=feature_dim, num_classes=num_classes, sequence_length = sequence_length).to(device)
# model.load_state_dict(torch.load('/kaggle/working/transformer_on_ViT_e40.pth'))
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, dampening=0.1, weight_decay = 1e-4)
scaler = GradScaler()
writer = SummaryWriter('SGD_Kaiming_monitor')
scheduler = LinearLRScheduler(
            optimizer,
            t_initial=100,
            lr_min_rate=0.01,
            warmup_lr_init=5e-5,
            warmup_t=3,
            t_in_epochs=False,
        )
# training_loss_list = []
# validation_loss_list = []
# loss_list = []
# Training loop
for epoch in range(41,140):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(total=len(data_loader_train), desc=f'Epoch {epoch+1}/{num_epochs} LR = {optimizer.param_groups[0]["lr"]}', unit='batch')
    
    for i, (features, targets) in enumerate(data_loader_train):
        features, targets = features.to(device, non_blocking=True), targets.to(device, non_blocking=True)
        optimizer.zero_grad()

        with autocast():
            outputs = model(features)
            # Calculate loss
            loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * features.size(0)
#         loss_list.append(loss.item())
#         writer.add_scalar('training_loss', loss.item(), epoch * train_length + i)
        # Update progress bar with current loss
        progress_bar.set_postfix(loss=loss.item())
        progress_bar.update(1)
        
    epoch_loss = running_loss / train_length
    writer.add_scalar('training_epoch_loss', epoch_loss, epoch)
    if epoch % 10 == 0:
        torch.save(model.state_dict(), f'/kaggle/working/SGD_Kaiming/transformer_on_ViT_e{epoch}.pth')
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_targets = [] 
    with torch.no_grad():
#         val_progress_bar = tqdm(total=len(data_loader_val), desc=f'Validation', unit='batch')
        for features, targets in data_loader_val:
            features, targets = features.cuda(), targets.cuda() 
            outputs = model(features)
            # Reshape outputs and targets to be compatible with the loss function
            outputs = outputs.view(-1, num_classes)
            # Calculate loss
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())
            validation_loss = criterion(outputs, targets)
            running_loss += validation_loss.item() * features.size(0)
#             val_progress_bar.set_postfix(validation_loss=validation_loss.item())
#             val_progress_bar.update(1)
    
    validation_loss = running_loss / val_length
    scheduler.step(validation_loss)
    
    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)
    f1 = f1_score(all_targets, all_preds, average='weighted')
    precision = precision_score(all_targets, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_targets, all_preds, average='weighted')
    accuracy = accuracy_score(all_targets, all_preds)

    writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)
    writer.add_scalar('validation_epoch_loss', validation_loss, epoch)
    writer.add_scalar('f1_score', f1, epoch)
    writer.add_scalar('precision', precision, epoch)
    writer.add_scalar('recall', recall, epoch)
    writer.add_scalar('accuracy', accuracy, epoch)

#     training_loss_list.append(epoch_loss)
#     validation_loss_list.append(validation_loss)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}, validation Loss: {validation_loss:.4f}')
    progress_bar.close()

print("Training complete!")

Epoch 42/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 42/20, Training Loss: 2.0681, validation Loss: 2.0516


Epoch 43/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 43/20, Training Loss: 2.0458, validation Loss: 2.0359


Epoch 44/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 44/20, Training Loss: 2.0253, validation Loss: 2.0216


Epoch 45/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 45/20, Training Loss: 2.0064, validation Loss: 2.0087


Epoch 46/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 46/20, Training Loss: 1.9891, validation Loss: 1.9970


Epoch 47/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 47/20, Training Loss: 1.9731, validation Loss: 1.9865


Epoch 48/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 48/20, Training Loss: 1.9584, validation Loss: 1.9770


Epoch 49/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 77/20, Training Loss: 1.8066, validation Loss: 1.9013


Epoch 78/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 78/20, Training Loss: 1.8053, validation Loss: 1.9012


Epoch 79/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 79/20, Training Loss: 1.8041, validation Loss: 1.9011


Epoch 80/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 80/20, Training Loss: 1.8029, validation Loss: 1.9010


Epoch 81/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 81/20, Training Loss: 1.8019, validation Loss: 1.9009


Epoch 82/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 82/20, Training Loss: 1.8009, validation Loss: 1.9009


Epoch 83/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 83/20, Training Loss: 1.8000, validation Loss: 1.9009


Epoch 84/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 84/20, Training Loss: 1.7991, validation Loss: 1.9009


Epoch 85/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 85/20, Training Loss: 1.7984, validation Loss: 1.9009


Epoch 86/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 86/20, Training Loss: 1.7977, validation Loss: 1.9010


Epoch 87/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 87/20, Training Loss: 1.7969, validation Loss: 1.9010


Epoch 88/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 88/20, Training Loss: 1.7963, validation Loss: 1.9011


Epoch 89/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 89/20, Training Loss: 1.7957, validation Loss: 1.9012


Epoch 90/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 90/20, Training Loss: 1.7951, validation Loss: 1.9013


Epoch 91/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 91/20, Training Loss: 1.7946, validation Loss: 1.9014


Epoch 92/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 92/20, Training Loss: 1.7940, validation Loss: 1.9015


Epoch 93/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 93/20, Training Loss: 1.7936, validation Loss: 1.9016


Epoch 94/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 94/20, Training Loss: 1.7931, validation Loss: 1.9017


Epoch 95/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 95/20, Training Loss: 1.7927, validation Loss: 1.9018


Epoch 96/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 96/20, Training Loss: 1.7923, validation Loss: 1.9019


Epoch 97/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 97/20, Training Loss: 1.7919, validation Loss: 1.9021


Epoch 98/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 98/20, Training Loss: 1.7915, validation Loss: 1.9022


Epoch 99/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 99/20, Training Loss: 1.7913, validation Loss: 1.9023


Epoch 100/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 101/20, Training Loss: 1.7906, validation Loss: 1.9026


Epoch 102/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 102/20, Training Loss: 1.7904, validation Loss: 1.9027


Epoch 103/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 104/20, Training Loss: 1.7899, validation Loss: 1.9029


Epoch 105/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 127/20, Training Loss: 1.7867, validation Loss: 1.9057


Epoch 128/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 128/20, Training Loss: 1.7865, validation Loss: 1.9058


Epoch 129/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 129/20, Training Loss: 1.7865, validation Loss: 1.9059


Epoch 130/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 131/20, Training Loss: 1.7864, validation Loss: 1.9061


Epoch 132/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 132/20, Training Loss: 1.7863, validation Loss: 1.9062


Epoch 133/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 134/20, Training Loss: 1.7862, validation Loss: 1.9064


Epoch 135/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 136/20, Training Loss: 1.7861, validation Loss: 1.9066


Epoch 137/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 137/20, Training Loss: 1.7860, validation Loss: 1.9067


Epoch 138/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 138/20, Training Loss: 1.7860, validation Loss: 1.9068


Epoch 139/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

Epoch 139/20, Training Loss: 1.7859, validation Loss: 1.9068


Epoch 140/20 LR = 5e-05:   0%|          | 0/708 [00:00<?, ?batch/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [35]:
torch.argmax(outputs, dim = 1)

tensor([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], device='cuda:0')

## Testing new feature extractor

In [5]:
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')

inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state,

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [50]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)
images = [image for i in range(1,32)]
labels = [i for i in range(1,32)]

feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', output_hidden_states = True, return_dict = True)

inputs = feature_extractor(images=images, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
# predicted_class_idx = logits.argmax(-1).item()
# print("Predicted class:", model.config.id2label[predicted_class_idx]),

In [53]:
# outputs.hidden_states[-1][:,0,:]
import torch
t = torch.column_stack([outputs.hidden_states[-1][:,0,:], torch.tensor(labels)])

In [57]:
t[0,:]

tensor([ 2.3126e+00,  5.5116e+00,  1.1788e+01,  5.7725e-01,  6.5475e+00,
        -2.9125e+00,  4.5668e+00, -1.3786e+00,  6.1539e+00, -5.1831e+00,
         4.9711e+00,  8.4007e-01,  7.7080e+00, -3.0897e+00, -3.2443e+00,
         9.7258e+00,  1.1146e+00, -2.1286e+00,  8.7621e+00,  1.6315e+00,
        -8.4574e+00,  1.8422e+00,  1.4254e+00,  6.5619e+00, -1.0730e+01,
         6.0743e+00,  4.2650e+00,  6.0531e+00,  8.9479e+00,  2.5177e+00,
         5.4446e-01,  1.4944e+00,  4.0779e+00,  1.0520e+01, -2.9379e+00,
         6.8438e+00, -4.6463e+00, -3.0405e+00,  1.0135e+00,  8.2927e+00,
         9.9011e+00, -3.0263e+00,  7.5373e-01, -4.1869e+00,  2.0160e+00,
         7.0067e+00,  7.9857e-02, -2.2949e+00,  2.2206e+00, -4.4853e+00,
         1.8516e+00, -7.8394e-01,  4.4800e+00, -3.8795e+00, -1.1654e+01,
        -2.1962e+00,  2.5946e-01,  1.0626e+01, -5.4844e-01, -2.7896e+00,
         1.3051e+01,  8.1810e+00,  4.4351e+00, -6.2068e+00,  1.0071e+00,
        -4.3451e+00,  2.8967e+00, -1.7342e+00,  5.2

In [5]:
gc.collect()
torch.cuda.empty_cache()

In [18]:
# Training with custom feature loader

# Hyperparameters
feature_dim = 2048
num_classes = 8
batch_size = 128
num_epochs = 10
learning_rate = 1e-4
sequence_length = 64

# Model, loss function, optimizer
model = ImageTransformer(feature_dim=feature_dim, num_classes=num_classes, sequence_length = sequence_length).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# try SGD Optimizer 
 
loss_list = []

image_features = image_features.cuda()
label_tensor = label_tensor.cuda()

# Training loop
for epoch in range(1, num_epochs):
    model.train()
    running_loss = 0.0
    seq_start = 0
    seq_end = sequence_length
    progress_bar = tqdm(total = len(labels))
    while seq_end < label_tensor.shape[0]:
        optimizer.zero_grad()
        out = []
        targets = []
        for i in range(batch_size):
            if seq_end >= label_tensor.shape[0]:
                break
            inputs = image_features[seq_start:seq_start + sequence_length, : ]
            target = label_tensor[seq_end]
#             print('hello',inputs.shape)
#             print(targets.shape)
            # Forward pass
            outputs = model(inputs)

            # Reshape outputs and targets to be compatible with the loss function
            outputs = outputs.view(-1, num_classes)
            out.append(outputs)
            target = target.view(-1)
            targets.append(target)
            seq_start += 1
            seq_end += 1
            if seq_end % 100 == 0:
                progress_bar.update(100)
                    # Calculate loss
        pred = torch.row_stack(out)
        truth = torch.tensor(targets).cuda()
        loss = criterion(pred, truth)
        # Backward pass and optimize
#             print(loss)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        loss_list.append(loss.item())
        # Update progress bar with current loss
#         data_loader.set_postfix(loss=loss.item())
    
    epoch_loss = running_loss / len(dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    torch.save(model.state_dict(), f'transformer_on_resnet_50_e{epoch}.pth')
    progress_bar.close()

print("Training complete!")


  0%|          | 0/15440 [00:00<?, ?it/s]

After transformer encoding:  torch.Size([64, 2048])
Take mean:  torch.Size([64])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x64 and 2048x256)

In [10]:
# Validation

model = ImageTransformer(feature_dim=feature_dim, num_classes=num_classes, sequence_length = sequence_length).to(device)
model.load_state_dict(torch.load('/kaggle/working/SGD_Kaiming/transformer_on_ViT_e80.pth'))
model.eval()
seq_start = 0
seq_end = sequence_length
progress_bar = tqdm(total = len(labels))
preds = []
with torch.no_grad():
    while seq_end < label_tensor.shape[0]:

        inputs = image_features[seq_start:seq_start + sequence_length, : ].cuda()
        target = label_tensor[seq_end]
    #             print('hello',inputs.shape)
    #             print(targets.shape)
        # Forward pass
        outputs = model(inputs)
        preds.append(torch.argmax(outputs).item())
        seq_start += 1
        seq_end += 1
        progress_bar.update(1)

print(len(preds))

In [6]:
# Validation
# feature_dim = 2048
# num_classes = 8
# batch_size = 128
# num_epochs = 10
# learning_rate = 1e-4
# sequence_length = 64

model = ImageTransformer(feature_dim=feature_dim, num_classes=num_classes, sequence_length = sequence_length).to(device)
model.load_state_dict(torch.load('/kaggle/working/SGD_Kaiming/transformer_on_ViT_e80.pth', map_location=torch.device('cpu')))
criterion = nn.CrossEntropyLoss()

model.eval()

all_preds = []
all_targets = []
running_loss = 0.0
with torch.no_grad():
    progress_bar = tqdm(total=len(data_loader_val), desc='Validation', unit='batch')
    for features, targets in data_loader_val:
#         features, targets = features.cuda(), targets.cuda() 
        outputs = model(features)
        # Reshape outputs and targets to be compatible with the loss function
        outputs = outputs.view(-1, num_classes)
        # Calculate loss
        validation_loss = criterion(outputs, targets)
        running_loss += validation_loss.item() * features.size(0)
        # Collect predictions and true labels
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())
        
        progress_bar.update(1)
        progress_bar.set_postfix(loss=validation_loss.item())
    
    progress_bar.close()

validation_loss = running_loss / val_length

# Calculate metrics
all_preds = np.array(all_preds)
all_targets = np.array(all_targets)

f1 = f1_score(all_targets, all_preds, average='weighted')
precision = precision_score(all_targets, all_preds, average='weighted')
recall = recall_score(all_targets, all_preds, average='weighted')
accuracy = accuracy_score(all_targets, all_preds)

print(f'Validation Loss: {validation_loss:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'Accuracy: {accuracy:.4f}')

Validation:   0%|          | 0/120 [00:00<?, ?batch/s]

Validation Loss: 1.9009
F1 Score: 0.1363
Precision: 0.0884
Recall: 0.2974
Accuracy: 0.2974


  _warn_prf(average, modifier, msg_start, len(result))


In [116]:
from scipy.ndimage import gaussian_filter1d
import numpy as np

preds = torch.tensor(preds)
preds = preds.to('cpu')

sigma = 1.0  # Standard deviation of the Gaussian kernel
filtered_array = gaussian_filter1d(preds, sigma=sigma)

# Discretize the filtered values
discretized_array = np.round(filtered_array).astype(int)  # Round and convert to integers

# Convert the result back to a PyTorch tensor
filtered_tensor = torch.tensor(discretized_array, dtype=torch.int)


filtered_tensor.shape

  preds = torch.tensor(preds)


torch.Size([64])

In [117]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Example ground truth and predictions
y_true = label_tensor[sequence_length:]  # True labels
y_pred = preds  # Predicted labels

# Calculate precision, recall, and F1 score for different averaging methods
precision_macro = precision_score(y_true, y_pred, average='macro')
recall_macro = recall_score(y_true, y_pred, average='macro')
f1_macro = f1_score(y_true, y_pred, average='macro')

precision_micro = precision_score(y_true, y_pred, average='micro')
recall_micro = recall_score(y_true, y_pred, average='micro')
f1_micro = f1_score(y_true, y_pred, average='micro')

precision_weighted = precision_score(y_true, y_pred, average='weighted')
recall_weighted = recall_score(y_true, y_pred, average='weighted')
f1_weighted = f1_score(y_true, y_pred, average='weighted')

print(f'Precision (macro): {precision_macro:.2f}')
print(f'Recall (macro): {recall_macro:.2f}')
print(f'F1 Score (macro): {f1_macro:.2f}')

print(f'Precision (micro): {precision_micro:.2f}')
print(f'Recall (micro): {recall_micro:.2f}')
print(f'F1 Score (micro): {f1_micro:.2f}')

print(f'Precision (weighted): {precision_weighted:.2f}')
print(f'Recall (weighted): {recall_weighted:.2f}')
print(f'F1 Score (weighted): {f1_weighted:.2f}')

accuracy = accuracy_score(y_true, y_pred)

print(f'Accuracy: {accuracy:.2f}')


ValueError: Found input variables with inconsistent numbers of samples: [15408, 64]

In [None]:
torch.tensor(float(outputs.argmax().item()))

In [None]:
img = Image.open('/kaggle/input/abaw-7-dataset/cropped_aligned/1-30-1280x720/00001.jpg').convert('RGB')
transform = transforms.Compose([
            transforms.Resize([224, 224])
        ])

transform(img)