In [45]:
import torch
import torch.nn as nn   
import torch.nn.functional as F
import torch.optim as optim
import pytorch_lightning as pl
import torchmetrics

# iter_freqs = [("Theta", 4, 7), ("Alpha", 8, 12), ("Beta", 13, 25), ("Gamma", 30, 45)]


In [None]:
import google.colab.drive
google.colab.drive.mount('/content/drive')

In [None]:
# Dropout(p=0.1, inplace=False)

import torch
import torch.nn as nn
import numpy as np
import pickle

with open("./data/face_recognition_preprocessed_data/dataset/event_dict_all.pkl", "rb") as f:
    event_dict_all = pickle.load(f)

print(event_dict_all[:10])

path = './data/face_recognition_preprocessed_data/dataset/target_trial_type.npy'

target_trial_type = np.load(path)

print(target_trial_type.shape)

path = './data/face_recognition_preprocessed_data/dataset/target_labels.pt'

target_labels = torch.load(path)

print(target_labels.shape)

path = './data/face_recognition_preprocessed_data/dataset/X.pt'

X = torch.load(path)

print(X.shape)

### data preprocessing for face recognition

In [None]:
import glob


dict_label = {"5": "Initial presentation of famous face",
                "6": "Immediate repeated  presentation of famous face",
                "7": "Delayed repeated  presentation of famous face",
                "13": "Initial presentation of unfamiliar face",
                "14": "Immediate repeated  presentation of unfamiliar face",
                "15": "Delayed repeated  presentation of unfamiliar face",
                "17": "Initial presentation of scrambled face",
                "18": "Immediate repeated  presentation of scrambled face",
                "19": "Delayed repeated  presentation of scrambled face",
                "256": "Left button press",
                "4096": "Right button press"}

print(dict_label)

classes = [5.0, 6.0, 7.0, 13.0, 14.0, 15.0, 17.0, 18.0, 19.0]
pick_number_of_samples_per_class = [2600, 1200, 1200, 2500, 1200, 1200, 2600, 1200, 1200]
number_of_samples_per_class = [2628, 1273, 1253, 2588, 1252, 1257, 2614, 1234, 1293]

path = './data/face_recognition_preprocessed_data/dataset/target_labels.pt'

labels_face = torch.load(path)

path = './data/face_recognition_preprocessed_data/dataset/X.pt'

y_signal = torch.load(path)

print(y_signal.shape)

print(labels_face.shape)

path = glob.glob('./data/face_recognition_preprocessed_data/tfr_dataset/data_per_class_*.pt')

lenght = sum([torch.load(p).shape[0] for p in path])
print('Number of frequency maps ',lenght)


# retrieve the respective samples and labels 
# these will be used to train the model in a multi-task fashion
#       the model will be trained to regress y_signal (the signal) from the frequency maps
#       whereas the dict_label are the classes that we try to predict

#       additionally, I use as class tocken (CLS) the labels_face (describe the type of face that was presented)
#       the tocken condition the transformer given the feature extracted from the frequency maps


conta = 0
y_sample = []
y_label = []
for n_sample_class, sample_to_pick in zip(number_of_samples_per_class, pick_number_of_samples_per_class):
    print()
    print('Pick the respective samples:',y_signal[conta:conta+sample_to_pick].shape)
    print('Pick the respective labels:',labels_face[conta:conta+sample_to_pick].shape)
    y_sample.append(y_signal[conta:conta+sample_to_pick])
    y_label.append(labels_face[conta:conta+sample_to_pick])
    conta += n_sample_class


y_sample = torch.cat(y_sample, dim=0)
y_label = torch.cat(y_label, dim=0)

# # save the data
# path = './data/face_recognition_preprocessed_data/tfr_dataset/y_sample.pt'
# torch.save(y_sample, path)
# path = './data/face_recognition_preprocessed_data/tfr_dataset/y_label.pt'
# torch.save(y_label, path)

In [None]:
y_sample.shape, y_label.shape, torch.load('./data/face_recognition_preprocessed_data/tfr_dataset/data_per_class_5.0.pt').shape

### Architecture for face recognition

In [1]:
import torch
import torch.nn as nn   
import torch.nn.functional as F
import torch.optim as optim
import pytorch_lightning as pl
import torchmetrics
from typing import Any
from pytorch_lightning.utilities.types import STEP_OUTPUT

##########################################################################################################
##########################################################################################################
##########################################################################################################





class ConvBlock(nn.Module):
    """Apply a two steps convolution with normalization and GELU activation"""
    def __init__(self, channel_in, channel_out, shape_norm, kernel_size=3, stride=1, padding=1, dilation=1):
        super().__init__()

        self.conv1 = nn.Conv2d(channel_in, channel_out, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.conv2 = nn.Conv2d(channel_out, channel_out, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.ln = nn.LayerNorm(shape_norm)
        self.gelu = nn.GELU()


    def forward(self, x):
        x = self.conv1(x)
        x = self.ln(x)
        x = self.gelu(x)
        x = self.conv2(x)
        x = self.ln(x)
        x = self.gelu(x)
        return x


##########################################################################################################
##########################################################################################################
##########################################################################################################


class UnetEncoderBlock(nn.Module):
    """Downsaple the input by a factor 2 raising the number of channels by a factor 2"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        shape_norm = [out_channels, 26, 189]
        self.convblock = ConvBlock(in_channels, out_channels, shape_norm)
        self.maxpool = nn.MaxPool2d((2, 2))

    def forward(self, x):
        x = self.convblock(x)
        # print(x.shape)
        pooled = self.maxpool(x)
        # print(pooled.shape)
        return x, pooled


##########################################################################################################
##########################################################################################################
##########################################################################################################

class UnetDecoderBlock(nn.Module):
    """Upsample the input by a factor 2 lowering the number of channels by a factor 2"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # self.conv1 = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels,  dilation=1, kernel_size=3, padding=0, stride=2)#dilation=1,
        # self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels,  dilation=1, kernel_size=3, padding=1)#dilation=1,
        self.conv1 = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels,  kernel_size=2, stride=2, padding=0, dilation=1)
        shape_norm = [out_channels,  26, 189]
        self.convblock = ConvBlock(in_channels+out_channels, out_channels, shape_norm)

    def forward(self, x, skip):
        skip = self.conv1(skip)
        # print(x.shape, skip.shape)
        # pad x to match skip   
        # torch.Size([32, 74, 26, 188]) torch.Size([32, 148, 26, 189])
        skip = nn.functional.pad(skip, (0, 1, 0, 0))
        x = torch.cat([x, skip], dim=1)
        x = self.convblock(x)
        return x
    

##########################################################################################################
##########################################################################################################
##########################################################################################################

class Unet(nn.Module):
    """Unet architecture"""
    def __init__(self, encoder_steps = 2, decoder_steps = 2, n_channels = 74):
        super().__init__()

        self.encoder_steps = encoder_steps
        self.decoder_steps = decoder_steps


        # encoder
        self.encoder_1 = UnetEncoderBlock(n_channels, n_channels*2)
        self.encoder_2 = UnetEncoderBlock(n_channels*2, n_channels*4)
        # decoder
        self.decoder_1 = UnetDecoderBlock(n_channels*4, n_channels*2)
        self.decoder_2 = UnetDecoderBlock(n_channels*2, n_channels)


    def forward(self, x):

        # encoder
        x, pooled_1 = self.encoder_1(x)
        x, pooled_2 = self.encoder_2(x) # bottleneck

        # decoder
        x = self.decoder_1(x, pooled_2)
        x = self.decoder_2(x, pooled_1)

        return x


##########################################################################################################
##########################################################################################################
##########################################################################################################


class FeatureExtractor(nn.Module):
    def __init__(self, n_channels = 74, kernel_size=3, stride=2, padding=0, dilation=1, dropout=0.1, stride_factor_reduction = 2):
        super().__init__()

        self.n_channels = n_channels

        self.channel_reduction = 16
        self.stride_factor_reduction = stride_factor_reduction

        self.conv1 = nn.Conv2d(in_channels=n_channels, out_channels=n_channels//self.channel_reduction, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.conv2 = nn.Conv2d(in_channels=n_channels//self.channel_reduction, out_channels=n_channels//self.channel_reduction, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation)

        self.activation = nn.GELU()

        self.layer_norm_1 = nn.LayerNorm([n_channels//self.channel_reduction, 26//self.stride_factor_reduction-1, 189//self.stride_factor_reduction])
        self.layer_norm_2 = nn.LayerNorm([n_channels//self.channel_reduction, (26//self.stride_factor_reduction-1)//self.stride_factor_reduction-1, (189//self.stride_factor_reduction -1)//self.stride_factor_reduction])

        self.dropout = nn.Dropout(dropout, inplace=True)

    def forward(self, x):

        # feature extraction block 1
        x = self.conv1(x)
        # print(x.shape)
        x = self.activation(x)
        x = self.layer_norm_1(x)
        x = self.dropout(x)

        # feature extraction block 2
        x = self.conv2(x)
        # print(x.shape)
        x = self.activation(x)
        x = self.layer_norm_2(x)
        x = self.dropout(x)

        x = x.flatten(start_dim=2, end_dim=-1)

        return x
    
class FeatureTransformerBlock(nn.Module):   
    """Transformer block for the feature extraction"""
    def __init__(self, lenght_signal = 189, num_t_layer = 1, n_head = 3, stride_factor_reduction = 2):
        super().__init__()
        
        self.num_t_layer = num_t_layer
        self.n_head = n_head
        
        self.stride_factor_reduction = stride_factor_reduction

        freq = (26//self.stride_factor_reduction-1)//self.stride_factor_reduction-1
        time_dim = (189//self.stride_factor_reduction -1)//self.stride_factor_reduction

        self.linear_transformation = nn.Linear(freq*time_dim, lenght_signal, bias=True)

        # 768 is the dimension of the embedding of BERT
        self.linear_transformation_tface = nn.Linear(768, lenght_signal, bias=True)
        
        self.te_layer = nn.TransformerEncoderLayer(d_model=lenght_signal, nhead=n_head)
        self.te = nn.TransformerEncoder(self.te_layer, num_layers=num_t_layer)

    def forward(self, x, t_face):
        """Return the cls token and the embedding"""

        # linear transformation
        x = self.linear_transformation(x)

        # add cls token
        # cls_token = t_face.unsqueeze(-1).to(x.device)
        cls_token = t_face.to(x.device)
        cls_token = self.linear_transformation_tface(cls_token).unsqueeze(-2)
        # print(cls_token.shape, x.shape)
        # cls_token = torch.zeros(x.shape[0], 1, x.shape[2]).to(x.device)
        
        # concat cls token with the embedding
        x = torch.cat([cls_token, x], dim=1)

        # transpose to match transformer input
        x = x.transpose(0, 1)

        # transformer
        x = self.te(x)

        # transpose back
        x = x.transpose(0, 1)

        # get the cls token
        cls_token = x[:, 0, :]

        # get the embedding
        x = x[:, 1:, :].permute(0, 2, 1)

        return cls_token, x
    

##########################################################################################################
##########################################################################################################
##########################################################################################################


class Architecture(nn.Module):
    """Net architecture"""
    def __init__(self, n_channels = 74, lenght_signal = 189, n_classes = 9):
        super().__init__()

        self.unet = Unet()
        self.feature_extractor = FeatureExtractor(n_channels = 74)
        self.feature_transformer = FeatureTransformerBlock()

        self.up_sampling_channels = nn.Linear(4, n_channels)
        self.mlp_classifier = nn.Sequential(
            nn.Linear(lenght_signal, lenght_signal//8),
            nn.GELU(),
            nn.LayerNorm(lenght_signal//8),
            nn.Linear(lenght_signal//8, n_classes),
        )

    def forward(self, x, t_face):
        # unet
        x = self.unet(x)
        # feature extractor
        x = self.feature_extractor(x)
        # feature transformer
        cls_token, x = self.feature_transformer(x, t_face)

        # up sampling
        x = self.up_sampling_channels(x).permute(0, 2, 1)

        # mlp classifier
        cls_classification = self.mlp_classifier(cls_token)

        return cls_classification, x
    

##########################################################################################################
##########################################################################################################
##########################################################################################################

class Net(pl.LightningModule):
    def __init__(self, n_channels = 74, lenght_signal = 189, n_classes = 9):
        super().__init__()

        self.model = Architecture(n_channels = n_channels, lenght_signal = lenght_signal, n_classes = n_classes)

        self.criterion_classification = nn.CrossEntropyLoss()
        self.criterion_regression = nn.MSELoss()
        self.huber_loss = nn.SmoothL1Loss()
        self.classification_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=n_classes)

    def forward(self, x, t_face):
        return self.model(x, t_face)
    
    def training_step(self, x, batch_idx):
        # get the data
        x, x_face, y_label_signal, y_label_class = x

        cls_classification, x = self.forward(x, x_face)

        # classification loss
        loss_classification = self.criterion_classification(cls_classification, y_label_class)

        # regression loss
        loss_regression = self.criterion_regression(x, y_label_signal)

        # total loss
        loss = loss_classification + loss_regression

        huber_loss = self.huber_loss(x, y_label_signal)

        y_label_class = torch.argmax(y_label_class, dim = -1)

        # classification accuracy
        acc = self.classification_accuracy(cls_classification, y_label_class)

        self.log('train_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_loss_classification', loss_classification, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_loss_regression', loss_regression, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_huber_loss', huber_loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc_classification', acc, on_epoch=True, prog_bar=True, logger=True)

        return loss
    
    def validation_step(self, x, batch_idx):
        # get the data
        x, x_face, y_label_signal, y_label_class = x

        cls_classification, x = self.forward(x, x_face)

        # classification loss
        loss_classification = self.criterion_classification(cls_classification, y_label_class)

        # regression loss
        loss_regression = self.criterion_regression(x, y_label_signal)

        # total loss
        loss = loss_classification + loss_regression

        huber_loss = self.huber_loss(x, y_label_signal)

        y_label_class = torch.argmax(y_label_class, dim = -1)

        # classification accuracy
        acc = self.classification_accuracy(cls_classification, y_label_class)

        self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_loss_classification', loss_classification, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_loss_regression', loss_regression, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_huber_loss', huber_loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc_classification', acc, on_epoch=True, prog_bar=True, logger=True)

        return loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10)
        return [optimizer], [scheduler]


  from .autonotebook import tqdm as notebook_tqdm


### Prepare data for face recognition

prepare the data to be used for face recognition. Due to the memory load I decided to reduce the number of samples to 700 for each sample.

In [6]:
x_face_full.shape, sum(number_of_samples_per_class), y_full.shape

(torch.Size([14900]), 15392, torch.Size([14900, 74, 189]))

code to pick class and event id from the data

In [2]:
import torch
import glob
dict_classes = {5: 0, 6: 1, 7: 2, 13: 3, 14: 4, 15: 5, 17: 6, 18: 7, 19: 8}
classes = [5.0, 6.0, 7.0, 13.0, 14.0, 15.0, 17.0, 18.0, 19.0]
number_of_samples_per_class = [2628, 1273, 1253, 2588, 1252, 1257, 2614, 1234, 1293]
pick_number_of_samples_per_class = [2600, 1200, 1200, 2500, 1200, 1200, 2600, 1200, 1200]

# signal to regrss
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_sample.pt'
y_full = torch.load(path)

# face cls
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_label.pt'
x_face_full = torch.load(path)

path = sorted(glob.glob('./data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_*.pt'))

print(path)

conta = 0
number_sample_per_class = 1000

X = []
x_face = []
y_label_signal = []
y_label_class = []

for p, n_sample_class in zip(path, pick_number_of_samples_per_class):
    print(p)
    data = torch.load(p)
    data = data[:number_sample_per_class].type(torch.float16)

    # feature maps with the presence of the frequencies
    X.append(data)

    # face cls
    x_face.append(x_face_full[conta:conta+number_sample_per_class])

    # signal to regress
    y_label_signal.append(y_full[conta:conta+number_sample_per_class]*1e3)

    # class
    class_id = torch.ones(number_sample_per_class, dtype=torch.int8)*int(p.split('_')[-1].split('.')[0])
    y_label_class.append(class_id)

    conta = conta + n_sample_class

X = torch.cat(X, dim = 0)
x_face = torch.cat(x_face, dim = 0)
y_label_signal = torch.cat(y_label_signal, dim = 0)
y_label_class = torch.cat(y_label_class, dim = 0)

print(X.shape, x_face.shape, y_label_signal.shape, y_label_class.shape)
# torch.Size([9000, 74, 26, 189]) torch.Size([9000]) torch.Size([9000, 74, 189]) torch.Size([9000])

# torch.save(X, './data/face_recognition_preprocessed_data/tfr_dataset/X.pt')
# torch.save(x_face, './data/face_recognition_preprocessed_data/tfr_dataset/x_face.pt')
# torch.save(y_label_signal, './data/face_recognition_preprocessed_data/tfr_dataset/y_label_signal.pt')
# torch.save(y_label_class, './data/face_recognition_preprocessed_data/tfr_dataset/y_label_class.pt')

['./data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_05.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_06.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_07.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_13.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_14.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_15.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_17.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_18.pt', './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_19.pt']
./data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_05.pt
./data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_06.pt
./da

For each type of event embedd the description that will become the cls component to condition the transformer. Now for each face I encode the type of face in the latent space of a pretrained LLM (BERT).

In [None]:
# face cls
import numpy as np
import pickle
import torch

# load pre-trained LLM from huggerface
from transformers import BertModel, BertConfig, BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
config = BertConfig.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased', config=config)

# freeze all the parameters
for param in model.parameters():
    param.requires_grad = False

def get_features(model, x):
    with torch.no_grad():
        x = model(**tokenizer(x, return_tensors='pt')).pooler_output
    return x

# load the dictionary with the type of the features
path = './data/face_recognition_preprocessed_data/dataset/event_dict_all.pkl'

with open(path, 'rb') as f:
    event_dict = pickle.load(f)
    
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_label.pt'
x_face_full = torch.load(path)

# load the dictionary with the type of face presented (event type)
path = './data/face_recognition_preprocessed_data/dataset/target_trial_type.npy'
t_dict = np.load(path)

print(set(list(t_dict)))

dict_event_id = {e: idx for idx, e in enumerate(set(list(t_dict)))}

# face cls
# with this dictionary we can get the type of the face
# that were presented to the subject
# than the event type is translated into a sentence which 
# describe the event
# finally for each sentence I compute the embedding with the LLM
# and I use this embedding as a feature to regress the signal
# and to classify the class of the face that were presented
# to the subject
dict_element_face_type = {"famous_new": "Initial presentation of famous face",
                            "famous_second_early": "Immediate repeated presentation of famous face",
                            "famous_second_late": "Delayed repeated presentation of famous face",
                            "scrambled_new": "Initial presentation of scrambled face",
                            "scrambled_second_early": "Immediate repeated presentation of scrambled face",
                            "scrambled_second_late": "Delayed repeated presentation of scrambled face",
                            "unfamiliar_new": "Initial presentation of unfamiliar face",
                            "unfamiliar_second_early": "Immediate repeated presentation of unfamiliar face",
                            "unfamiliar_second_late": "Delayed repeated presentation of unfamiliar face",
                            "right_nonsym": "Right button press to indicated less symmetric face",
                            "left_sym": "Left button press to indicated more symmetric face",
                            "left_nonsym": "Left button press to indicated less symmetric face",
                            "right_sym": "Right button press to indicated more symmetric face",
                            "boundary": "The boundary mark of each run as the run data was merged",
                            "famous": "Famous or famiiar face",
                            "unfamiliar": "Unfamiliar face",
                            "scrambled": "Scrambled face"}


dict_element_face_type_embedding = {
    k: get_features(model, v) for k, v in dict_element_face_type.items()
}

# covert the label array into a torch tensor with the embedding
# of the sentence that describe the event
x_face_embedding = torch.cat([dict_element_face_type_embedding[i] for i in t_dict], dim = 0)

print(x_face_embedding.shape)

y_label_face = torch.tensor([dict_event_id[i] for i in t_dict]).type(torch.int8)

print(y_label_face.shape)

# torch.save(x_face_embedding, './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/x_face_embedding.pt')
# torch.save(y_label_face, './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_label_face.pt')


Extract and preprocess the data to perform the training of the model

In [None]:
import torch
import glob
dict_classes = {5: 0, 6: 1, 7: 2, 13: 3, 14: 4, 15: 5, 17: 6, 18: 7, 19: 8}
classes = [5.0, 6.0, 7.0, 13.0, 14.0, 15.0, 17.0, 18.0, 19.0]
number_of_samples_per_class = [2628, 1273, 1253, 2588, 1252, 1257, 2614, 1234, 1293]
pick_number_of_samples_per_class = [2600, 1200, 1200, 2500, 1200, 1200, 2600, 1200, 1200]

# signal to regrss
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_sample.pt'
y_full = torch.load(path)

# signal to regrss
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/y_label_face.pt'
y_class = torch.load(path)

# face cls
path = './data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/x_face_embedding.pt'
x_face_full = torch.load(path)

path = sorted(glob.glob('./data/face_recognition_preprocessed_data/tfr_dataset/full_dataset/data_per_class_*.pt'))

print(path)

conta = 0 
conta_full = 0 # becase I pick the samples from the full dataset
number_sample_per_class = 1000

X = []
x_face = []
y_label_signal = []
y_label_class = []

for p, n_sample_class, n_sample_class_full in zip(path, pick_number_of_samples_per_class, number_of_samples_per_class):
    print(p)
    data = torch.load(p)
    data = data[:number_sample_per_class].type(torch.float16)

    # feature maps with the presence of the frequencies
    X.append(data)

    # face cls
    x_face.append(x_face_full[conta_full:conta_full+number_sample_per_class])

    # class
    y_label_class.append(y_class[conta_full:conta_full+number_sample_per_class])

    # signal to regress
    y_label_signal.append(y_full[conta:conta+number_sample_per_class]*1e3)

    conta = conta + n_sample_class
    conta_full = conta_full + n_sample_class_full
    

X = torch.cat(X, dim = 0)
x_face = torch.cat(x_face, dim = 0)
y_label_signal = torch.cat(y_label_signal, dim = 0)
y_label_class = torch.cat(y_label_class, dim = 0)

print(X.shape, x_face.shape, y_label_signal.shape, y_label_class.shape)

# torch.save(X, './data/face_recognition_preprocessed_data/tfr_dataset/X.pt')
# torch.save(x_face, './data/face_recognition_preprocessed_data/tfr_dataset/x_face.pt')
# torch.save(y_label_signal, './data/face_recognition_preprocessed_data/tfr_dataset/y_label_signal.pt')
# torch.save(y_label_class, './data/face_recognition_preprocessed_data/tfr_dataset/y_label_class.pt')

In [2]:
import torch
path = './data/face_recognition_preprocessed_data/tfr_dataset/X.pt'
x = torch.load(path)
x = x[:5000].type(torch.float32)

torch.save(x, './data/face_recognition_preprocessed_data/tfr_dataset/X_5000.pt')

In [2]:
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import DataLoader, TensorDataset, random_split

# number of classes
NUMBER_CLASSES = 9

#############################
# load the data
#############################

path = './data/face_recognition_preprocessed_data/tfr_dataset/X.pt'
x = torch.load(path)
x = x[:5000].type(torch.float32)

path = './data/face_recognition_preprocessed_data/tfr_dataset/x_face.pt'
x_face = torch.load(path)
x_face = x_face[:5000].type(torch.float32)

path = './data/face_recognition_preprocessed_data/tfr_dataset/y_label_signal.pt'
y_label_signal = torch.load(path)
y_label_signal = y_label_signal[:5000].type(torch.float32)

path = './data/face_recognition_preprocessed_data/tfr_dataset/y_label_class.pt'
y_label_class = torch.load(path)
y_label_class = y_label_class[:5000].type(torch.float32)

# conversion for one hot encoding
# dict_class = {5: 0, 6: 1, 7: 2, 13: 3, 14: 4, 15: 5, 17: 6, 18: 7, 19: 8}
# y_label_class = torch.tensor([dict_class[y] for y in y_label_class.tolist()], dtype=torch.float32)

# class label into one hot encoding
y_label_class = torch.nn.functional.one_hot(y_label_class.to(torch.int64), num_classes=NUMBER_CLASSES).type(torch.float32)


#############################
# split the data
#############################

# dataset
dataset = TensorDataset(x, x_face, y_label_signal, y_label_class)

del x, x_face, y_label_signal, y_label_class

# train, validation and test
train_size = int(0.8 * len(dataset))
val_size = int(0.2 * len(dataset))

train_set, val_set = random_split(dataset, [train_size, val_size])

# train, validation and test dataloader
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

del train_set

val_loader = DataLoader(val_set, batch_size=32, shuffle=False, num_workers=4)

del val_set

#############################
# train
#############################

# model
model = Net()

# # callbacks
# checkpoint_callback = ModelCheckpoint(
#     monitor='val_loss',
#     dirpath='./models/face_recognition',
#     filename='face_recognition-{epoch:02d}-{val_loss:.2f}',
#     save_top_k=2,   
#     mode='min',
# )

# early_stop_callback = EarlyStopping(
#     monitor='val_loss',
#     min_delta=0.00,
#     patience=5,
#     verbose=True,
#     mode='min'
# )

# trainer
trainer = pl.Trainer(
    accelerator='auto',
    # callbacks=[checkpoint_callback, early_stop_callback],
    max_epochs=10
)

# train
trainer.fit(model, train_loader, val_loader)

# load the best model
# model = Net.load_from_checkpoint(checkpoint_callback.best_model_path)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                     | Type               | Params
----------------------------------------------------------------
0 | model                    | Architecture       | 11.3 M
1 | criterion_classification | CrossEntropyLoss   | 0     
2 | criterion_regression     | MSELoss            | 0     
3 | huber_loss               | SmoothL1Loss       | 0     
4 | classification_accuracy  | MulticlassAccuracy | 0     
----------------------------------------------------------------
11.3 M    Trainable params
0         Non-trainable params
11.3 M    Total params
45.118    Total estimated model params size (MB)


Epoch 1:   2%|▏         | 3/125 [00:06<04:33,  2.24s/it, v_num=17, train_loss_step=0.702, train_loss_classification_step=0.608, train_loss_regression_step=0.0938, train_huber_loss_step=0.0445, train_acc_classification_step=0.844, val_loss=nan.0, val_loss_classification=nan.0, val_loss_regression=nan.0, val_huber_loss=nan.0, val_acc_classification=0.816, train_loss_epoch=1.660, train_loss_classification_epoch=1.460, train_loss_regression_epoch=0.200, train_huber_loss_epoch=0.0945, train_acc_classification_epoch=0.487]  

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
