In [2]:
# Importing the dataframe
# First analysis uses extraversion only
# Relative path to the dataset folder; the training CSV is read from its
# "train" subfolder further down.
data_path = "../chalearn_dataset"

In [11]:
import pandas as pd

# Read the extraversion labels of the training split from disk.
data_training = pd.read_csv(
    f"{data_path}/train/extraversion_data.csv"
)
# Build a DataFrame from the loaded table (column-oriented construction).
df_training = pd.DataFrame(data_training)

# Preview at most the first 2000 rows.
df_training.head(2000)

Unnamed: 0.1,Unnamed: 0,extraversion
0,zEyRyTnIw5I.005.mp4,0
1,nskJh7v6v1U.004.mp4,0
2,eHcRre1YsNA.000.mp4,0
3,VuadgOz6T7s.000.mp4,0
4,7nhJXn9PI0I.001.mp4,0
...,...,...
1995,9yZEb6bdxNY.004.mp4,1
1996,dNXqs5HNijI.004.mp4,1
1997,rG8D-A2F8xg.004.mp4,1
1998,F-Dy1EFm_Mw.005.mp4,1


## Implementando um primeiro modelo

EfficientNet B0 -> Transformers -> Classification

In [13]:
# Height and width of the image fed to the model; used for both spatial
# dimensions of the Keras Input layer.
IMG_SIZE = 224

In [30]:
# Camadas iniciais do modelo:

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0

# For now the model takes a single three-channel image as input.
inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# EfficientNet B0 (ImageNet weights, no classification head) acts as a frozen
# feature extractor for the image being processed.
features_extraction = EfficientNetB0(weights='imagenet', include_top=False)
features_extraction.trainable = False
# The name is rebound from the backbone model to its output tensor.
features_extraction = features_extraction(inputs)

model = tf.keras.Model(inputs=inputs, outputs=features_extraction)
# model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, None, None, 1280)  4049571  
                                                                 
Total params: 4,049,571
Trainable params: 0
Non-trainable params: 4,049,571
_________________________________________________________________


In [31]:
# Transformers - Camada final

In [51]:
class Patches(layers.Layer):
    """Reshape a batch of feature tensors into a sequence of fixed-size patches.

    Args:
        patch_dim: number of values in each patch (size of the last axis of
            the output).
    """

    def __init__(self, patch_dim):
        super(Patches, self).__init__()
        self.patch_dim = patch_dim

    def call(self, tensors):
        """Return `tensors` reshaped to (batch, -1, patch_dim).

        The number of patches is inferred by the reshape, so the number of
        values per sample must be divisible by `patch_dim`.
        """
        batch_size = tf.shape(tensors)[0]
        # Use the value given to the constructor. The previous code read a
        # notebook-global `patch_dim`, which silently ignored the argument and
        # failed with NameError when no such global existed.
        patches = tf.reshape(tensors, [batch_size, -1, self.patch_dim])
        # Debug aid: prints the statically known shape of the reshaped tensor.
        print("Patches shape = ", patches.shape)
        return patches

In [52]:
# The patches are already linearly projected; only the positional embedding is
# still missing.
class Encoder(layers.Layer):
    """Add a learned positional embedding to a sequence of patches.

    Args:
        num_patches: number of positions in the embedding table; also the
            number of position indices generated on every call.
        patch_dim: size of each positional embedding vector.
    """

    def __init__(self, num_patches, patch_dim):
        super().__init__()
        self.num_patches = num_patches
        self.position_embedding = layers.Embedding(
            output_dim=patch_dim,
            input_dim=num_patches,
        )

    def call(self, patch):
        """Return `patch` plus the embeddings of positions 0..num_patches-1."""
        position_ids = tf.range(self.num_patches)
        return patch + self.position_embedding(position_ids)

In [39]:
# def create_classifier():
#     inputs = layers.Input(shape=input_shape)
#     # Augment data.
#     augmented = data_augmentation(inputs)
#     # Create patches.
#     patches = Patches(patch_size)(augmented)
#     # Encode patches.
#     encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

#     # Create multiple layers of the Transformer block.
#     for _ in range(transformer_layers):
#         # Layer normalization 1.
#         x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
#         # Create a multi-head attention layer.
#         attention_output = layers.MultiHeadAttention(
#             num_heads=num_heads, key_dim=projection_dim, dropout=0.1
#         )(x1, x1)
#         # Skip connection 1.
#         x2 = layers.Add()([attention_output, encoded_patches])
#         # Layer normalization 2.
#         x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
#         # MLP.
#         x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
#         # Skip connection 2.
#         encoded_patches = layers.Add()([x3, x2])

#     # Create a [batch_size, projection_dim] tensor.
#     representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
#     representation = layers.Flatten()(representation)
#     representation = layers.Dropout(0.5)(representation)
#     # Add MLP.
#     features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
#     # Classify outputs.
#     logits = layers.Dense(num_classes)(features)
#     # Create the Keras model.
#     model = keras.Model(inputs=inputs, outputs=logits)
#     return model

In [54]:
# Camadas iniciais do modelo:
patch_dim = 64
num_patches = 1280 // patch_dim

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)) # Por enquanto o input é apenas uma imagem
# Usa EfficientNet B0 como extratora de características da imagem que iremos processar
features_extraction = EfficientNetB0(include_top=False, weights='imagenet')
features_extraction.trainable = False
features_extraction = features_extraction(inputs)
# Patches
patches = Patches(patch_dim)(features_extraction)

# Encodding
encoded_patches = Encoder(num_patches, patch_dim)(patches)


model = tf.keras.Model(inputs, encoded_patches)
# model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

Patches shape =  (None, None, 64)
Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_28 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, None, None, 1280)  4049571  
                                                                 
 patches_5 (Patches)         (None, None, 64)          0         
                                                                 
 encoder_2 (Encoder)         (None, 20, 64)            1280      
                                                                 
Total params: 4,050,851
Trainable params: 1,280
Non-trainable params: 4,049,571
_________________________________________________________________
