# Fine-tuning DistilBERT para Masked Language Modeling no IMDb

In [None]:
#!pip install transformers datasets tensorflow

# Bibliotecas Principais

In [1]:
import tensorflow as tf
import numpy as np
from datasets import load_dataset
from transformers import DistilBertTokenizer, TFDistilBertForMaskedLM

# Carregando os Dados

In [2]:
dataset = load_dataset('imdb')
print(dataset['train'][0]['text'])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, eve

In [3]:
from datasets import DatasetDict
from sklearn.model_selection import train_test_split

def stratified_subset_dataset(dataset_dict, n_samples_per_split, seed=42):
    """
    Recebe um DatasetDict (por exemplo, {'train': ..., 'test': ...})
    e retorna outro DatasetDict com a mesma estrutura,
    mas com no máximo n_samples_per_split exemplos em cada split,
    mantendo a proporção das labels.
    """
    new_splits = {}

    for split_name, split_data in dataset_dict.items():
        labels = np.array(split_data['label'])
        indices = np.arange(len(labels))
        # Seleção estratificada
        selected_indices, _ = train_test_split(
            indices,
            train_size=min(n_samples_per_split, len(labels)),
            stratify=labels,
            random_state=seed
        )
        new_splits[split_name] = split_data.select(selected_indices)

    return DatasetDict(new_splits)


subset_dataset = stratified_subset_dataset(dataset, n_samples_per_split=2000)

In [7]:
print(len(subset_dataset['train']))
print(len(subset_dataset['test']))

2000
2000


# Tokenizando o Texto

In [4]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

seq_length = 128

def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=seq_length)

tokenized_datasets = subset_dataset.map(tokenize_function, batched=True, remove_columns=['text'])

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Os códigos dos tokens de mascaramento e marcação

In [5]:
cls_id = tokenizer.cls_token_id    # Token [CLS]
sep_id = tokenizer.sep_token_id    # Token [SEP]
mask_id = tokenizer.mask_token_id  # Token [MASK]

print(cls_id, sep_id, mask_id)

101 102 103


# Mascarando 15% dos Dados

In [6]:
def mask_tokens(inputs, tokenizer, mlm_probability=0.15):
    inputs = np.array(inputs)
    labels = np.copy(inputs)

    # Aqui geramos valores aleatórios para cada input
    rand = np.random.rand(*inputs.shape)
    mask_arr = (rand < mlm_probability)

    # Não podemos mascarar tokens especiais, por isso vamos torná-los falsos
    special_tokens = [tokenizer.cls_token_id, tokenizer.sep_token_id]
    for special_id in special_tokens:
        mask_arr[inputs == special_id] = False

    # Aplicar [MASK] nos tokens selecionados
    inputs[mask_arr] = tokenizer.mask_token_id

    # Labels: 0 para posições não mascaradas
    labels[~mask_arr] = 0

    return inputs, labels


**Um pequeno exemplo de mascaramento**

In [7]:
# Exemplo de texto do dataset
text = subset_dataset['train'][0]['text']
text

'I bought this video at Walmart\'s $1 bin. I think I over-paid!!! In the 1940s, Bela Lugosi made a long string of 3rd-rate movies for small studios (in this case, Monogram--the ones who made most of the Bowry Boys films). While the wretchedness of most of these films does not approach the level of awfulness his last films achieved (Ed Wood "classics" such as Bride of the Monster and Plan 9 From Outer Space), they are nonetheless poor films and should be avoided by all but the most die-hard fans.<br /><br />I am an old movie junkie, so I gave this a try. Besides, a few of these lesser films were actually pretty good--just not this one.<br /><br />Lugosi is, what else, a mad scientist who wants to keep his rather bizarre and violent wife alive through a serum he concocts from young brides. They never really explained WHY it had to be brides or why it must be women or even what disease his wife had--so you can see that the plot was never really hashed out at all.<br /><br />Anyways, a rea

In [8]:
# Tokenizar
inputs = tokenizer(text, return_tensors="np", padding="max_length", truncation=True, max_length=128)
input_ids = inputs["input_ids"]

masked_input_ids, labels = mask_tokens(input_ids, tokenizer)

# Mostrar o texto original e o texto mascarado
print("Original: ", text)
print("Token IDs:", input_ids[0])
print("Masked Token IDs:", masked_input_ids[0])
print("Masked Text:", tokenizer.decode(masked_input_ids[0], skip_special_tokens=True))

#101 é o id do token CLS
#103 é o id do token MASK

Original:  I bought this video at Walmart's $1 bin. I think I over-paid!!! In the 1940s, Bela Lugosi made a long string of 3rd-rate movies for small studios (in this case, Monogram--the ones who made most of the Bowry Boys films). While the wretchedness of most of these films does not approach the level of awfulness his last films achieved (Ed Wood "classics" such as Bride of the Monster and Plan 9 From Outer Space), they are nonetheless poor films and should be avoided by all but the most die-hard fans.<br /><br />I am an old movie junkie, so I gave this a try. Besides, a few of these lesser films were actually pretty good--just not this one.<br /><br />Lugosi is, what else, a mad scientist who wants to keep his rather bizarre and violent wife alive through a serum he concocts from young brides. They never really explained WHY it had to be brides or why it must be women or even what disease his wife had--so you can see that the plot was never really hashed out at all.<br /><br />Anywa

# Colocando os dados em uma estrutura mais adequada para o treinamento

In [9]:
from sklearn.model_selection import train_test_split

def tf_dataset(tokenized_inputs, tokenizer, batch_size=8, max_samples=None):
    # Seleção estratificada baseada na label natural
    if max_samples:
        labels = np.array(tokenized_inputs['label'])
        indices = np.arange(len(labels))
        selected_indices, _ = train_test_split(
            indices,
            train_size=max_samples,
            stratify=labels,
            random_state=42
        )
        tokenized_inputs = tokenized_inputs.select(selected_indices)

    # Converter para arrays NumPy
    input_ids = np.array(tokenized_inputs['input_ids'], dtype=np.int32)
    attention_mask = np.array(tokenized_inputs['attention_mask'], dtype=np.int32)

    # Aplicar máscara para MLM
    inputs, labels = mask_tokens(input_ids, tokenizer)
    labels = labels.astype(np.int32)

    # Criar dataset TF
    dataset = tf.data.Dataset.from_tensor_slices(
        (
            {'input_ids': inputs, 'attention_mask': attention_mask},
            labels
        )
    )

    return dataset.shuffle(1000, seed=42).batch(batch_size)

# Criar datasets de treino e validação
train_dataset = tf_dataset(tokenized_datasets['train'], tokenizer, batch_size=8, max_samples=1000)
val_dataset = tf_dataset(tokenized_datasets['test'], tokenizer, batch_size=8, max_samples=1000)


# Carregando dados de um modelo pré-treinado para continuar o pré-treinamento

In [10]:
# O Distilbert possui apenas 6 camadas ao invés das 12 do BERT original
# Ao chamarmos a MaskedLM estamos indicando a cabeça MLM acima da última camada
# A loss é calculada apenas nas posições mascaradas

model = TFDistilBertForMaskedLM.from_pretrained('distilbert-base-uncased', from_pt=True)  # força conversão de PyTorch para TF)
model.compile(optimizer=tf.keras.optimizers.Adam(2e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForMaskedLM: ['vocab_projector.weight']
- This IS expected if you are initializing TFDistilBertForMaskedLM from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForMaskedLM from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertForMaskedLM were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForMaskedLM for predictions withou

In [11]:
model.fit(train_dataset, validation_data=val_dataset, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x7b53485118b0>

In [None]:
# Salvando o modelo completo
model.save("distilbert_mlm_tf", save_format="tf")



In [None]:
tokenizer.save_pretrained("distilbert_mlm_tf")

('distilbert_mlm_tf/tokenizer_config.json',
 'distilbert_mlm_tf/special_tokens_map.json',
 'distilbert_mlm_tf/vocab.txt',
 'distilbert_mlm_tf/added_tokens.json')

**Testando as Predições**

In [12]:
text = 'The movie was really [MASK].'

inputs = tokenizer(text, return_tensors='tf', padding='max_length', truncation=True, max_length=seq_length)

#Logits são as saídas relacionadas a cada token da entrada
logits = model(inputs).logits

# Retorna a posição do [MASK] na sequência
mask_position = tf.where(inputs["input_ids"][0] == tokenizer.mask_token_id)[0, 0]

# Obter logits do [MASK] e aplicar softmax
mask_logits = logits[0, mask_position]
mask_probs = tf.nn.softmax(mask_logits)

# Top-k tokens mais prováveis
k = 5
top_k_ids = tf.argsort(mask_probs, direction="DESCENDING")[:k]
top_k_probs = tf.gather(mask_probs, top_k_ids)

# Decodificar os tokens
top_k_tokens = tokenizer.decode(top_k_ids)

# Mostrar resultados
print(text)
print("Top-5 tokens mais prováveis para [MASK]:")
for token_id, prob in zip(top_k_ids.numpy(), top_k_probs.numpy()):
    print(tokenizer.decode([token_id]), f"({prob:.4f})")

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


The movie was really [MASK].
Top-5 tokens mais prováveis para [MASK]:
bad (0.0953)
entertaining (0.0696)
boring (0.0692)
good (0.0614)
scary (0.0506)


In [13]:
inputs['attention_mask']

<tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)>

In [14]:
inputs['input_ids']

<tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[ 101, 1996, 3185, 2001, 2428,  103, 1012,  102,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0]], dtype=int32)>

# Finetuning Downstream (Classificação)

## Vamos adaptar o dataset para trabalhar com as labels dos sentimentos

In [15]:
def tf_dataset_labels(tokenized_inputs, batch_size=8, max_samples=None, seed=42):
    """
    Cria um tf.data.Dataset usando os labels originais do dataset.
    Mantém subamostragem estratificada se max_samples for definido.
    """
    # Seleção estratificada baseada na label natural
    labels = np.array(tokenized_inputs['label'])

    if max_samples:
        indices = np.arange(len(labels))
        selected_indices, _ = train_test_split(
            indices,
            train_size=min(max_samples, len(indices)),
            stratify=labels,
            random_state=seed
        )
        tokenized_inputs = tokenized_inputs.select(selected_indices)
        labels = labels[selected_indices]

    # Converter para arrays NumPy
    input_ids = np.array(tokenized_inputs['input_ids'], dtype=np.int32)
    attention_mask = np.array(tokenized_inputs['attention_mask'], dtype=np.int32)
    labels = labels.astype(np.int32)

    # Criar dataset TF
    dataset = tf.data.Dataset.from_tensor_slices(
        (
            {'input_ids': input_ids, 'attention_mask': attention_mask},
            labels
        )
    )

    return dataset.shuffle(1000, seed=seed).batch(batch_size)

# Criar datasets de treino e validação
train_dataset = tf_dataset_labels(tokenized_datasets['train'], batch_size=8, max_samples=1000)
test_dataset = tf_dataset_labels(tokenized_datasets['test'], batch_size=8, max_samples=1000)

## Utilizando o Modelo anterior como BASE

In [17]:
# Essas bibliotecas nos permitirão congelar parte da rede e gerar um novo modelo
from tensorflow.keras import layers, Model

# Vamos utilizar somente a parte do transformer desprezando a cabela LM com softmax
base_model = model.distilbert  # pega só o transformer

# Congelar camadas do transformer (opcional)
base_model.trainable = True

In [18]:
from keras import layers, Model
import tensorflow as tf

# Dizem ao modelo o "novo" tipo de tensores da entrada (a forma e o tipo)".
input_ids = layers.Input(shape=(seq_length,), dtype=tf.int32, name="input_ids")
attention_mask = layers.Input(shape=(seq_length,), dtype=tf.int32, name="attention_mask")

# Lambda layer para chamar o transformer (Aspecto Técnico)
def call_transformer(x):
    return base_model(input_ids=x[0], attention_mask=x[1])[0]
# Lambda layer para chamar o transformer (Aspecto Técnico)
embeddings = layers.Lambda(
    call_transformer,
    output_shape=(seq_length, base_model.config.hidden_size)
)([input_ids, attention_mask])

# Usar o [CLS] token (primeira posição) para classificação
cls_token = embeddings[:, 0, :]

# Nova camada de saída para classificação do sentimento
output = layers.Dense(1, activation="sigmoid")(cls_token)

# Modelo final
clf_model = Model(inputs=[input_ids, attention_mask], outputs=output)

#Compilando
clf_model.compile(
    optimizer=tf.keras.optimizers.Adam(2e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)


clf_model.build(input_shape=(None, seq_length))

clf_model.summary()


In [20]:
epochs = 3

# Treinar o modelo usando os datasets já batched
history = clf_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs
)

Epoch 1/3
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 85ms/step - accuracy: 0.5176 - loss: 0.6936 - val_accuracy: 0.5330 - val_loss: 0.6883
Epoch 2/3
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 70ms/step - accuracy: 0.5599 - loss: 0.6863 - val_accuracy: 0.5450 - val_loss: 0.6868
Epoch 3/3
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 57ms/step - accuracy: 0.5444 - loss: 0.6891 - val_accuracy: 0.5580 - val_loss: 0.6854


In [21]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report

y_test_prob = clf_model.predict(test_dataset)
y_test_pred = (y_test_prob > 0.5).astype(int).flatten()
y_test_true = np.array(tokenized_datasets['test']['label'][:len(y_test_pred)])

accuracy_test = accuracy_score(y_test_true, y_test_pred)
f1_test = f1_score(y_test_true, y_test_pred)
report_test = classification_report(y_test_true, y_test_pred)

print(f"\nTest Accuracy: {accuracy_test:.4f}")
print(f"Test F1-score: {f1_test:.4f}")
print("Test Classification report:")
print(report_test)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step

Test Accuracy: 0.5100
Test F1-score: 0.5010
Test Classification report:
              precision    recall  f1-score   support

           0       0.50      0.54      0.52       492
           1       0.52      0.48      0.50       508

    accuracy                           0.51      1000
   macro avg       0.51      0.51      0.51      1000
weighted avg       0.51      0.51      0.51      1000



# Usando um modelo já pronto

In [22]:
from transformers import TFDistilBertModel
from tensorflow.keras import layers, Model
import tensorflow as tf

# -----------------------------------------------------------
# Carregar o modelo DistilBERT previamente treinado e salvo
# -----------------------------------------------------------
base_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased", from_pt=True)

# Congelar base (opcional)
base_model.trainable = False

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_transform.weight', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [23]:
# -----------------------------------------------------------
# Dizem ao modelo o "novo" tipo de tensores da entrada (a forma e o tipo)".
input_ids = layers.Input(shape=(seq_length,), dtype=tf.int32, name="input_ids")
attention_mask = layers.Input(shape=(seq_length,), dtype=tf.int32, name="attention_mask")

# Lambda layer para chamar o transformer (Aspecto Técnico)
def call_transformer(x):
    return base_model(input_ids=x[0], attention_mask=x[1])[0]
# Lambda layer para chamar o transformer (Aspecto Técnico)
embeddings = layers.Lambda(
    call_transformer,
    output_shape=(seq_length, base_model.config.hidden_size)
)([input_ids, attention_mask])

# Usar o [CLS] token (primeira posição) para classificação
cls_token = embeddings[:, 0, :]

# Nova camada de saída para classificação do sentimento
output = layers.Dense(1, activation="sigmoid")(cls_token)

# Modelo final
clf_model = Model(inputs=[input_ids, attention_mask], outputs=output)

#Compilando
clf_model.compile(
    optimizer=tf.keras.optimizers.Adam(2e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

clf_model.build(input_shape=(None, seq_length))

clf_model.summary()

In [25]:
epochs = 10

# Treinar o modelo usando os datasets já batched
history = clf_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs
)

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 125ms/step - accuracy: 0.4790 - loss: 0.8095 - val_accuracy: 0.5000 - val_loss: 0.7539
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 60ms/step - accuracy: 0.4995 - loss: 0.7520 - val_accuracy: 0.4980 - val_loss: 0.7283
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 58ms/step - accuracy: 0.4881 - loss: 0.7336 - val_accuracy: 0.5000 - val_loss: 0.7134
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 70ms/step - accuracy: 0.5199 - loss: 0.7103 - val_accuracy: 0.5120 - val_loss: 0.7047
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 57ms/step - accuracy: 0.5134 - loss: 0.7081 - val_accuracy: 0.5220 - val_loss: 0.6995
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 56ms/step - accuracy: 0.5076 - loss: 0.7047 - val_accuracy: 0.5200 - val_loss: 0.6960
Epoch 7/10
[1m125

**Avaliando o modelo**

In [26]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report

y_test_prob = clf_model.predict(test_dataset)
y_test_pred = (y_test_prob > 0.5).astype(int).flatten()
y_test_true = np.array(tokenized_datasets['test']['label'][:len(y_test_pred)])

accuracy_test = accuracy_score(y_test_true, y_test_pred)
f1_test = f1_score(y_test_true, y_test_pred)
report_test = classification_report(y_test_true, y_test_pred)

print(f"\nTest Accuracy: {accuracy_test:.4f}")
print(f"Test F1-score: {f1_test:.4f}")
print("Test Classification report:")
print(report_test)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step

Test Accuracy: 0.4970
Test F1-score: 0.4479
Test Classification report:
              precision    recall  f1-score   support

           0       0.49      0.60      0.54       492
           1       0.51      0.40      0.45       508

    accuracy                           0.50      1000
   macro avg       0.50      0.50      0.49      1000
weighted avg       0.50      0.50      0.49      1000

