## A. Installation

### A.1. Structure

Pour réinitialiser la structure (from scratch) :

In [None]:
# `!cd` runs in a throw-away subshell and leaves the notebook's working directory unchanged,
# so the `%cd` magic is used instead. The clone target is spelled out explicitly so the
# repository always ends up in /content/Merval, which is where the copy commands expect it.
%cd /content
!rm -rf /content/Merval
!git clone https://github.com/mervealgan/Merval /content/Merval

Cloning into 'Merval'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 35 (delta 12), reused 29 (delta 11), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 366.01 KiB | 20.33 MiB/s, done.
Resolving deltas: 100% (12/12), done.


In [None]:
# Recreate an empty working data directory (any previous content is deleted).
!rm -rf /content/training/data
!mkdir -p /content/training/data/features

# Copy the feature files and the test/training/validation CSV splits out of the cloned repository.
!cp -r /content/Merval/data/features/* /content/training/data/features
!cp /content/Merval/data/test_set.csv /content/training/data/test_set.csv
!cp /content/Merval/data/training_set.csv /content/training/data/training_set.csv
!cp /content/Merval/data/valid_set.csv /content/training/data/valid_set.csv

In [None]:
# Work from /content/training so the relative 'data' and 'cache/...' paths used by the training code resolve there.
%cd /content/training/

/content/training


### A.2. Dépendances

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Versions are pinned where the recorded install log shows which release was used.
%pip install tbparse==0.0.9
%pip install syntok==1.4.4
%pip install stanza==1.8.2
%pip install textcomplexity
# Quoted so the shell never treats the extras brackets as a glob pattern.
%pip install "transformers[torch]"
%pip install -U accelerate

Collecting tbparse
  Downloading tbparse-0.0.9-py3-none-any.whl.metadata (8.7 kB)
Downloading tbparse-0.0.9-py3-none-any.whl (19 kB)
Installing collected packages: tbparse
Successfully installed tbparse-0.0.9
Collecting syntok
  Downloading syntok-1.4.4-py3-none-any.whl.metadata (10 kB)
Downloading syntok-1.4.4-py3-none-any.whl (24 kB)
Installing collected packages: syntok
Successfully installed syntok-1.4.4
Collecting stanza
  Downloading stanza-1.8.2-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.12.1-py3-none-any.whl.metadata (5.4 kB)
Downloading stanza-1.8.2-py3-none-any.whl (990 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.1/990.1 kB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading emoji-2.12.1-py3-none-any.whl (431 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.4/431.4 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji, stanza
Successfully in

## B. Entraînement

In [None]:
import hashlib
import os
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import (Sequential, load_model)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from transformers import (AutoTokenizer, TrainingArguments, AutoModelForSequenceClassification, set_seed, Trainer,
                          EarlyStoppingCallback, )


class TCCDataset(torch.utils.data.Dataset):
    """Torch dataset pairing pre-tokenized inputs with their labels.

    `tokens` is a mapping from field name to an indexable tensor and `labels`
    an indexable sequence; both are stored exactly as given.
    """

    def __init__(self, tokens, labels):
        self.labels = labels
        self.tokens = tokens

    def __len__(self):
        # The dataset size is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample `idx`: a detached copy of every token field plus a "labels" tensor."""
        sample = {}
        for field, values in self.tokens.items():
            sample[field] = values[idx].clone().detach()
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample


class RegressionTrainer(Trainer):
    """Trainer variant that optimizes a mean-squared-error loss on the model logits."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the MSE between the model logits and the "labels" entry of `inputs`.

        Args:
            model: model called as ``model(**inputs)`` after "labels" has been removed.
            inputs: batch dict; its "labels" entry is popped (the dict is mutated).
            return_outputs: when true, return ``(loss, outputs)`` instead of the loss alone.
            num_items_in_batch: accepted because recent ``Trainer`` versions pass it as a
                keyword to ``compute_loss``; unused here since the loss is a plain batch mean.

        Returns:
            The loss tensor, or a ``(loss, outputs)`` tuple when `return_outputs` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        num_labels = self.model.config.num_labels
        loss_fct = torch.nn.MSELoss()
        # Both tensors are reshaped to (-1, num_labels) so their shapes line up.
        loss = loss_fct(
            logits.view(-1, num_labels),
            labels.float().view(-1, num_labels),
        )
        return (loss, outputs) if return_outputs else loss


class OptimizedESCallback(EarlyStoppingCallback):
    """Early-stopping callback that requests a checkpoint and ignores the first evaluations.

    Args:
        patience: forwarded to the parent as ``early_stopping_patience``.
        initial_steps_wo_save: evaluations at a global step below this value are
            not forwarded to the parent callback at all.
    """

    def __init__(self, patience, initial_steps_wo_save):
        super().__init__(early_stopping_patience=patience)
        self.initial_steps_wo_save = initial_steps_wo_save

    def check_metric_value(self, args, state, control, metric_value):
        """Run the parent check, then request a save when the patience counter is zero."""
        super().check_metric_value(args, state, control, metric_value)
        # NOTE(review): a zero counter presumably means the parent just saw an improvement - confirm.
        counter_is_zero = self.early_stopping_patience_counter == 0
        if counter_is_zero:
            control.should_save = True

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        """Forward the evaluation to the parent only once the initial step budget has passed."""
        past_initial_phase = state.global_step >= self.initial_steps_wo_save
        if past_initial_phase:
            super().on_evaluate(args, state, control, metrics, **kwargs)


def compute_metrics(y_true, y_pred):
    """Return RMSE, MAE and MSE between `y_true` and `y_pred` as a dict.

    The RMSE is derived as ``sqrt(MSE)`` rather than through the ``squared=False``
    argument of ``mean_squared_error``, which scikit-learn deprecated and later
    removed. For a single regression target both forms give the same value.

    Returns:
        dict with the keys "root_mean_squared_error", "mean_absolute_error"
        and "mean_squared_error".
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def compute_metrics_for_regression(eval_pred):
    """Metric function handed to the trainer: RMSE, MAE and MSE of logits vs. labels.

    Args:
        eval_pred: pair ``(logits, labels)``; the labels are reshaped to a single
            column before being compared with the logits.

    Returns:
        dict with the keys "root_mean_squared_error", "mean_absolute_error"
        and "mean_squared_error".
    """
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)

    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)
    # sqrt(MSE) replaces `squared=False`, which scikit-learn deprecated and later removed.
    rmse = np.sqrt(mse)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def get_hugging_face_name(name):
    """Translate a short model name into its Hugging Face Hub identifier.

    Returns an empty string for any name that is not listed below.
    """
    known_models = (
        ("camembert-base", "almanach/camembert-base"),
        ("camembert-large", "almanach/camembert-large"),
    )
    for short_name, hub_id in known_models:
        if name == short_name:
            return hub_id
    return ""


def load_dataset(path, encoding="utf-8", shuffle=True):
    """Read a CSV file, drop exact duplicate rows and optionally shuffle the rows.

    Args:
        path: location of the CSV file.
        encoding: text encoding passed to ``pd.read_csv``.
        shuffle: when true, rows are permuted with the fixed seed 9 and the index
            is renumbered; when false, the de-duplicated frame keeps its index.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    frame = pd.read_csv(path, encoding=encoding).drop_duplicates()
    if not shuffle:
        return frame
    permuted = frame.sample(frac=1, random_state=9)
    return permuted.reset_index(drop=True)


def load_dataset_with_features_fr(dataset, data_root_path='data_fr', target_column='MOS'):
    """Load one data split, join its readability features and add word-length features.

    Args:
        dataset: split name; reads ``<root>/<dataset>_set.csv`` and
            ``<root>/features/features_<dataset>_readability_fr.csv``.
        data_root_path: directory holding the split CSVs and the ``features`` folder.
        target_column: name of the regression target. It is never reported as a
            feature, even when it happens to sit after 'sentence' in the merged frame.

    Returns:
        ``(df_merged, feature_columns)``: the merged frame and the ordered list of
        column names located after 'sentence' (minus `target_column`).

    Raises:
        KeyError: if one of the columns to drop is missing from the merged frame.
    """
    df = load_dataset(os.path.join(data_root_path, f'{dataset}_set.csv'))
    df_features = pd.read_csv(os.path.join(data_root_path, 'features', f'features_{dataset}_readability_fr.csv'))
    df_merged = df.merge(df_features, on='ID', suffixes=('', '_df2'))

    # Columns that are dropped rather than used as features.
    ignore_columns = ['sentence_df2', 'paragraphs', 'sentences_per_paragraph']
    df_merged = df_merged.drop(columns=ignore_columns)

    # Split every sentence once and reuse the word lengths for all derived features.
    word_lengths = df_merged['sentence'].apply(lambda text: [len(word) for word in text.split()])

    # `default=0` keeps empty or whitespace-only sentences from raising ValueError.
    df_merged['max_word_length'] = word_lengths.apply(lambda lengths: max(lengths, default=0))

    for threshold in range(5, 10):
        # The threshold is bound as a default argument so the lambda never depends on loop state.
        df_merged['num_word_longer_than_' + str(threshold)] = word_lengths.apply(
            lambda lengths, limit=threshold: sum(length > limit for length in lengths))

    columns = df_merged.columns.to_list()
    # Leaking the target into the feature matrix would invalidate every reported score,
    # so it is filtered out explicitly instead of relying on the CSV column order.
    feature_columns = [
        column for column in columns[columns.index('sentence') + 1:]
        if column != target_column
    ]

    return df_merged, feature_columns


# Enumerate CUDA devices in PCI bus order and expose only device 0 to this process.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Turn on memory growth for every GPU TensorFlow can see.
# NOTE(review): presumably so TensorFlow does not reserve the whole GPU while the PyTorch models use it too - confirm.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Experiment constants. Where the original trailing comment held a bare number it is kept as "earlier note".
BOOTSTRAP_SIZE = 1000  # random ensembles drawn per ensemble size (earlier note: 1000)
MAX_ENSEMBLE_SIZE = 35  # largest ensemble size evaluated (earlier note: 60)
ENSEMBLE_POOL_SIZE = 40  # ensemble members trained per fold (earlier note: 100)
N_FOLDS = 5  # number of KFold splits
MODEL_NAME = 'camembert-base'  # must be a name get_hugging_face_name() maps ('camembert-base' or 'camembert-large'); the earlier note listed 'gbert', 'gelectra', 'gottbert', 'gerpt', which it does not map
TRAIN_BATCH_SIZE = 16  # per-device training batch size, also used as the MLP batch size
VALID_BATCH_SIZE = 16  # per-device evaluation batch size, also used for MLP prediction
N_EVAL_STEPS = 23  # the transformer is evaluated every N_EVAL_STEPS training steps

# All cached models and logs of this experiment live below EXPERIMENT_DIR (relative to the working directory).
EXPERIMENT_NAME = f'ensemble_{MODEL_NAME}'
EXPERIMENT_DIR = f'cache/{EXPERIMENT_NAME}'

# NOTE(review): import placed mid-cell; consider moving it to the import block at the top.
from tensorflow.keras.callbacks import TensorBoard
# One TensorBoard callback instance is shared by every MLP fit, so all of them log into the same directory.
log_dir = f'{EXPERIMENT_DIR}/logs/mlp/'
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Training split plus the list of feature column names, read from ./data.
df_train, feature_columns = load_dataset_with_features_fr('training', data_root_path='data')


def get_predictions(
        df_train_folds,
        df_val_fold,
        n_epochs=5,
        n_log_steps=10,
):
    """Train (or load from cache) the ensemble pool and predict the validation fold.

    For each of the ``ENSEMBLE_POOL_SIZE`` members, 10% of `df_train_folds` is held
    out for early stopping, a transformer regressor is fine-tuned on the rest, and
    an MLP is fitted on the transformer's hidden state concatenated with the
    standardized feature columns. Both models are cached on disk under
    ``EXPERIMENT_DIR`` with a key derived from the IDs of the training rows.

    Args:
        df_train_folds: training rows; needs 'ID', 'sentence', 'MOS' and the feature columns.
        df_val_fold: rows to predict; needs 'ID', 'sentence' and the feature columns.
        n_epochs: number of fine-tuning epochs for the transformer.
        n_log_steps: logging interval (in steps) of the transformer trainer.

    Returns:
        DataFrame with 'ID', 'sentence' and one ``{MODEL_NAME}_prediction_{k}``
        column per ensemble member.
    """
    tf.debugging.disable_traceback_filtering()
    # One prediction column per ensemble member is appended to this frame.
    df_predictions_val_fold = df_val_fold[['ID', 'sentence']].copy()

    hub_name = get_hugging_face_name(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(hub_name)

    X_val_fold = df_val_fold['sentence'].values
    X_val_fold_features = df_val_fold[feature_columns].values

    tokens_val_fold = tokenizer(X_val_fold.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                max_length=128)

    for k in range(ENSEMBLE_POOL_SIZE):
        # Member-specific split: 10% for early stopping, the remaining rows for training.
        df_early_stopping = df_train_folds.sample(frac=0.1, random_state=k)
        df_train_no_es = df_train_folds[~df_train_folds['ID'].isin(df_early_stopping['ID'])]

        X_early_stopping = df_early_stopping['sentence'].values
        X_early_stopping_features = df_early_stopping[feature_columns].values
        y_early_stopping = df_early_stopping['MOS'].values

        X_training = df_train_no_es['sentence'].values
        X_training_features = df_train_no_es[feature_columns].values
        y_training = df_train_no_es['MOS'].values

        tokens_early_stopping = tokenizer(X_early_stopping.tolist(), padding='max_length', return_tensors='pt',
                                          truncation=True, max_length=128)

        tokens_training = tokenizer(X_training.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                    max_length=128)

        # Cache key: digest of the training IDs (with index) plus the model name after the
        # organization prefix. Named `model_id` so the builtin `hash` is not shadowed.
        model_id = (
                hashlib.sha256(
                    pd.util.hash_pandas_object(df_train_no_es['ID'], index=True).values
                ).hexdigest()
                + '_'
                + hub_name[hub_name.find('/') + 1:]
        )
        transformer_path = f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{model_id}'
        mlp_path = f'{EXPERIMENT_DIR}/models/mlp/{model_id}_mlp.keras'

        # Load the fine-tuned transformer from the cache, or train it when it is not there.
        try:
            print(transformer_path)
            model = AutoModelForSequenceClassification.from_pretrained(
                transformer_path, local_files_only=True, num_labels=1
            )
        except EnvironmentError:
            early_stopping_dataset = TCCDataset(tokens_early_stopping, y_early_stopping)
            training_dataset = TCCDataset(tokens_training, y_training)

            training_args = TrainingArguments(
                output_dir=f'{EXPERIMENT_DIR}/{MODEL_NAME}_trainer/',
                num_train_epochs=n_epochs,
                per_device_train_batch_size=TRAIN_BATCH_SIZE,
                per_device_eval_batch_size=VALID_BATCH_SIZE,
                warmup_ratio=0.3,
                learning_rate=5e-5,
                no_cuda=False,
                metric_for_best_model='root_mean_squared_error',
                greater_is_better=False,
                load_best_model_at_end=True,
                # Step-based saving is pushed out of reach on purpose: checkpoints are only
                # requested by the callback, but the parameter must be set because of
                # load_best_model_at_end=True.
                save_steps=N_EVAL_STEPS * 100_000,
                save_total_limit=1,  # 1 is enough, a checkpoint is only written for a better model
                eval_steps=N_EVAL_STEPS,
                # `evaluation_strategy` is deprecated, `eval_strategy` is its replacement
                eval_strategy='steps',
                seed=k,
                logging_steps=n_log_steps,
                logging_dir=f'{EXPERIMENT_DIR}/logs/member_{k}',
                logging_strategy='steps',
            )

            set_seed(training_args.seed)
            model = AutoModelForSequenceClassification.from_pretrained(
                hub_name, num_labels=1
            )

            trainer = RegressionTrainer(
                model=model,
                args=training_args,
                train_dataset=training_dataset,
                eval_dataset=early_stopping_dataset,
                compute_metrics=compute_metrics_for_regression,
                callbacks=[OptimizedESCallback(patience=5, initial_steps_wo_save=300)],
            )
            trainer.train()

            model.save_pretrained(transformer_path)

        hidden_state_val_fold = extract_hidden_state(model, tokens_val_fold)

        # A single scaler, fitted on the member's training rows, serves training,
        # early-stopping and validation features. Fitting a second scaler on all
        # training folds for the validation data (as before) feeds the MLP inputs on a
        # different scale than the one it was trained with.
        scaler = StandardScaler()
        scaler.fit(X_training_features)
        X_val_fold_features_scaled = scaler.transform(X_val_fold_features)
        X_val_fold_with_features = np.concatenate((hidden_state_val_fold.detach().numpy(), X_val_fold_features_scaled),
                                                  axis=1)

        # Load the MLP from the cache, or train it when loading fails for any reason
        # (missing or unreadable file); the broad except is a deliberate cache probe.
        try:
            mlp = load_model(mlp_path)
        except Exception:
            # Remove an unreadable leftover so the existence check after training is reliable.
            if os.path.exists(mlp_path):
                os.remove(mlp_path)

            hidden_state_train = extract_hidden_state(model, tokens_training)
            hidden_state_early_stopping = extract_hidden_state(model, tokens_early_stopping)

            # Seeds Python, NumPy and the Keras backend; np.random.seed alone does not
            # control the Keras weight initializers.
            tf.keras.utils.set_random_seed(k)
            mlp = Sequential(
                [
                    Input(shape=(model.config.hidden_size + len(feature_columns),), name='input'),
                    Dense(model.config.hidden_size, activation='relu', name='layer1'),
                    Dense(1, activation='linear', name='layer2'),
                ]
            )

            mlp.compile(
                optimizer='rmsprop',
                loss=tf.keras.losses.MeanSquaredError(),
                metrics=[tf.keras.metrics.RootMeanSquaredError()],
            )
            es = EarlyStopping(monitor='val_root_mean_squared_error', mode='min', verbose=1, patience=100)
            mc = ModelCheckpoint(
                mlp_path,
                monitor='val_root_mean_squared_error',
                mode='min',
                verbose=1,
                save_best_only=True
            )

            X_train_features_scaled = scaler.transform(X_training_features)
            X_es_features_scaled = scaler.transform(X_early_stopping_features)

            X_train_with_features = np.concatenate((hidden_state_train.detach().numpy(), X_train_features_scaled),
                                                   axis=1)
            X_es_with_features = np.concatenate((hidden_state_early_stopping.detach().numpy(), X_es_features_scaled),
                                                axis=1)

            # With epochs=10 and patience=100 early stopping cannot trigger; the best epoch
            # is kept through the checkpoint callback instead.
            mlp.fit(X_train_with_features, y_training,
                    validation_data=(X_es_with_features, y_early_stopping),
                    batch_size=TRAIN_BATCH_SIZE,
                    epochs=10, callbacks=[tensorboard_callback, es, mc])

            if os.path.exists(mlp_path):
                # The checkpoint holds the best epoch. Saving the in-memory model here
                # would overwrite it with the last-epoch weights.
                mlp = load_model(mlp_path)
            else:
                # No checkpoint was written (e.g. the monitored metric never improved):
                # fall back to persisting the final weights so the cache entry exists.
                os.makedirs(os.path.dirname(mlp_path), exist_ok=True)
                mlp.save(mlp_path)

        prediction_val_fold = mlp.predict(X_val_fold_with_features, batch_size=VALID_BATCH_SIZE)

        df_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'] = prediction_val_fold

    return df_predictions_val_fold


def extract_hidden_state(model, tokens, batch_size=16):
    """Return the final-layer hidden state of the first token for every input sequence.

    Args:
        model: callable model exposing ``config.hidden_size`` and returning an object
            with ``hidden_states`` when called with ``output_hidden_states=True``.
        tokens: tokenizer output exposing ``input_ids`` and, optionally,
            ``attention_mask`` as attributes.
        batch_size: number of sequences sent through the model at once.

    Returns:
        CPU tensor with one row of ``hidden_size`` values per input sequence.

    Note:
        The attention mask is forwarded whenever the tokenizer output has one. The
        inputs are padded, and without the mask the padding positions are attended
        to. MLPs cached from runs that did not pass the mask were fitted on slightly
        different hidden states.
    """
    # Use the GPU when there is one, otherwise run on the CPU instead of failing in `.cuda()`.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device).eval()

    input_ids = tokens.input_ids
    attention_mask = getattr(tokens, 'attention_mask', None)
    n_samples = len(input_ids)

    first_token_states = torch.zeros((n_samples, model.config.hidden_size))
    with torch.no_grad():
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size  # slicing past the end simply yields the shorter tail batch
            model_inputs = {'input_ids': input_ids[start:stop].to(device)}
            if attention_mask is not None:
                model_inputs['attention_mask'] = attention_mask[start:stop].to(device)
            output = model(**model_inputs, output_hidden_states=True)
            # hidden_states[-1] is the last layer; position 0 is the first token of each sequence.
            first_token_states[start:stop] = output.hidden_states[-1][:, 0].cpu()
    return first_token_states


# Bootstrap evaluation of ensembles of growing size, one cross-validation fold at a time.
# Each (fold, ensemble size) pair contributes one row holding the mean and standard
# deviation of every metric over BOOTSTRAP_SIZE randomly drawn ensembles.
MACRO_SCORE_COLUMNS = [
    'ensemble_size',
    'model_name',
    'mean_absolute_error_mean',
    'mean_absolute_error_std',
    'mean_squared_error_mean',
    'mean_squared_error_std',
    'root_mean_squared_error_mean',
    'root_mean_squared_error_std',
]
# Rows are gathered in a list and turned into a frame once. The earlier row-wise
# `pd.concat` sat outside both loops, so only the very last row reached the CSV.
macro_score_rows = []

for train_idx, val_idx in KFold(n_splits=N_FOLDS).split(df_train):
    # KFold yields positions, hence `.iloc`.
    df_train_folds = df_train.iloc[train_idx]
    df_val_fold = df_train.iloc[val_idx]
    # Missing values are filled with the column means of the training folds only.
    df_train_folds = df_train_folds.fillna(df_train_folds.mean(numeric_only=True))
    df_val_fold = df_val_fold.fillna(df_train_folds.mean(numeric_only=True))

    y_val_fold = df_val_fold['MOS'].values

    pool_predictions_val_fold = get_predictions(df_train_folds, df_val_fold)

    # Row k holds the validation predictions of pool member k.
    pool_matrix = np.stack(
        [
            pool_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'].to_numpy()
            for k in range(ENSEMBLE_POOL_SIZE)
        ]
    )

    for current_ensemble_size in range(1, MAX_ENSEMBLE_SIZE + 1):
        # Seeded per ensemble size so the drawn member indices are reproducible.
        np.random.seed(current_ensemble_size)
        idx = np.random.choice(
            ENSEMBLE_POOL_SIZE,
            size=(BOOTSTRAP_SIZE, current_ensemble_size),
        )

        # pool_matrix[idx] has one axis for the bootstrap draw, one for the member and
        # one for the sample; averaging over the member axis gives the ensemble prediction.
        ensemble_predictions = pool_matrix[idx].mean(axis=1)

        df_ensemble_scores = pd.DataFrame(
            [compute_metrics(y_val_fold, pred) for pred in ensemble_predictions]
        )

        row = {
            'ensemble_size': current_ensemble_size,
            'model_name': MODEL_NAME,
        }
        for metric in ('mean_absolute_error', 'mean_squared_error', 'root_mean_squared_error'):
            row[f'{metric}_mean'] = df_ensemble_scores[metric].mean()
            row[f'{metric}_std'] = df_ensemble_scores[metric].std()
        macro_score_rows.append(row)

df_macro_ensemble_scores = pd.DataFrame(macro_score_rows, columns=MACRO_SCORE_COLUMNS)

df_macro_ensemble_scores[
    df_macro_ensemble_scores['model_name'] == MODEL_NAME
    ].to_csv(
    f'ensemble_scores_{MODEL_NAME}.csv', index=False, sep=';', encoding='utf-8'
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/508 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/811k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

cache/ensemble_camembert-base/models/camembert-base/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6224,2.490271,1.578059,1.346344,2.490271
46,1.5675,0.677024,0.822814,0.656252,0.677024
69,0.6184,0.647892,0.804918,0.575335,0.647892


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 1s/step - loss: 7.8671 - root_mean_squared_error: 2.8048
Epoch 1: val_root_mean_squared_error improved from inf to 0.71674, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - loss: 2.9046 - root_mean_squared_error: 1.6238 - val_loss: 0.5137 - val_root_mean_squared_error: 0.7167
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2091 - root_mean_squared_error: 0.4573
Epoch 2: val_root_mean_squared_error improved from 0.71674 to 0.32506, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3580 - root_mean_squared_error: 0.5899

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4457,2.944228,1.715875,1.512103,2.944228
46,1.187,0.654739,0.809159,0.621589,0.654739
69,0.7241,0.478783,0.691942,0.537792,0.478783


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 6.8592 - root_mean_squared_error: 2.6190
Epoch 1: val_root_mean_squared_error improved from inf to 0.57848, saving model to cache/ensemble_camembert-base/models/mlp/c91b46895596b42df4e1385f135441ffc449535d6e19f5b434fe664fa6596880_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 2.6354 - root_mean_squared_error: 1.5503 - val_loss: 0.3346 - val_root_mean_squared_error: 0.5785
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1404 - root_mean_squared_error: 0.3747
Epoch 2: val_root_mean_squared_error did not improve from 0.57848
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3138 - root_mean_squared_error: 0.5519 - val_loss: 0.3349 - val_root_mean_squared_error: 0.5787
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9223,2.323345,1.524252,1.309859,2.323345
46,1.3393,0.615166,0.784325,0.59986,0.615166
69,0.6901,0.54708,0.739648,0.564562,0.54708


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 9.3589 - root_mean_squared_error: 3.0592
Epoch 1: val_root_mean_squared_error improved from inf to 0.95640, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1313 - root_mean_squared_error: 1.6858 - val_loss: 0.9147 - val_root_mean_squared_error: 0.9564
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7012 - root_mean_squared_error: 0.8374
Epoch 2: val_root_mean_squared_error improved from 0.95640 to 0.77632, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4578 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8766,2.268238,1.506067,1.28535,2.268237
46,1.2554,0.624005,0.78994,0.663967,0.624005
69,0.673,0.610587,0.781401,0.673154,0.610587


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 6.8781 - root_mean_squared_error: 2.6226
Epoch 1: val_root_mean_squared_error improved from inf to 0.98321, saving model to cache/ensemble_camembert-base/models/mlp/8aef8ffb6ad3e8ad051fa6ca45595b1683b1810a3df6a0d40d82dadfc03b83cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9855 - root_mean_squared_error: 1.6522 - val_loss: 0.9667 - val_root_mean_squared_error: 0.9832
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.8714 - root_mean_squared_error: 0.9335
Epoch 2: val_root_mean_squared_error improved from 0.98321 to 0.89401, saving model to cache/ensemble_camembert-base/models/mlp/8aef8ffb6ad3e8ad051fa6ca45595b1683b1810a3df6a0d40d82dadfc03b83cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5490 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9295,1.850559,1.360353,1.137468,1.850559
46,1.179,0.62416,0.790038,0.619678,0.62416
69,0.7745,0.584097,0.764263,0.59769,0.584097


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.6992 - root_mean_squared_error: 2.5883
Epoch 1: val_root_mean_squared_error improved from inf to 0.47244, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7744 - root_mean_squared_error: 1.5981 - val_loss: 0.2232 - val_root_mean_squared_error: 0.4724
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2808 - root_mean_squared_error: 0.5299
Epoch 2: val_root_mean_squared_error improved from 0.47244 to 0.36607, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6105 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.638,2.555076,1.598461,1.376049,2.555076
46,1.1095,0.698769,0.835924,0.683623,0.698769
69,0.6943,0.674993,0.821579,0.67618,0.674992


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.4595 - root_mean_squared_error: 2.7312
Epoch 1: val_root_mean_squared_error improved from inf to 0.85796, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2723 - root_mean_squared_error: 1.7351 - val_loss: 0.7361 - val_root_mean_squared_error: 0.8580
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5859 - root_mean_squared_error: 0.7654
Epoch 2: val_root_mean_squared_error improved from 0.85796 to 0.64330, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3751 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7186,3.076987,1.754134,1.544781,3.076987
46,1.1753,0.702124,0.837929,0.6261,0.702124
69,0.7136,0.677145,0.822888,0.626652,0.677145


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 736ms/step - loss: 6.4894 - root_mean_squared_error: 2.5474
Epoch 1: val_root_mean_squared_error improved from inf to 0.58376, saving model to cache/ensemble_camembert-base/models/mlp/5e4f0c82c76c1de4ecc7bac97603b913aac8745333016a7294efc52160b5c5d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 2.7940 - root_mean_squared_error: 1.5988 - val_loss: 0.3408 - val_root_mean_squared_error: 0.5838
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6314 - root_mean_squared_error: 0.7946
Epoch 2: val_root_mean_squared_error did not improve from 0.58376
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7278 - root_mean_squared_error: 0.8383 - val_loss: 0.8469 - val_root_mean_squared_error: 0.9203
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2185,2.747128,1.657446,1.43819,2.747128
46,1.1518,0.67539,0.821821,0.651487,0.67539
69,0.5743,0.659459,0.812071,0.657141,0.659459


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 5.6554 - root_mean_squared_error: 2.3781
Epoch 1: val_root_mean_squared_error improved from inf to 0.69086, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7137 - root_mean_squared_error: 1.5786 - val_loss: 0.4773 - val_root_mean_squared_error: 0.6909
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6897 - root_mean_squared_error: 0.8305
Epoch 2: val_root_mean_squared_error improved from 0.69086 to 0.43999, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6813 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6656,2.990136,1.729201,1.399785,2.990136
46,1.2244,1.045353,1.022425,0.861133,1.045353
69,0.6457,0.971906,0.985853,0.828424,0.971907


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 9.5542 - root_mean_squared_error: 3.0910
Epoch 1: val_root_mean_squared_error improved from inf to 0.59611, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.8535 - root_mean_squared_error: 1.5997 - val_loss: 0.3553 - val_root_mean_squared_error: 0.5961
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2984 - root_mean_squared_error: 0.5463
Epoch 2: val_root_mean_squared_error improved from 0.59611 to 0.41126, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4047 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0704,1.954869,1.398166,1.181809,1.954869
46,1.2021,0.622058,0.788707,0.629392,0.622058
69,0.6667,0.559942,0.748293,0.602306,0.559942


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 5.2000 - root_mean_squared_error: 2.2804
Epoch 1: val_root_mean_squared_error improved from inf to 0.55208, saving model to cache/ensemble_camembert-base/models/mlp/8a4814e4aec03a6479b9ec24494843818b497ad7e712337f8943760b856168f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9738 - root_mean_squared_error: 1.6617 - val_loss: 0.3048 - val_root_mean_squared_error: 0.5521
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2340 - root_mean_squared_error: 0.4837
Epoch 2: val_root_mean_squared_error did not improve from 0.55208
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4122 - root_mean_squared_error: 0.6381 - val_loss: 0.3254 - val_root_mean_squared_error: 0.5704
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9723,1.652092,1.285337,1.001235,1.652092
46,1.0637,0.820233,0.905667,0.798332,0.820233
69,0.6086,0.837434,0.915114,0.808982,0.837434


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 6.2276 - root_mean_squared_error: 2.4955
Epoch 1: val_root_mean_squared_error improved from inf to 0.70197, saving model to cache/ensemble_camembert-base/models/mlp/091c75152f6299a2a53f8c4354dbe816ca45e6e866b8e88328b36ab63710f859_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5841 - root_mean_squared_error: 1.5392 - val_loss: 0.4928 - val_root_mean_squared_error: 0.7020
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5968 - root_mean_squared_error: 0.7726
Epoch 2: val_root_mean_squared_error did not improve from 0.70197
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6371 - root_mean_squared_error: 0.7863 - val_loss: 0.7572 - val_root_mean_squared_error: 0.8702
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6698,2.600871,1.612722,1.328857,2.600871
46,1.077,0.865307,0.930219,0.819799,0.865307
69,0.737,0.859238,0.926951,0.813159,0.859238


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 5.1103 - root_mean_squared_error: 2.2606
Epoch 1: val_root_mean_squared_error improved from inf to 0.54560, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8191 - root_mean_squared_error: 1.6170 - val_loss: 0.2977 - val_root_mean_squared_error: 0.5456
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2721 - root_mean_squared_error: 0.5216
Epoch 2: val_root_mean_squared_error improved from 0.54560 to 0.48912, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4493 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9688,1.943637,1.394144,1.174379,1.943637
46,1.0207,0.666181,0.816199,0.683988,0.666181
69,0.7505,0.510273,0.714334,0.594686,0.510273


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 6.0223 - root_mean_squared_error: 2.4540
Epoch 1: val_root_mean_squared_error improved from inf to 1.12117, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8104 - root_mean_squared_error: 1.6115 - val_loss: 1.2570 - val_root_mean_squared_error: 1.1212
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.2797 - root_mean_squared_error: 1.1312
Epoch 2: val_root_mean_squared_error improved from 1.12117 to 0.91808, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7028 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8206,2.525959,1.589327,1.387094,2.525959
46,1.269,0.599517,0.774285,0.610901,0.599517
69,0.756,0.578614,0.760667,0.594753,0.578614


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 6.8653 - root_mean_squared_error: 2.6202
Epoch 1: val_root_mean_squared_error improved from inf to 0.50975, saving model to cache/ensemble_camembert-base/models/mlp/82dea812ed409f71bd10886113d2c12dcbf6cf29487bc1a7021b481570f51114_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9414 - root_mean_squared_error: 1.6398 - val_loss: 0.2598 - val_root_mean_squared_error: 0.5098
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2939 - root_mean_squared_error: 0.5421
Epoch 2: val_root_mean_squared_error did not improve from 0.50975
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6769 - root_mean_squared_error: 0.8129 - val_loss: 0.4298 - val_root_mean_squared_error: 0.6556
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4203,2.17087,1.473387,1.270132,2.17087
46,1.0324,0.557444,0.746621,0.567165,0.557444
69,0.6628,0.455448,0.674869,0.520126,0.455448


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 8.3897 - root_mean_squared_error: 2.8965
Epoch 1: val_root_mean_squared_error improved from inf to 1.14711, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8111 - root_mean_squared_error: 1.5933 - val_loss: 1.3159 - val_root_mean_squared_error: 1.1471
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0628 - root_mean_squared_error: 1.0309
Epoch 2: val_root_mean_squared_error improved from 1.14711 to 0.66299, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5661 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.558,2.624151,1.619923,1.36992,2.624151
46,1.3301,0.742265,0.861548,0.677399,0.742265
69,0.8352,0.771859,0.878555,0.726676,0.771859


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.3123 - root_mean_squared_error: 2.7041
Epoch 1: val_root_mean_squared_error improved from inf to 0.53805, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2322 - root_mean_squared_error: 1.7183 - val_loss: 0.2895 - val_root_mean_squared_error: 0.5380
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2608 - root_mean_squared_error: 0.5107
Epoch 2: val_root_mean_squared_error improved from 0.53805 to 0.34083, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5103 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.191,2.050297,1.431886,1.142712,2.050297
46,1.1435,0.842721,0.917998,0.70511,0.842721
69,0.7255,0.959552,0.979567,0.789279,0.959552


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.6427 - root_mean_squared_error: 2.7645
Epoch 1: val_root_mean_squared_error improved from inf to 0.47635, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0072 - root_mean_squared_error: 1.6601 - val_loss: 0.2269 - val_root_mean_squared_error: 0.4763
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1996 - root_mean_squared_error: 0.4467
Epoch 2: val_root_mean_squared_error improved from 0.47635 to 0.37442, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3400 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8272,2.913573,1.706919,1.472789,2.913573
46,1.3095,0.766734,0.875633,0.732574,0.766734
69,0.6713,0.71728,0.846924,0.710218,0.71728


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.3270 - root_mean_squared_error: 2.5154
Epoch 1: val_root_mean_squared_error improved from inf to 0.66583, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6030 - root_mean_squared_error: 1.5471 - val_loss: 0.4433 - val_root_mean_squared_error: 0.6658
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.2326 - root_mean_squared_error: 0.4823
Epoch 2: val_root_mean_squared_error improved from 0.66583 to 0.48576, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4886 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4158,3.470741,1.862992,1.642627,3.47074
46,1.2129,0.877328,0.936658,0.750069,0.877328
69,0.6645,0.778942,0.882577,0.7395,0.778942


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.3876 - root_mean_squared_error: 2.3211
Epoch 1: val_root_mean_squared_error improved from inf to 0.52592, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6180 - root_mean_squared_error: 1.5610 - val_loss: 0.2766 - val_root_mean_squared_error: 0.5259
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3659 - root_mean_squared_error: 0.6049
Epoch 2: val_root_mean_squared_error improved from 0.52592 to 0.32992, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4111 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5926,3.002235,1.732696,1.502324,3.002235
46,1.0,0.753829,0.868233,0.728598,0.753829
69,0.6599,0.733382,0.856377,0.722573,0.733382


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 9.0568 - root_mean_squared_error: 3.0094
Epoch 1: val_root_mean_squared_error improved from inf to 0.58200, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0800 - root_mean_squared_error: 1.6728 - val_loss: 0.3387 - val_root_mean_squared_error: 0.5820
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5357 - root_mean_squared_error: 0.7319
Epoch 2: val_root_mean_squared_error improved from 0.58200 to 0.43951, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5301 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0783,1.689126,1.299664,1.036666,1.689126
46,1.0019,0.79638,0.892401,0.75828,0.79638
69,0.7238,0.673518,0.820682,0.675146,0.673518


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 6.8946 - root_mean_squared_error: 2.6258
Epoch 1: val_root_mean_squared_error improved from inf to 0.50902, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5464 - root_mean_squared_error: 1.5244 - val_loss: 0.2591 - val_root_mean_squared_error: 0.5090
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2120 - root_mean_squared_error: 0.4604
Epoch 2: val_root_mean_squared_error improved from 0.50902 to 0.38703, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3489 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.47,1.935553,1.391241,1.176192,1.935553
46,1.2605,0.627649,0.792243,0.693086,0.627649
69,0.659,0.537918,0.733429,0.643002,0.537918


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 6.6466 - root_mean_squared_error: 2.5781
Epoch 1: val_root_mean_squared_error improved from inf to 0.49827, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6779 - root_mean_squared_error: 1.5658 - val_loss: 0.2483 - val_root_mean_squared_error: 0.4983
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6015 - root_mean_squared_error: 0.7755
Epoch 2: val_root_mean_squared_error improved from 0.49827 to 0.32418, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5292 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5551,3.955476,1.988838,1.790462,3.955476
46,1.1095,0.919851,0.959089,0.753721,0.919851
69,0.7657,0.882383,0.939352,0.734168,0.882383


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 9.6997 - root_mean_squared_error: 3.1144
Epoch 1: val_root_mean_squared_error improved from inf to 0.83397, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8234 - root_mean_squared_error: 1.5928 - val_loss: 0.6955 - val_root_mean_squared_error: 0.8340
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.2255 - root_mean_squared_error: 1.1070
Epoch 2: val_root_mean_squared_error improved from 0.83397 to 0.81559, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6869 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4054,2.870765,1.694333,1.464067,2.870765
46,1.1234,0.730156,0.854492,0.692245,0.730156
69,0.6926,0.72695,0.852614,0.690263,0.72695


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 727ms/step - loss: 6.4261 - root_mean_squared_error: 2.5350
Epoch 1: val_root_mean_squared_error improved from inf to 0.79217, saving model to cache/ensemble_camembert-base/models/mlp/9fc2278b9fb032f58cd23e27c5f396b0a6db8587018b115d0d04cc171dba8904_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9593 - root_mean_squared_error: 1.6481 - val_loss: 0.6275 - val_root_mean_squared_error: 0.7922
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3719 - root_mean_squared_error: 0.6098
Epoch 2: val_root_mean_squared_error improved from 0.79217 to 0.47955, saving model to cache/ensemble_camembert-base/models/mlp/9fc2278b9fb032f58cd23e27c5f396b0a6db8587018b115d0d04cc171dba8904_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3620 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1053,2.06686,1.437658,1.210877,2.06686
46,1.193,0.658184,0.811285,0.661443,0.658184
69,0.7808,0.660466,0.812691,0.670345,0.660466


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 7.2850 - root_mean_squared_error: 2.6991
Epoch 1: val_root_mean_squared_error improved from inf to 0.38478, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4660 - root_mean_squared_error: 1.5061 - val_loss: 0.1481 - val_root_mean_squared_error: 0.3848
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1475 - root_mean_squared_error: 0.3841
Epoch 2: val_root_mean_squared_error improved from 0.38478 to 0.31788, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4320 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0075,3.303155,1.817459,1.555972,3.303155
46,1.3263,0.952909,0.97617,0.768583,0.952908
69,0.6846,0.927247,0.962937,0.76164,0.927247


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 646ms/step - loss: 7.5413 - root_mean_squared_error: 2.7461
Epoch 1: val_root_mean_squared_error improved from inf to 0.65119, saving model to cache/ensemble_camembert-base/models/mlp/6a317fed24385a19e50a76c87e2e9bdf452604069dca5e47ce84b9d420822dc3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8609 - root_mean_squared_error: 1.6128 - val_loss: 0.4240 - val_root_mean_squared_error: 0.6512
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3414 - root_mean_squared_error: 0.5843
Epoch 2: val_root_mean_squared_error did not improve from 0.65119
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5428 - root_mean_squared_error: 0.7309 - val_loss: 0.5391 - val_root_mean_squared_error: 0.7342
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8299,2.330871,1.526719,1.294495,2.330871
46,1.3703,0.672823,0.820258,0.678704,0.672823
69,0.7166,0.645649,0.803523,0.662478,0.645649


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 8.1147 - root_mean_squared_error: 2.8486
Epoch 1: val_root_mean_squared_error improved from inf to 0.52554, saving model to cache/ensemble_camembert-base/models/mlp/f4d6ad45a2f846178e015ed274b2c46dc44bf61d323a00ba1bf31acce30441f4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1570 - root_mean_squared_error: 1.6942 - val_loss: 0.2762 - val_root_mean_squared_error: 0.5255
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2923 - root_mean_squared_error: 0.5406
Epoch 2: val_root_mean_squared_error did not improve from 0.52554
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4359 - root_mean_squared_error: 0.6580 - val_loss: 0.4863 - val_root_mean_squared_error: 0.6974
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5791,2.125646,1.45796,1.302131,2.125646
46,1.1223,0.448528,0.669722,0.578799,0.448528
69,0.6443,0.41933,0.647557,0.543428,0.41933


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 7.4233 - root_mean_squared_error: 2.7246
Epoch 1: val_root_mean_squared_error improved from inf to 0.56891, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9210 - root_mean_squared_error: 1.6359 - val_loss: 0.3237 - val_root_mean_squared_error: 0.5689
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4031 - root_mean_squared_error: 0.6349
Epoch 2: val_root_mean_squared_error improved from 0.56891 to 0.43851, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4166 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7218,1.799696,1.341528,1.10115,1.799696
46,0.9703,0.70834,0.841629,0.661426,0.70834
69,0.7836,0.745672,0.863523,0.685939,0.745672


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 8.3091 - root_mean_squared_error: 2.8825
Epoch 1: val_root_mean_squared_error improved from inf to 0.52185, saving model to cache/ensemble_camembert-base/models/mlp/2d7d88ddd778d3577f4c03e1b87367fca288452ca48d42c8edba470f93ac6d9f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.6060 - root_mean_squared_error: 1.8131 - val_loss: 0.2723 - val_root_mean_squared_error: 0.5219
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5282 - root_mean_squared_error: 0.7268
Epoch 2: val_root_mean_squared_error did not improve from 0.52185
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4223 - root_mean_squared_error: 0.6472 - val_loss: 1.2586 - val_root_mean_squared_error: 1.1219
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2555,2.270335,1.506763,1.305416,2.270335
46,1.3931,0.564087,0.751057,0.536079,0.564087
69,0.693,0.513913,0.716878,0.523925,0.513913


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 8.1812 - root_mean_squared_error: 2.8603
Epoch 1: val_root_mean_squared_error improved from inf to 0.52857, saving model to cache/ensemble_camembert-base/models/mlp/729d3fc844a465cd30367969b82299cf786732c878aadace3f4321e8ece7baab_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1652 - root_mean_squared_error: 1.6991 - val_loss: 0.2794 - val_root_mean_squared_error: 0.5286
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1831 - root_mean_squared_error: 0.4279
Epoch 2: val_root_mean_squared_error did not improve from 0.52857
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3486 - root_mean_squared_error: 0.5850 - val_loss: 0.4475 - val_root_mean_squared_error: 0.6690
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0903,2.959229,1.720241,1.53583,2.959229
46,1.5025,0.641491,0.800931,0.620093,0.641491
69,0.7292,0.599751,0.774436,0.609267,0.599751


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 6.8479 - root_mean_squared_error: 2.6168
Epoch 1: val_root_mean_squared_error improved from inf to 0.40423, saving model to cache/ensemble_camembert-base/models/mlp/05f918c72a8624909f3a4048fa1ed18b5b836787eb3fffc3abd94bf7a9cc60f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7504 - root_mean_squared_error: 1.5839 - val_loss: 0.1634 - val_root_mean_squared_error: 0.4042
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1945 - root_mean_squared_error: 0.4411
Epoch 2: val_root_mean_squared_error improved from 0.40423 to 0.32939, saving model to cache/ensemble_camembert-base/models/mlp/05f918c72a8624909f3a4048fa1ed18b5b836787eb3fffc3abd94bf7a9cc60f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4500 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2207,2.416087,1.554377,1.351234,2.416087
46,1.274,0.594195,0.77084,0.624813,0.594195
69,0.6279,0.587109,0.76623,0.625405,0.587109


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 4.2425 - root_mean_squared_error: 2.0597
Epoch 1: val_root_mean_squared_error improved from inf to 0.59934, saving model to cache/ensemble_camembert-base/models/mlp/8b0a576710bd4fd4b5c445d90ace9a8c336879b7d5624fc8f538747883b9bf43_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7579 - root_mean_squared_error: 1.6025 - val_loss: 0.3592 - val_root_mean_squared_error: 0.5993
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2320 - root_mean_squared_error: 0.4817
Epoch 2: val_root_mean_squared_error did not improve from 0.59934
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3605 - root_mean_squared_error: 0.5980 - val_loss: 0.6404 - val_root_mean_squared_error: 0.8002
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7016,3.07801,1.754426,1.474161,3.07801
46,1.1528,0.909168,0.953503,0.805974,0.909168
69,0.6731,0.843879,0.918629,0.771894,0.843879


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 8.3510 - root_mean_squared_error: 2.8898
Epoch 1: val_root_mean_squared_error improved from inf to 0.92220, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - loss: 2.9372 - root_mean_squared_error: 1.6308 - val_loss: 0.8505 - val_root_mean_squared_error: 0.9222
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6058 - root_mean_squared_error: 0.7783
Epoch 2: val_root_mean_squared_error improved from 0.92220 to 0.56082, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3505 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.3832,2.359638,1.536111,1.281733,2.359638
46,1.0238,0.735174,0.857423,0.69269,0.735174
69,0.6275,0.686288,0.828425,0.644503,0.686288


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.3045 - root_mean_squared_error: 2.5109
Epoch 1: val_root_mean_squared_error improved from inf to 1.06497, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6514 - root_mean_squared_error: 1.5646 - val_loss: 1.1342 - val_root_mean_squared_error: 1.0650
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.5358 - root_mean_squared_error: 1.2393
Epoch 2: val_root_mean_squared_error improved from 1.06497 to 0.41609, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7099 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3339,2.233259,1.494409,1.25307,2.233259
46,1.1915,0.675954,0.822164,0.641773,0.675954
69,0.7362,0.694313,0.833254,0.657209,0.694313


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.4214 - root_mean_squared_error: 2.5341
Epoch 1: val_root_mean_squared_error improved from inf to 0.86028, saving model to cache/ensemble_camembert-base/models/mlp/32a7c1c4400e3bfc1d50effd75720a5b90065021af5f677f4bd56d519bf56cfb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7238 - root_mean_squared_error: 1.5801 - val_loss: 0.7401 - val_root_mean_squared_error: 0.8603
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9531 - root_mean_squared_error: 0.9762
Epoch 2: val_root_mean_squared_error did not improve from 0.86028
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6452 - root_mean_squared_error: 0.7905 - val_loss: 0.8862 - val_root_mean_squared_error: 0.9414
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9507,3.849906,1.962118,1.637365,3.849906
46,1.1639,1.231494,1.109727,0.894128,1.231494
69,0.6603,1.21854,1.103875,0.888657,1.21854


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 6.1182 - root_mean_squared_error: 2.4735
Epoch 1: val_root_mean_squared_error improved from inf to 0.79607, saving model to cache/ensemble_camembert-base/models/mlp/d329d22ae60ae3705b140afc4a0835d8e39cb1cdd5fe908c53d0975c9cf00374_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 3.4178 - root_mean_squared_error: 1.7741 - val_loss: 0.6337 - val_root_mean_squared_error: 0.7961
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.7677 - root_mean_squared_error: 0.8762
Epoch 2: val_root_mean_squared_error did not improve from 0.79607
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5681 - root_mean_squared_error: 0.7489 - val_loss: 0.6635 - val_root_mean_squared_error: 0.8145
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8861,2.780955,1.66762,1.403264,2.780955
46,1.3346,0.828322,0.910122,0.737032,0.828322
69,0.8683,0.841523,0.917346,0.75704,0.841523


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 6.6076 - root_mean_squared_error: 2.5705
Epoch 1: val_root_mean_squared_error improved from inf to 0.61233, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0541 - root_mean_squared_error: 1.6735 - val_loss: 0.3750 - val_root_mean_squared_error: 0.6123
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4133 - root_mean_squared_error: 0.6429
Epoch 2: val_root_mean_squared_error improved from 0.61233 to 0.58791, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3634 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5035,2.194944,1.481534,1.264504,2.194944
46,1.2669,0.642861,0.801786,0.630116,0.642861
69,0.8253,0.591902,0.769352,0.591981,0.591902


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 6.0675 - root_mean_squared_error: 2.4632
Epoch 1: val_root_mean_squared_error improved from inf to 0.99952, saving model to cache/ensemble_camembert-base/models/mlp/cddb51286605f923863923afd27b5cf3e2ddd418200d8aa790ac0918441fe434_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3709 - root_mean_squared_error: 1.4756 - val_loss: 0.9990 - val_root_mean_squared_error: 0.9995
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3551 - root_mean_squared_error: 0.5959
Epoch 2: val_root_mean_squared_error did not improve from 0.99952
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6359 - root_mean_squared_error: 0.7879 - val_loss: 1.0222 - val_root_mean_squared_error: 1.0111
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8596,1.642426,1.281572,1.016857,1.642426
46,1.2116,0.748252,0.865016,0.73815,0.748252
69,0.8108,0.741971,0.861377,0.737064,0.741971


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 7.6666 - root_mean_squared_error: 2.7689
Epoch 1: val_root_mean_squared_error improved from inf to 0.94368, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3353 - root_mean_squared_error: 1.7438 - val_loss: 0.8905 - val_root_mean_squared_error: 0.9437
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.6829 - root_mean_squared_error: 0.8264
Epoch 2: val_root_mean_squared_error improved from 0.94368 to 0.44646, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4989 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.1756,2.549194,1.59662,1.308676,2.549194
46,1.0899,0.849059,0.921444,0.729762,0.849059
69,0.6901,0.78352,0.885167,0.70016,0.78352


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 5.6167 - root_mean_squared_error: 2.3700
Epoch 1: val_root_mean_squared_error improved from inf to 0.44093, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3673 - root_mean_squared_error: 1.4758 - val_loss: 0.1944 - val_root_mean_squared_error: 0.4409
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2009 - root_mean_squared_error: 0.4483
Epoch 2: val_root_mean_squared_error improved from 0.44093 to 0.43369, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4000 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1924,2.164573,1.471249,1.240052,2.164573
46,1.001,0.645378,0.803354,0.674966,0.645378
69,0.6881,0.349673,0.591332,0.488075,0.349673


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 8.8110 - root_mean_squared_error: 2.9683
Epoch 1: val_root_mean_squared_error improved from inf to 0.44221, saving model to cache/ensemble_camembert-base/models/mlp/6ad87dec33e32e920c9e6f619d536df3909cd7dd0ae8e5b0f1ae71e7d488bbd9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6098 - root_mean_squared_error: 1.5348 - val_loss: 0.1955 - val_root_mean_squared_error: 0.4422
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4720 - root_mean_squared_error: 0.6870
Epoch 2: val_root_mean_squared_error improved from 0.44221 to 0.40269, saving model to cache/ensemble_camembert-base/models/mlp/6ad87dec33e32e920c9e6f619d536df3909cd7dd0ae8e5b0f1ae71e7d488bbd9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4725 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7922,2.419669,1.555529,1.343215,2.419669
46,1.267,0.620614,0.78779,0.679273,0.620614
69,0.6387,0.595387,0.771613,0.66939,0.595387


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 5.9797 - root_mean_squared_error: 2.4453
Epoch 1: val_root_mean_squared_error improved from inf to 0.48671, saving model to cache/ensemble_camembert-base/models/mlp/31c0b2ff55f4281a1f3b3cf8ca7865078ef287ff3845e3d23ff72d6f396dd3f8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4089 - root_mean_squared_error: 1.4960 - val_loss: 0.2369 - val_root_mean_squared_error: 0.4867
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1317 - root_mean_squared_error: 0.3629
Epoch 2: val_root_mean_squared_error did not improve from 0.48671
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5078 - root_mean_squared_error: 0.7024 - val_loss: 0.3311 - val_root_mean_squared_error: 0.5754
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5055,2.279799,1.5099,1.245161,2.279799
46,1.2058,0.797644,0.893109,0.754949,0.797644
69,0.7787,0.774598,0.880112,0.745182,0.774598


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 7.7681 - root_mean_squared_error: 2.7871
Epoch 1: val_root_mean_squared_error improved from inf to 1.27565, saving model to cache/ensemble_camembert-base/models/mlp/9ce453fb82f9d212fef615ce0a5abcc3b757d10920fb36422429bf3054a8bcf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8332 - root_mean_squared_error: 1.6017 - val_loss: 1.6273 - val_root_mean_squared_error: 1.2757
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.2950 - root_mean_squared_error: 1.1380
Epoch 2: val_root_mean_squared_error improved from 1.27565 to 0.43140, saving model to cache/ensemble_camembert-base/models/mlp/9ce453fb82f9d212fef615ce0a5abcc3b757d10920fb36422429bf3054a8bcf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5015 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7119,1.863318,1.365034,1.147461,1.863318
46,1.2539,0.595469,0.771666,0.622476,0.595469
69,0.8132,0.513438,0.716546,0.576305,0.513438


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.0614 - root_mean_squared_error: 2.6573
Epoch 1: val_root_mean_squared_error improved from inf to 0.77695, saving model to cache/ensemble_camembert-base/models/mlp/b3d12113271a1ae4780fdb00ef78196bda9ced390594b308a7330fed0a11a5fc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7243 - root_mean_squared_error: 1.5768 - val_loss: 0.6036 - val_root_mean_squared_error: 0.7769
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5338 - root_mean_squared_error: 0.7306
Epoch 2: val_root_mean_squared_error improved from 0.77695 to 0.49968, saving model to cache/ensemble_camembert-base/models/mlp/b3d12113271a1ae4780fdb00ef78196bda9ced390594b308a7330fed0a11a5fc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3759 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0608,2.496474,1.580023,1.397218,2.496474
46,1.1864,0.551872,0.742881,0.618715,0.551872
69,0.7468,0.544661,0.738011,0.597432,0.544661


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 7.6858 - root_mean_squared_error: 2.7723
Epoch 1: val_root_mean_squared_error improved from inf to 0.57893, saving model to cache/ensemble_camembert-base/models/mlp/2d14f0773b4bfe5249f590528adfeaa3d45d80d093a261910c38588712f6d5fb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6420 - root_mean_squared_error: 1.5532 - val_loss: 0.3352 - val_root_mean_squared_error: 0.5789
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3022 - root_mean_squared_error: 0.5497
Epoch 2: val_root_mean_squared_error improved from 0.57893 to 0.48627, saving model to cache/ensemble_camembert-base/models/mlp/2d14f0773b4bfe5249f590528adfeaa3d45d80d093a261910c38588712f6d5fb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4326 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6556,2.22278,1.490899,1.284201,2.22278
46,1.2451,0.614323,0.783788,0.630409,0.614323
69,0.6292,0.598209,0.77344,0.620123,0.598209


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 8.6159 - root_mean_squared_error: 2.9353
Epoch 1: val_root_mean_squared_error improved from inf to 0.75865, saving model to cache/ensemble_camembert-base/models/mlp/fb15db77ca7b4b32458438535168f7866abf44508f2461eca1d790aca37cc452_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9128 - root_mean_squared_error: 1.6238 - val_loss: 0.5755 - val_root_mean_squared_error: 0.7586
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9647 - root_mean_squared_error: 0.9822
Epoch 2: val_root_mean_squared_error improved from 0.75865 to 0.53290, saving model to cache/ensemble_camembert-base/models/mlp/fb15db77ca7b4b32458438535168f7866abf44508f2461eca1d790aca37cc452_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4747 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0696,1.631866,1.277445,1.087154,1.631866
46,1.3554,0.616156,0.784956,0.650695,0.616156
69,0.8134,0.705259,0.839797,0.691767,0.705259


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 7.5899 - root_mean_squared_error: 2.7550
Epoch 1: val_root_mean_squared_error improved from inf to 0.41496, saving model to cache/ensemble_camembert-base/models/mlp/7f8613b59f7db85030751d7658ef6a935767965a015ce2f1ed1b6bc2a201ec5f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5891 - root_mean_squared_error: 1.5335 - val_loss: 0.1722 - val_root_mean_squared_error: 0.4150
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3270 - root_mean_squared_error: 0.5718
Epoch 2: val_root_mean_squared_error improved from 0.41496 to 0.30476, saving model to cache/ensemble_camembert-base/models/mlp/7f8613b59f7db85030751d7658ef6a935767965a015ce2f1ed1b6bc2a201ec5f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4330 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.501,2.97901,1.725981,1.519268,2.97901
46,0.9937,0.702495,0.83815,0.615468,0.702495
69,0.6153,0.725185,0.851578,0.619028,0.725185


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.3617 - root_mean_squared_error: 2.3155
Epoch 1: val_root_mean_squared_error improved from inf to 0.86402, saving model to cache/ensemble_camembert-base/models/mlp/05e83a077e4a427b585511ef0983c3b158f18e10abd028154e848a831b7e2bf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6161 - root_mean_squared_error: 1.5520 - val_loss: 0.7465 - val_root_mean_squared_error: 0.8640
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.2296 - root_mean_squared_error: 1.1089
Epoch 2: val_root_mean_squared_error did not improve from 0.86402
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7465 - root_mean_squared_error: 0.8401 - val_loss: 1.5103 - val_root_mean_squared_error: 1.2289
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9589,2.603922,1.613667,1.394025,2.603922
46,0.9385,0.718063,0.847386,0.70804,0.718063
69,0.685,0.699153,0.836154,0.692662,0.699153


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 6.3514 - root_mean_squared_error: 2.5202
Epoch 1: val_root_mean_squared_error improved from inf to 0.54111, saving model to cache/ensemble_camembert-base/models/mlp/8e6bd0e848370fd850618627cd67cd13d5cc6688b55a79dfb203044131fcc694_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1014 - root_mean_squared_error: 1.6898 - val_loss: 0.2928 - val_root_mean_squared_error: 0.5411
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3144 - root_mean_squared_error: 0.5607
Epoch 2: val_root_mean_squared_error did not improve from 0.54111
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4161 - root_mean_squared_error: 0.6405 - val_loss: 0.7839 - val_root_mean_squared_error: 0.8854
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.0256,2.868362,1.693624,1.415603,2.868362
46,1.1956,0.867515,0.931405,0.771341,0.867515
69,0.6416,0.853575,0.923891,0.765135,0.853575


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.3740 - root_mean_squared_error: 2.3182
Epoch 1: val_root_mean_squared_error improved from inf to 0.66286, saving model to cache/ensemble_camembert-base/models/mlp/8706cd282c3d6a2fc5a3502977f507bfff9ef096a6c5c8c27e1c499283164208_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5530 - root_mean_squared_error: 1.5339 - val_loss: 0.4394 - val_root_mean_squared_error: 0.6629
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4325 - root_mean_squared_error: 0.6576
Epoch 2: val_root_mean_squared_error improved from 0.66286 to 0.44432, saving model to cache/ensemble_camembert-base/models/mlp/8706cd282c3d6a2fc5a3502977f507bfff9ef096a6c5c8c27e1c499283164208_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4067 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2305,1.650072,1.284551,1.069742,1.650072
46,1.1647,0.647327,0.804566,0.691214,0.647327
69,0.7049,0.574869,0.758201,0.653842,0.574869


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 8.0212 - root_mean_squared_error: 2.8322
Epoch 1: val_root_mean_squared_error improved from inf to 0.58263, saving model to cache/ensemble_camembert-base/models/mlp/62b500108621b636ec12f62e5286816fb9c63ac8922241116fe1db4ab003f5a4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5344 - root_mean_squared_error: 1.5180 - val_loss: 0.3395 - val_root_mean_squared_error: 0.5826
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7760 - root_mean_squared_error: 0.8809
Epoch 2: val_root_mean_squared_error did not improve from 0.58263
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5836 - root_mean_squared_error: 0.7574 - val_loss: 0.4043 - val_root_mean_squared_error: 0.6359
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8053,2.415943,1.55433,1.289749,2.415943
46,1.0942,0.804087,0.896709,0.736881,0.804087
69,0.769,0.781641,0.884104,0.726674,0.781641


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 8.5314 - root_mean_squared_error: 2.9209
Epoch 1: val_root_mean_squared_error improved from inf to 0.79559, saving model to cache/ensemble_camembert-base/models/mlp/7a7eea7e7a86a125ec203f4a4f24481093e7e511db45ae4644e2d0793fc87cf3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5667 - root_mean_squared_error: 1.5188 - val_loss: 0.6330 - val_root_mean_squared_error: 0.7956
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6905 - root_mean_squared_error: 0.8310
Epoch 2: val_root_mean_squared_error did not improve from 0.79559
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5732 - root_mean_squared_error: 0.7463 - val_loss: 0.8293 - val_root_mean_squared_error: 0.9107
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6931,2.565127,1.601601,1.409368,2.565127
46,1.083,0.621395,0.788286,0.675118,0.621395
69,0.7923,0.61623,0.785003,0.669196,0.61623


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 6.3072 - root_mean_squared_error: 2.5114
Epoch 1: val_root_mean_squared_error improved from inf to 0.59595, saving model to cache/ensemble_camembert-base/models/mlp/449cfe614dfd3c5996773e7251c1cb6017ba5d18fd2caa52ae564b551b06c9ae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 2.9905 - root_mean_squared_error: 1.6587 - val_loss: 0.3552 - val_root_mean_squared_error: 0.5959
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4277 - root_mean_squared_error: 0.6540
Epoch 2: val_root_mean_squared_error did not improve from 0.59595
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4487 - root_mean_squared_error: 0.6672 - val_loss: 1.1847 - val_root_mean_squared_error: 1.0884
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0725,3.01649,1.736804,1.628896,3.016489
46,1.153,0.446832,0.668455,0.547769,0.446832
69,0.7965,0.380686,0.616998,0.503654,0.380686


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.0233 - root_mean_squared_error: 2.6501
Epoch 1: val_root_mean_squared_error improved from inf to 0.32973, saving model to cache/ensemble_camembert-base/models/mlp/b02635b353ce350e03d5c0dd95017a87e09ac59a52b0b013df758b872d460cd4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8217 - root_mean_squared_error: 1.6136 - val_loss: 0.1087 - val_root_mean_squared_error: 0.3297
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1835 - root_mean_squared_error: 0.4283
Epoch 2: val_root_mean_squared_error did not improve from 0.32973
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3425 - root_mean_squared_error: 0.5827 - val_loss: 0.1908 - val_root_mean_squared_error: 0.4368
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8352,2.405514,1.550972,1.330431,2.405514
46,1.0891,0.689463,0.830339,0.671092,0.689463
69,0.6258,0.61993,0.787356,0.630182,0.61993


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 6.8567 - root_mean_squared_error: 2.6185
Epoch 1: val_root_mean_squared_error improved from inf to 0.68778, saving model to cache/ensemble_camembert-base/models/mlp/a78ee78af3a1dd89cdc55aa00c1aac8d3aa35d5b29d03386003de893c851edb9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8774 - root_mean_squared_error: 1.6242 - val_loss: 0.4730 - val_root_mean_squared_error: 0.6878
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4910 - root_mean_squared_error: 0.7007
Epoch 2: val_root_mean_squared_error did not improve from 0.68778
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6228 - root_mean_squared_error: 0.7827 - val_loss: 0.5110 - val_root_mean_squared_error: 0.7148
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.713,3.099353,1.760498,1.559374,3.099353
46,1.1904,0.69724,0.835009,0.658113,0.69724
69,0.8052,0.700583,0.837008,0.662571,0.700583


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 8.1033 - root_mean_squared_error: 2.8466
Epoch 1: val_root_mean_squared_error improved from inf to 0.81666, saving model to cache/ensemble_camembert-base/models/mlp/fce56e14055251e0f451b24abe8b38864441355c13a0988680d8e59a9d4aa7cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0139 - root_mean_squared_error: 1.6565 - val_loss: 0.6669 - val_root_mean_squared_error: 0.8167
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.8060 - root_mean_squared_error: 0.8978
Epoch 2: val_root_mean_squared_error improved from 0.81666 to 0.52384, saving model to cache/ensemble_camembert-base/models/mlp/fce56e14055251e0f451b24abe8b38864441355c13a0988680d8e59a9d4aa7cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4894 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9497,1.941019,1.393204,1.16362,1.941019
46,1.2727,0.662118,0.813706,0.675283,0.662118
69,0.6688,0.640976,0.80061,0.668686,0.640976


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 7.8202 - root_mean_squared_error: 2.7965
Epoch 1: val_root_mean_squared_error improved from inf to 0.74213, saving model to cache/ensemble_camembert-base/models/mlp/259a29c223c4f0621b8fb7b491f34d52e57debac8c638546bd722bd615b1e016_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1482 - root_mean_squared_error: 1.6978 - val_loss: 0.5508 - val_root_mean_squared_error: 0.7421
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4718 - root_mean_squared_error: 0.6869
Epoch 2: val_root_mean_squared_error did not improve from 0.74213
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4475 - root_mean_squared_error: 0.6684 - val_loss: 0.5708 - val_root_mean_squared_error: 0.7555
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.542,2.62964,1.621617,1.363701,2.62964
46,0.8385,0.824688,0.908124,0.720768,0.824688
69,0.7643,0.75429,0.868498,0.674564,0.75429


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 5.6093 - root_mean_squared_error: 2.3684
Epoch 1: val_root_mean_squared_error improved from inf to 0.47275, saving model to cache/ensemble_camembert-base/models/mlp/ee41604dc9331dd6989c5f07326ccf061b477426053394dd33730d492f40cbb2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1308 - root_mean_squared_error: 1.6950 - val_loss: 0.2235 - val_root_mean_squared_error: 0.4728
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.0932 - root_mean_squared_error: 0.3053
Epoch 2: val_root_mean_squared_error improved from 0.47275 to 0.45223, saving model to cache/ensemble_camembert-base/models/mlp/ee41604dc9331dd6989c5f07326ccf061b477426053394dd33730d492f40cbb2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3885 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0905,2.970844,1.723614,1.487864,2.970844
46,1.2338,0.780498,0.883458,0.654446,0.780498
69,0.7463,0.754191,0.868442,0.693699,0.754191


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 4.7922 - root_mean_squared_error: 2.1891
Epoch 1: val_root_mean_squared_error improved from inf to 0.46593, saving model to cache/ensemble_camembert-base/models/mlp/fd1da1f95edf6c34e87dcfeeb7f2ca907ca45b23ce1b33d53f686a1475ce0068_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8576 - root_mean_squared_error: 1.6292 - val_loss: 0.2171 - val_root_mean_squared_error: 0.4659
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1908 - root_mean_squared_error: 0.4368
Epoch 2: val_root_mean_squared_error improved from 0.46593 to 0.32454, saving model to cache/ensemble_camembert-base/models/mlp/fd1da1f95edf6c34e87dcfeeb7f2ca907ca45b23ce1b33d53f686a1475ce0068_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3137 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1179,2.970611,1.723546,1.549805,2.97061
46,1.2712,0.593642,0.770482,0.604834,0.593642
69,0.6744,0.550533,0.741979,0.596667,0.550533


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 9.4315 - root_mean_squared_error: 3.0711
Epoch 1: val_root_mean_squared_error improved from inf to 0.44982, saving model to cache/ensemble_camembert-base/models/mlp/694d14fdcdb222f2a58d0357a177ab83ac3da93c5c6ce984ef00c296f7e3e283_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1715 - root_mean_squared_error: 1.6853 - val_loss: 0.2023 - val_root_mean_squared_error: 0.4498
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2486 - root_mean_squared_error: 0.4986
Epoch 2: val_root_mean_squared_error did not improve from 0.44982
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4159 - root_mean_squared_error: 0.6378 - val_loss: 1.1420 - val_root_mean_squared_error: 1.0686
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5901,3.305755,1.818173,1.494985,3.305755
46,1.5007,1.098855,1.048263,0.804068,1.098856
69,0.8287,1.070072,1.034443,0.841512,1.070072


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 9.6455 - root_mean_squared_error: 3.1057
Epoch 1: val_root_mean_squared_error improved from inf to 0.59597, saving model to cache/ensemble_camembert-base/models/mlp/1317f9ea9be7bd9900517cbccd77e6f78d6edfe96bca75952893a566ae04cd40_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4378 - root_mean_squared_error: 1.7605 - val_loss: 0.3552 - val_root_mean_squared_error: 0.5960
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.3277 - root_mean_squared_error: 0.5724
Epoch 2: val_root_mean_squared_error did not improve from 0.59597
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6050 - root_mean_squared_error: 0.7726 - val_loss: 1.2815 - val_root_mean_squared_error: 1.1320
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.704,2.973564,1.724402,1.487068,2.973564
46,1.3608,0.760444,0.872035,0.677976,0.760444
69,0.8373,0.723184,0.850402,0.671948,0.723184


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.6455 - root_mean_squared_error: 2.3760
Epoch 1: val_root_mean_squared_error improved from inf to 0.83580, saving model to cache/ensemble_camembert-base/models/mlp/701dbc40f09fd92c680961214a1a63a3c54cd1a3cb7be7f2ebaf57173e48dc94_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4968 - root_mean_squared_error: 1.5188 - val_loss: 0.6986 - val_root_mean_squared_error: 0.8358
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.9019 - root_mean_squared_error: 0.9497
Epoch 2: val_root_mean_squared_error improved from 0.83580 to 0.67064, saving model to cache/ensemble_camembert-base/models/mlp/701dbc40f09fd92c680961214a1a63a3c54cd1a3cb7be7f2ebaf57173e48dc94_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7092 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0462,2.781338,1.667734,1.460706,2.781338
46,1.6304,0.671761,0.819611,0.608577,0.671762
69,0.8401,0.617582,0.785864,0.588658,0.617582


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 9.4140 - root_mean_squared_error: 3.0682
Epoch 1: val_root_mean_squared_error improved from inf to 0.95457, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2233 - root_mean_squared_error: 1.7110 - val_loss: 0.9112 - val_root_mean_squared_error: 0.9546
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.8185 - root_mean_squared_error: 0.9047
Epoch 2: val_root_mean_squared_error improved from 0.95457 to 0.76933, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5156 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2374,2.48854,1.577511,1.324649,2.48854
46,1.6083,0.74262,0.861754,0.734024,0.74262
69,0.7625,0.728384,0.853454,0.742522,0.728384


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.9631 - root_mean_squared_error: 2.8219
Epoch 1: val_root_mean_squared_error improved from inf to 1.01770, saving model to cache/ensemble_camembert-base/models/mlp/e9bc56bdc668459292acc9d2bed56e5f9d44bea0021e35b346d0ba93bcc1987d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4829 - root_mean_squared_error: 1.7839 - val_loss: 1.0357 - val_root_mean_squared_error: 1.0177
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6871 - root_mean_squared_error: 0.8289
Epoch 2: val_root_mean_squared_error improved from 1.01770 to 0.99905, saving model to cache/ensemble_camembert-base/models/mlp/e9bc56bdc668459292acc9d2bed56e5f9d44bea0021e35b346d0ba93bcc1987d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5807 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2295,2.638219,1.62426,1.328529,2.638219
46,1.3587,0.894336,0.945694,0.729356,0.894336
69,0.8235,0.91827,0.958264,0.750453,0.91827


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 5.7729 - root_mean_squared_error: 2.4027
Epoch 1: val_root_mean_squared_error improved from inf to 0.65439, saving model to cache/ensemble_camembert-base/models/mlp/42d2e3518cfd5a1ec6525ff8a1e5fba0fea095bb2cada4e17fcf5cf314d404b2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 2.9307 - root_mean_squared_error: 1.6542 - val_loss: 0.4282 - val_root_mean_squared_error: 0.6544
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1768 - root_mean_squared_error: 0.4205
Epoch 2: val_root_mean_squared_error did not improve from 0.65439
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5873 - root_mean_squared_error: 0.7576 - val_loss: 0.5048 - val_root_mean_squared_error: 0.7105
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8648,3.119652,1.766254,1.466362,3.119652
46,1.2245,0.940196,0.969637,0.77029,0.940196
69,0.7372,0.814195,0.902328,0.71178,0.814195


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 7.4982 - root_mean_squared_error: 2.7383
Epoch 1: val_root_mean_squared_error improved from inf to 0.84998, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4027 - root_mean_squared_error: 1.7723 - val_loss: 0.7225 - val_root_mean_squared_error: 0.8500
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4757 - root_mean_squared_error: 0.6897
Epoch 2: val_root_mean_squared_error improved from 0.84998 to 0.71833, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4851 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9513,3.800959,1.949605,1.752453,3.800959
46,1.4354,0.851192,0.922601,0.695919,0.851192
69,0.8698,0.74486,0.863053,0.675702,0.74486


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 7.4653 - root_mean_squared_error: 2.7323
Epoch 1: val_root_mean_squared_error improved from inf to 0.75651, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0574 - root_mean_squared_error: 1.6694 - val_loss: 0.5723 - val_root_mean_squared_error: 0.7565
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 0.7349 - root_mean_squared_error: 0.8573
Epoch 2: val_root_mean_squared_error improved from 0.75651 to 0.62350, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6203 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.354,3.517455,1.875488,1.604553,3.517455
46,1.4129,0.989045,0.994507,0.76929,0.989045
69,0.698,0.973239,0.986529,0.767029,0.973239


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 684ms/step - loss: 7.3807 - root_mean_squared_error: 2.7167
Epoch 1: val_root_mean_squared_error improved from inf to 0.65504, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0387 - root_mean_squared_error: 1.6661 - val_loss: 0.4291 - val_root_mean_squared_error: 0.6550
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3992 - root_mean_squared_error: 0.6318
Epoch 2: val_root_mean_squared_error improved from 0.65504 to 0.49838, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5604 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0121,3.636672,1.907006,1.541113,3.636672
46,1.2512,1.311737,1.145311,0.958269,1.311737
69,0.7301,1.241934,1.114421,0.937368,1.241934


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 8.7726 - root_mean_squared_error: 2.9619
Epoch 1: val_root_mean_squared_error improved from inf to 0.61695, saving model to cache/ensemble_camembert-base/models/mlp/9b8bb1f2957cdecfc3cf8eb772fa9ad458d9f6bc0519e49025c3d25577a338fa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7266 - root_mean_squared_error: 1.5744 - val_loss: 0.3806 - val_root_mean_squared_error: 0.6169
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3667 - root_mean_squared_error: 0.6055
Epoch 2: val_root_mean_squared_error improved from 0.61695 to 0.35818, saving model to cache/ensemble_camembert-base/models/mlp/9b8bb1f2957cdecfc3cf8eb772fa9ad458d9f6bc0519e49025c3d25577a338fa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4259 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1443,2.931485,1.712158,1.388286,2.931485
46,1.2594,1.030292,1.015033,0.818374,1.030292
69,0.8046,1.040668,1.020131,0.830164,1.040668


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 4.8429 - root_mean_squared_error: 2.2006
Epoch 1: val_root_mean_squared_error improved from inf to 0.61173, saving model to cache/ensemble_camembert-base/models/mlp/5d7cdf0936f0a4e25ee11292edec8c52bc105f1482c8aab7178f37df73c5e15d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1383 - root_mean_squared_error: 1.7088 - val_loss: 0.3742 - val_root_mean_squared_error: 0.6117
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2391 - root_mean_squared_error: 0.4889
Epoch 2: val_root_mean_squared_error improved from 0.61173 to 0.51206, saving model to cache/ensemble_camembert-base/models/mlp/5d7cdf0936f0a4e25ee11292edec8c52bc105f1482c8aab7178f37df73c5e15d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4016 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7381,1.667155,1.291184,1.038463,1.667155
46,1.2968,0.763841,0.87398,0.745083,0.763841
69,0.7624,0.707392,0.841066,0.721699,0.707392


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 5.0423 - root_mean_squared_error: 2.2455
Epoch 1: val_root_mean_squared_error improved from inf to 0.99253, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5443 - root_mean_squared_error: 1.5422 - val_loss: 0.9851 - val_root_mean_squared_error: 0.9925
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 1.5134 - root_mean_squared_error: 1.2302
Epoch 2: val_root_mean_squared_error improved from 0.99253 to 0.89403, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6103 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7755,2.453369,1.566324,1.265704,2.45337
46,1.3435,0.912407,0.9552,0.842864,0.912407
69,0.8547,0.904835,0.951228,0.834564,0.904835


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 698ms/step - loss: 5.1636 - root_mean_squared_error: 2.2724
Epoch 1: val_root_mean_squared_error improved from inf to 1.17615, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 2.8555 - root_mean_squared_error: 1.6325 - val_loss: 1.3833 - val_root_mean_squared_error: 1.1762
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 1.0470 - root_mean_squared_error: 1.0233
Epoch 2: val_root_mean_squared_error improved from 1.17615 to 0.59141, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4916 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1236,1.817803,1.348259,1.147773,1.817803
46,1.2292,0.621232,0.788183,0.651975,0.621232
69,0.9004,0.657586,0.810916,0.662791,0.657585


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 7.0930 - root_mean_squared_error: 2.6633
Epoch 1: val_root_mean_squared_error improved from inf to 0.67116, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0835 - root_mean_squared_error: 1.6915 - val_loss: 0.4505 - val_root_mean_squared_error: 0.6712
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6219 - root_mean_squared_error: 0.7886
Epoch 2: val_root_mean_squared_error improved from 0.67116 to 0.43224, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5904 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8295,2.541231,1.594124,1.372017,2.541231
46,1.4596,0.686221,0.828385,0.645705,0.686221
69,0.7774,0.678483,0.823701,0.641821,0.678483


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 5.9910 - root_mean_squared_error: 2.4477
Epoch 1: val_root_mean_squared_error improved from inf to 0.81168, saving model to cache/ensemble_camembert-base/models/mlp/fe00b919cce02eb6972c2be680c2edf4a3a1c7a230cf3ffc08c72dc0be23c7c6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5926 - root_mean_squared_error: 1.5453 - val_loss: 0.6588 - val_root_mean_squared_error: 0.8117
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1221 - root_mean_squared_error: 1.0593
Epoch 2: val_root_mean_squared_error did not improve from 0.81168
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.1194 - root_mean_squared_error: 1.0355 - val_loss: 1.0111 - val_root_mean_squared_error: 1.0055
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.453,3.572272,1.890045,1.601715,3.572272
46,1.0941,1.060698,1.029902,0.786878,1.060698
69,0.696,0.992664,0.996325,0.713478,0.992664


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.6770 - root_mean_squared_error: 2.7707
Epoch 1: val_root_mean_squared_error improved from inf to 0.83319, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9492 - root_mean_squared_error: 1.6446 - val_loss: 0.6942 - val_root_mean_squared_error: 0.8332
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1218 - root_mean_squared_error: 1.0592
Epoch 2: val_root_mean_squared_error improved from 0.83319 to 0.46948, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5165 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7626,2.612182,1.616225,1.298268,2.612182
46,1.4898,0.968619,0.984185,0.803808,0.968619
69,0.9062,1.008163,1.004073,0.833672,1.008164


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 7.4638 - root_mean_squared_error: 2.7320
Epoch 1: val_root_mean_squared_error improved from inf to 0.71409, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4023 - root_mean_squared_error: 1.7691 - val_loss: 0.5099 - val_root_mean_squared_error: 0.7141
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3431 - root_mean_squared_error: 0.5857
Epoch 2: val_root_mean_squared_error improved from 0.71409 to 0.51406, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4116 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.168,2.160736,1.469944,1.109608,2.160736
46,1.3091,1.078738,1.038623,0.86018,1.078739
69,0.8853,1.258897,1.122006,0.956837,1.258897


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 7.2972 - root_mean_squared_error: 2.7013
Epoch 1: val_root_mean_squared_error improved from inf to 0.43092, saving model to cache/ensemble_camembert-base/models/mlp/2a1450dcea469212b21ac24506fdbdb637f168aa1883cb2e960ac29c15a26b4b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9682 - root_mean_squared_error: 1.6563 - val_loss: 0.1857 - val_root_mean_squared_error: 0.4309
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2457 - root_mean_squared_error: 0.4956
Epoch 2: val_root_mean_squared_error did not improve from 0.43092
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4183 - root_mean_squared_error: 0.6377 - val_loss: 0.3466 - val_root_mean_squared_error: 0.5887
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9908,2.932014,1.712313,1.482702,2.932014
46,1.429,0.758882,0.871139,0.689855,0.758882
69,0.778,0.724895,0.851408,0.678015,0.724895


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 5.5806 - root_mean_squared_error: 2.3623
Epoch 1: val_root_mean_squared_error improved from inf to 0.77571, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3540 - root_mean_squared_error: 1.4759 - val_loss: 0.6017 - val_root_mean_squared_error: 0.7757
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3468 - root_mean_squared_error: 0.5889
Epoch 2: val_root_mean_squared_error improved from 0.77571 to 0.49804, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7107 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8098,2.917155,1.707968,1.454439,2.917155
46,1.4538,0.810796,0.900442,0.749641,0.810796
69,0.8773,0.79479,0.89151,0.779135,0.79479


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 4.4706 - root_mean_squared_error: 2.1144
Epoch 1: val_root_mean_squared_error improved from inf to 0.61610, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7580 - root_mean_squared_error: 1.6081 - val_loss: 0.3796 - val_root_mean_squared_error: 0.6161
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5450 - root_mean_squared_error: 0.7382
Epoch 2: val_root_mean_squared_error improved from 0.61610 to 0.47349, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5722 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9578,2.709514,1.64606,1.424548,2.709513
46,1.2429,0.734522,0.857042,0.727246,0.734522
69,0.8257,0.70644,0.8405,0.69849,0.70644


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 9.4073 - root_mean_squared_error: 3.0671
Epoch 1: val_root_mean_squared_error improved from inf to 0.70912, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3236 - root_mean_squared_error: 1.7460 - val_loss: 0.5029 - val_root_mean_squared_error: 0.7091
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6486 - root_mean_squared_error: 0.8054
Epoch 2: val_root_mean_squared_error improved from 0.70912 to 0.57006, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5922 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2341,1.901725,1.37903,1.078195,1.901725
46,1.1685,0.908157,0.952973,0.802407,0.908157
69,0.9164,0.85956,0.927125,0.766334,0.85956


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 5.4364 - root_mean_squared_error: 2.3316
Epoch 1: val_root_mean_squared_error improved from inf to 0.54197, saving model to cache/ensemble_camembert-base/models/mlp/facef04534cb76ceb729f0fe40b42eae02a897d75b6e54c654d350b2882cf476_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5111 - root_mean_squared_error: 1.5276 - val_loss: 0.2937 - val_root_mean_squared_error: 0.5420
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2545 - root_mean_squared_error: 0.5045
Epoch 2: val_root_mean_squared_error did not improve from 0.54197
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5409 - root_mean_squared_error: 0.7297 - val_loss: 0.6261 - val_root_mean_squared_error: 0.7913
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7097,1.632502,1.277694,1.072355,1.632502
46,1.4711,0.612323,0.782511,0.685443,0.612323
69,0.8446,0.524988,0.724561,0.640839,0.524988


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 8.6742 - root_mean_squared_error: 2.9452
Epoch 1: val_root_mean_squared_error improved from inf to 0.70690, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0703 - root_mean_squared_error: 1.6686 - val_loss: 0.4997 - val_root_mean_squared_error: 0.7069
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8157 - root_mean_squared_error: 0.9032
Epoch 2: val_root_mean_squared_error improved from 0.70690 to 0.44695, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5822 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6641,3.306855,1.818476,1.551823,3.306855
46,1.3686,0.963358,0.981508,0.803235,0.963358
69,0.8388,0.944226,0.971713,0.792489,0.944226


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 11.0352 - root_mean_squared_error: 3.3219
Epoch 1: val_root_mean_squared_error improved from inf to 0.71407, saving model to cache/ensemble_camembert-base/models/mlp/ad5842e273f10b3c02181f42dd7e8ff7553572de5fb264645c719022d51d24e6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1567 - root_mean_squared_error: 1.6818 - val_loss: 0.5099 - val_root_mean_squared_error: 0.7141
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.6914 - root_mean_squared_error: 0.8315
Epoch 2: val_root_mean_squared_error did not improve from 0.71407
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4903 - root_mean_squared_error: 0.6959 - val_loss: 1.1759 - val_root_mean_squared_error: 1.0844
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6134,3.066255,1.751073,1.50353,3.066255
46,1.1883,0.841602,0.917389,0.740819,0.841602
69,0.8919,0.839439,0.916209,0.744336,0.839439


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.1692 - root_mean_squared_error: 2.8582
Epoch 1: val_root_mean_squared_error improved from inf to 1.01986, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3094 - root_mean_squared_error: 1.7422 - val_loss: 1.0401 - val_root_mean_squared_error: 1.0199
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6790 - root_mean_squared_error: 0.8240
Epoch 2: val_root_mean_squared_error improved from 1.01986 to 0.59553, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5073 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3626,1.92076,1.385915,1.182752,1.92076
46,1.4567,0.58205,0.762922,0.643826,0.58205
69,0.8871,0.616044,0.784885,0.664279,0.616044


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.6625 - root_mean_squared_error: 2.7681
Epoch 1: val_root_mean_squared_error improved from inf to 0.76112, saving model to cache/ensemble_camembert-base/models/mlp/37c75ab5a5d47915158d5fbcd19515f465ec693a96e42e62adb86d30a71e561d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6706 - root_mean_squared_error: 1.5662 - val_loss: 0.5793 - val_root_mean_squared_error: 0.7611
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.6794 - root_mean_squared_error: 0.8243
Epoch 2: val_root_mean_squared_error improved from 0.76112 to 0.72039, saving model to cache/ensemble_camembert-base/models/mlp/37c75ab5a5d47915158d5fbcd19515f465ec693a96e42e62adb86d30a71e561d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5123 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5314,3.618984,1.902363,1.584409,3.618984
46,1.5149,1.189482,1.090634,0.864326,1.189482
69,0.8803,1.132809,1.064335,0.868573,1.132809


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 5.3793 - root_mean_squared_error: 2.3193
Epoch 1: val_root_mean_squared_error improved from inf to 0.85332, saving model to cache/ensemble_camembert-base/models/mlp/39ec40fcea04f18d768c269aeac728a9c16749968ea4a27a43be972deda9c4b0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4991 - root_mean_squared_error: 1.5251 - val_loss: 0.7282 - val_root_mean_squared_error: 0.8533
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.3130 - root_mean_squared_error: 0.5595
Epoch 2: val_root_mean_squared_error did not improve from 0.85332
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5824 - root_mean_squared_error: 0.7596 - val_loss: 0.9823 - val_root_mean_squared_error: 0.9911
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8668,2.700583,1.643345,1.356656,2.700583
46,1.5034,0.861198,0.928008,0.763865,0.861198
69,0.7593,0.854334,0.924302,0.759837,0.854334


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.5531 - root_mean_squared_error: 2.7483
Epoch 1: val_root_mean_squared_error improved from inf to 0.59479, saving model to cache/ensemble_camembert-base/models/mlp/e626bb537cba3c7f4ad255c387a250ece611403476f9b91b77a8afcd3b3aad26_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1966 - root_mean_squared_error: 1.7119 - val_loss: 0.3538 - val_root_mean_squared_error: 0.5948
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2184 - root_mean_squared_error: 0.4674
Epoch 2: val_root_mean_squared_error improved from 0.59479 to 0.59330, saving model to cache/ensemble_camembert-base/models/mlp/e626bb537cba3c7f4ad255c387a250ece611403476f9b91b77a8afcd3b3aad26_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4073 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6875,2.152197,1.467037,1.226721,2.152197
46,1.2129,0.685178,0.827755,0.731798,0.685178
69,0.724,0.623141,0.789393,0.683636,0.623141


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.4312 - root_mean_squared_error: 2.5360
Epoch 1: val_root_mean_squared_error improved from inf to 0.72212, saving model to cache/ensemble_camembert-base/models/mlp/8f248700cab9594c85e8dd6fa07bd4718c8b835acdb6cb0fcfae3134c0b2ffa4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5432 - root_mean_squared_error: 1.5325 - val_loss: 0.5215 - val_root_mean_squared_error: 0.7221
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.5961 - root_mean_squared_error: 0.7721
Epoch 2: val_root_mean_squared_error improved from 0.72212 to 0.44975, saving model to cache/ensemble_camembert-base/models/mlp/8f248700cab9594c85e8dd6fa07bd4718c8b835acdb6cb0fcfae3134c0b2ffa4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4350 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5869,2.544528,1.595158,1.319891,2.544528
46,1.0441,0.888409,0.942555,0.698574,0.888409
69,0.8,0.830199,0.911153,0.664632,0.830199


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.1201 - root_mean_squared_error: 2.6684
Epoch 1: val_root_mean_squared_error improved from inf to 0.56329, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2593 - root_mean_squared_error: 1.7293 - val_loss: 0.3173 - val_root_mean_squared_error: 0.5633
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3972 - root_mean_squared_error: 0.6302
Epoch 2: val_root_mean_squared_error improved from 0.56329 to 0.44312, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4690 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7364,1.783336,1.335416,1.156418,1.783336
46,1.5514,0.515942,0.718291,0.538273,0.515942
69,0.8017,0.498211,0.705841,0.531877,0.498211


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 9.6677 - root_mean_squared_error: 3.1093
Epoch 1: val_root_mean_squared_error improved from inf to 0.67005, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3294 - root_mean_squared_error: 1.7383 - val_loss: 0.4490 - val_root_mean_squared_error: 0.6700
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 0.2459 - root_mean_squared_error: 0.4959
Epoch 2: val_root_mean_squared_error improved from 0.67005 to 0.60917, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3895 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.006,3.26398,1.806649,1.610537,3.26398
46,1.4775,0.735618,0.857682,0.701024,0.735618
69,0.8953,0.700674,0.837063,0.691465,0.700674


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.2549 - root_mean_squared_error: 2.5010
Epoch 1: val_root_mean_squared_error improved from inf to 0.48326, saving model to cache/ensemble_camembert-base/models/mlp/77f569c07dd1eac26d720462e7d0af96c3c7e731bee32bab8b67b5319fda50ba_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5604 - root_mean_squared_error: 1.5344 - val_loss: 0.2335 - val_root_mean_squared_error: 0.4833
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1563 - root_mean_squared_error: 0.3953
Epoch 2: val_root_mean_squared_error did not improve from 0.48326
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5130 - root_mean_squared_error: 0.7054 - val_loss: 0.7894 - val_root_mean_squared_error: 0.8885
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4457,2.784337,1.668633,1.43374,2.784337
46,1.282,0.746053,0.863744,0.704214,0.746053
69,0.7859,0.763313,0.873678,0.696835,0.763313


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 5.3282 - root_mean_squared_error: 2.3083
Epoch 1: val_root_mean_squared_error improved from inf to 0.56359, saving model to cache/ensemble_camembert-base/models/mlp/d26ae0a20ae743737145596f4d9c5ea917cc0e6b21952bd0cc08f0910e00ea63_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8098 - root_mean_squared_error: 1.6155 - val_loss: 0.3176 - val_root_mean_squared_error: 0.5636
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2648 - root_mean_squared_error: 0.5146
Epoch 2: val_root_mean_squared_error did not improve from 0.56359
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4428 - root_mean_squared_error: 0.6613 - val_loss: 0.4347 - val_root_mean_squared_error: 0.6593
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9078,3.207159,1.790854,1.507365,3.207159
46,1.2853,0.96772,0.983727,0.852783,0.96772
69,0.8047,0.952074,0.975743,0.845692,0.952074


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 8.1590 - root_mean_squared_error: 2.8564
Epoch 1: val_root_mean_squared_error improved from inf to 0.78180, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9221 - root_mean_squared_error: 1.6360 - val_loss: 0.6112 - val_root_mean_squared_error: 0.7818
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2823 - root_mean_squared_error: 0.5313
Epoch 2: val_root_mean_squared_error improved from 0.78180 to 0.60154, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4603 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6837,2.530278,1.590685,1.326975,2.530278
46,1.3038,0.783093,0.884925,0.718142,0.783093
69,0.8161,0.674655,0.821374,0.653573,0.674655


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.9861 - root_mean_squared_error: 2.6431
Epoch 1: val_root_mean_squared_error improved from inf to 0.62213, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 2.8750 - root_mean_squared_error: 1.6336 - val_loss: 0.3870 - val_root_mean_squared_error: 0.6221
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5459 - root_mean_squared_error: 0.7388
Epoch 2: val_root_mean_squared_error improved from 0.62213 to 0.39436, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5275 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5572,2.307299,1.518979,1.298566,2.307299
46,1.4513,0.620238,0.787552,0.595503,0.620238
69,0.7724,0.622051,0.788702,0.59467,0.622051


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 697ms/step - loss: 5.2361 - root_mean_squared_error: 2.2882
Epoch 1: val_root_mean_squared_error improved from inf to 0.73088, saving model to cache/ensemble_camembert-base/models/mlp/436e7d794d2ae85a585f44f4dbf0544bffce9e73d769a22e5512f743b81de5ff_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8039 - root_mean_squared_error: 1.6211 - val_loss: 0.5342 - val_root_mean_squared_error: 0.7309
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7806 - root_mean_squared_error: 0.8835
Epoch 2: val_root_mean_squared_error did not improve from 0.73088
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6051 - root_mean_squared_error: 0.7740 - val_loss: 0.8272 - val_root_mean_squared_error: 0.9095
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4887,3.18418,1.784427,1.442831,3.18418
46,1.2728,1.102379,1.049942,0.893307,1.102379
69,0.8465,1.098603,1.048142,0.898893,1.098603


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.4940 - root_mean_squared_error: 2.5483
Epoch 1: val_root_mean_squared_error improved from inf to 0.60128, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2590 - root_mean_squared_error: 1.7435 - val_loss: 0.3615 - val_root_mean_squared_error: 0.6013
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2989 - root_mean_squared_error: 0.5468
Epoch 2: val_root_mean_squared_error improved from 0.60128 to 0.44421, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4378 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9568,3.629205,1.905047,1.58405,3.629205
46,1.5313,1.182231,1.087305,0.926309,1.182231
69,0.9262,1.066219,1.032579,0.848156,1.066219


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 6.5325 - root_mean_squared_error: 2.5559
Epoch 1: val_root_mean_squared_error improved from inf to 1.22112, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8607 - root_mean_squared_error: 1.6223 - val_loss: 1.4911 - val_root_mean_squared_error: 1.2211
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 1.1704 - root_mean_squared_error: 1.0819
Epoch 2: val_root_mean_squared_error improved from 1.22112 to 0.47450, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4530 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0236,2.465984,1.570345,1.37864,2.465984
46,1.4037,0.586347,0.765733,0.592112,0.586347
69,1.0276,0.573216,0.75711,0.583631,0.573216


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 8.0331 - root_mean_squared_error: 2.8343
Epoch 1: val_root_mean_squared_error improved from inf to 1.03811, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8278 - root_mean_squared_error: 1.6033 - val_loss: 1.0777 - val_root_mean_squared_error: 1.0381
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7688 - root_mean_squared_error: 0.8768
Epoch 2: val_root_mean_squared_error improved from 1.03811 to 0.88366, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6051 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2371,1.950951,1.396765,1.109499,1.950951
46,1.3481,0.820982,0.90608,0.771918,0.820982
69,0.9818,0.917048,0.957626,0.837687,0.917048


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 9.8723 - root_mean_squared_error: 3.1420
Epoch 1: val_root_mean_squared_error improved from inf to 0.59223, saving model to cache/ensemble_camembert-base/models/mlp/7bac9d80c284abe30ebff2e0189d77fc113418a29546fd977745a7dffaf0a51b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.9682 - root_mean_squared_error: 1.8943 - val_loss: 0.3507 - val_root_mean_squared_error: 0.5922
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.3582 - root_mean_squared_error: 0.5985
Epoch 2: val_root_mean_squared_error did not improve from 0.59223
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5954 - root_mean_squared_error: 0.7662 - val_loss: 0.4344 - val_root_mean_squared_error: 0.6591
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.3623,2.167052,1.472091,1.233122,2.167052
46,1.3945,0.707426,0.841086,0.676161,0.707426
69,0.7485,0.644024,0.802511,0.645574,0.644024


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 7.0964 - root_mean_squared_error: 2.6639
Epoch 1: val_root_mean_squared_error improved from inf to 0.54072, saving model to cache/ensemble_camembert-base/models/mlp/055da26a423a24b552e3b5e3a2863d2acdedd9779ee728efaad7c6a29c6fa52c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6890 - root_mean_squared_error: 1.5743 - val_loss: 0.2924 - val_root_mean_squared_error: 0.5407
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2819 - root_mean_squared_error: 0.5309
Epoch 2: val_root_mean_squared_error did not improve from 0.54072
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5011 - root_mean_squared_error: 0.7053 - val_loss: 0.3434 - val_root_mean_squared_error: 0.5860
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3395,2.53863,1.593308,1.30612,2.53863
46,1.1105,0.837096,0.91493,0.772431,0.837096
69,0.8147,0.7223,0.849883,0.702377,0.7223


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 8.5535 - root_mean_squared_error: 2.9246
Epoch 1: val_root_mean_squared_error improved from inf to 0.51431, saving model to cache/ensemble_camembert-base/models/mlp/940eb71643aaa90ea012d54e90ae34c3afe639f666a99839e908828060faba72_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8732 - root_mean_squared_error: 1.6230 - val_loss: 0.2645 - val_root_mean_squared_error: 0.5143
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5460 - root_mean_squared_error: 0.7389
Epoch 2: val_root_mean_squared_error did not improve from 0.51431
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4009 - root_mean_squared_error: 0.6324 - val_loss: 0.5881 - val_root_mean_squared_error: 0.7669
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9107,2.320667,1.523374,1.323291,2.320667
46,1.4599,0.567057,0.753032,0.649593,0.567057
69,0.8267,0.517505,0.719378,0.632379,0.517505


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 5.9939 - root_mean_squared_error: 2.4482
Epoch 1: val_root_mean_squared_error improved from inf to 0.39571, saving model to cache/ensemble_camembert-base/models/mlp/903595088ddc9b7168b88e19e901856a69a389c7a6ca8beab1c52beef5d5e1b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6807 - root_mean_squared_error: 1.5753 - val_loss: 0.1566 - val_root_mean_squared_error: 0.3957
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1581 - root_mean_squared_error: 0.3976
Epoch 2: val_root_mean_squared_error did not improve from 0.39571
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5663 - root_mean_squared_error: 0.7415 - val_loss: 0.3189 - val_root_mean_squared_error: 0.5647
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.2414,2.763819,1.662474,1.35838,2.763819
46,1.5346,0.940401,0.969743,0.811148,0.940401
69,0.9121,0.915246,0.956685,0.810942,0.915246


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.8146 - root_mean_squared_error: 2.7955
Epoch 1: val_root_mean_squared_error improved from inf to 0.76860, saving model to cache/ensemble_camembert-base/models/mlp/7ce27a89fa338a58b9a88b97e9249eb82a0e8bc0c08aa34722be95cd5f176917_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7160 - root_mean_squared_error: 1.5690 - val_loss: 0.5907 - val_root_mean_squared_error: 0.7686
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3470 - root_mean_squared_error: 0.5891
Epoch 2: val_root_mean_squared_error did not improve from 0.76860
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4656 - root_mean_squared_error: 0.6736 - val_loss: 1.2153 - val_root_mean_squared_error: 1.1024
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3341,1.470363,1.212585,1.029969,1.470363
46,1.2417,0.618459,0.786421,0.639517,0.618459
69,0.9204,0.515651,0.718088,0.603817,0.515651


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 8.5465 - root_mean_squared_error: 2.9234
Epoch 1: val_root_mean_squared_error improved from inf to 0.50602, saving model to cache/ensemble_camembert-base/models/mlp/cd7c3ec6fe6dd8f597f542b16e2ec55ac7e9e90ec6b74570d8abc777db536c46_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3867 - root_mean_squared_error: 1.7568 - val_loss: 0.2561 - val_root_mean_squared_error: 0.5060
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3661 - root_mean_squared_error: 0.6050
Epoch 2: val_root_mean_squared_error did not improve from 0.50602
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4259 - root_mean_squared_error: 0.6508 - val_loss: 0.6007 - val_root_mean_squared_error: 0.7750
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8932,2.702104,1.643808,1.351085,2.702105
46,1.3948,0.91974,0.959031,0.850238,0.91974
69,0.8038,0.9201,0.959218,0.827272,0.9201


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.8855 - root_mean_squared_error: 2.8081
Epoch 1: val_root_mean_squared_error improved from inf to 0.56181, saving model to cache/ensemble_camembert-base/models/mlp/e00f7e9149561ff36818c69a2190d6bff532f3faa48507b24f5becaafb5fa478_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6000 - root_mean_squared_error: 1.5481 - val_loss: 0.3156 - val_root_mean_squared_error: 0.5618
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2812 - root_mean_squared_error: 0.5303
Epoch 2: val_root_mean_squared_error improved from 0.56181 to 0.56092, saving model to cache/ensemble_camembert-base/models/mlp/e00f7e9149561ff36818c69a2190d6bff532f3faa48507b24f5becaafb5fa478_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3578 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9975,2.561141,1.600356,1.346529,2.561141
46,1.2902,0.784079,0.885482,0.714055,0.784079
69,0.7719,0.76334,0.873693,0.698806,0.76334


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 7.9718 - root_mean_squared_error: 2.8234
Epoch 1: val_root_mean_squared_error improved from inf to 0.74137, saving model to cache/ensemble_camembert-base/models/mlp/0ac0f9959f9717338c42b9eeaef5ef749af9097fd91f620a08e3ff6215c13fce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5592 - root_mean_squared_error: 1.5293 - val_loss: 0.5496 - val_root_mean_squared_error: 0.7414
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8961 - root_mean_squared_error: 0.9466
Epoch 2: val_root_mean_squared_error improved from 0.74137 to 0.65207, saving model to cache/ensemble_camembert-base/models/mlp/0ac0f9959f9717338c42b9eeaef5ef749af9097fd91f620a08e3ff6215c13fce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5333 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2181,1.623331,1.2741,1.074968,1.623331
46,1.5264,0.656353,0.810156,0.694735,0.656353
69,0.9364,0.78868,0.888077,0.760996,0.78868


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 7.8357 - root_mean_squared_error: 2.7992
Epoch 1: val_root_mean_squared_error improved from inf to 0.80107, saving model to cache/ensemble_camembert-base/models/mlp/a97cd5e40f170281e4695772b2120d88f1e673ef55f065c8a8b3b3bd68252610_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7972 - root_mean_squared_error: 1.6023 - val_loss: 0.6417 - val_root_mean_squared_error: 0.8011
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6608 - root_mean_squared_error: 0.8129
Epoch 2: val_root_mean_squared_error improved from 0.80107 to 0.31184, saving model to cache/ensemble_camembert-base/models/mlp/a97cd5e40f170281e4695772b2120d88f1e673ef55f065c8a8b3b3bd68252610_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4294 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6193,3.950207,1.987513,1.7459,3.950207
46,1.0174,1.074743,1.036698,0.779091,1.074743
69,0.8064,1.146025,1.070526,0.800472,1.146025


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 4.8427 - root_mean_squared_error: 2.2006
Epoch 1: val_root_mean_squared_error improved from inf to 0.75553, saving model to cache/ensemble_camembert-base/models/mlp/fb5b7acbe1974d93963a1aeb2a128ca48bb4b4190f68d198daa1f6a9814ad171_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5337 - root_mean_squared_error: 1.5396 - val_loss: 0.5708 - val_root_mean_squared_error: 0.7555
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.8516 - root_mean_squared_error: 0.9228
Epoch 2: val_root_mean_squared_error improved from 0.75553 to 0.60029, saving model to cache/ensemble_camembert-base/models/mlp/fb5b7acbe1974d93963a1aeb2a128ca48bb4b4190f68d198daa1f6a9814ad171_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7180 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3302,2.533455,1.591683,1.375469,2.533455
46,1.1326,0.688441,0.829723,0.684092,0.688441
69,0.7344,0.608284,0.779925,0.645187,0.608284


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.8429 - root_mean_squared_error: 2.6159
Epoch 1: val_root_mean_squared_error improved from inf to 0.57012, saving model to cache/ensemble_camembert-base/models/mlp/15f0ffeb8c25cfde80b91ac5c39a4494809774a6bab61bb6a515d5e86bea0805_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0437 - root_mean_squared_error: 1.6755 - val_loss: 0.3250 - val_root_mean_squared_error: 0.5701
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2675 - root_mean_squared_error: 0.5172
Epoch 2: val_root_mean_squared_error improved from 0.57012 to 0.51869, saving model to cache/ensemble_camembert-base/models/mlp/15f0ffeb8c25cfde80b91ac5c39a4494809774a6bab61bb6a515d5e86bea0805_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5553 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7904,2.426676,1.557779,1.206338,2.426676
46,1.4967,0.999163,0.999581,0.868844,0.999163
69,0.6945,0.973124,0.986471,0.852889,0.973124


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 4.4688 - root_mean_squared_error: 2.1140
Epoch 1: val_root_mean_squared_error improved from inf to 1.14502, saving model to cache/ensemble_camembert-base/models/mlp/83d6322667f95012f4c708009b1320d0d49c40cf4549099583da01cd97291e70_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6681 - root_mean_squared_error: 1.5834 - val_loss: 1.3111 - val_root_mean_squared_error: 1.1450
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6164 - root_mean_squared_error: 0.7851
Epoch 2: val_root_mean_squared_error improved from 1.14502 to 0.88728, saving model to cache/ensemble_camembert-base/models/mlp/83d6322667f95012f4c708009b1320d0d49c40cf4549099583da01cd97291e70_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4845 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5528,1.751231,1.323341,1.081767,1.751231
46,1.0618,0.732302,0.855746,0.730585,0.732302
69,0.8319,0.805443,0.897465,0.768046,0.805443


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 7.6861 - root_mean_squared_error: 2.7724
Epoch 1: val_root_mean_squared_error improved from inf to 0.85895, saving model to cache/ensemble_camembert-base/models/mlp/212778823eaeec48c1f54881e07f3507899a63f0b641e875982a9284840d911b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3654 - root_mean_squared_error: 1.4692 - val_loss: 0.7378 - val_root_mean_squared_error: 0.8589
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.3503 - root_mean_squared_error: 1.1620
Epoch 2: val_root_mean_squared_error did not improve from 0.85895
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7743 - root_mean_squared_error: 0.8617 - val_loss: 1.0771 - val_root_mean_squared_error: 1.0379
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7356,2.71917,1.648991,1.347313,2.71917
46,1.2243,0.948465,0.973892,0.785966,0.948465
69,0.8465,0.970523,0.985151,0.809837,0.970523


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.3740 - root_mean_squared_error: 2.7155
Epoch 1: val_root_mean_squared_error improved from inf to 0.64177, saving model to cache/ensemble_camembert-base/models/mlp/0bf41f0d97b891d37681a4e21a455bf8f697b9b85217d4c61552c8456e2a34d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4132 - root_mean_squared_error: 1.4826 - val_loss: 0.4119 - val_root_mean_squared_error: 0.6418
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3068 - root_mean_squared_error: 0.5539
Epoch 2: val_root_mean_squared_error did not improve from 0.64177
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4259 - root_mean_squared_error: 0.6481 - val_loss: 1.2973 - val_root_mean_squared_error: 1.1390
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5927,2.872159,1.694745,1.462582,2.872159
46,1.4,0.753083,0.867803,0.703922,0.753083
69,0.8505,0.746096,0.863768,0.702903,0.746096


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 6.8787 - root_mean_squared_error: 2.6227
Epoch 1: val_root_mean_squared_error improved from inf to 0.61857, saving model to cache/ensemble_camembert-base/models/mlp/e6b36d3d6b78e1b53c98e4cdb974f2ba318a14df8c4148be9704aed74f954cf6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2263 - root_mean_squared_error: 1.7247 - val_loss: 0.3826 - val_root_mean_squared_error: 0.6186
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5194 - root_mean_squared_error: 0.7207
Epoch 2: val_root_mean_squared_error improved from 0.61857 to 0.42527, saving model to cache/ensemble_camembert-base/models/mlp/e6b36d3d6b78e1b53c98e4cdb974f2ba318a14df8c4148be9704aed74f954cf6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4838 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3359,2.9113,1.706253,1.531057,2.9113
46,1.3268,0.583926,0.764151,0.623868,0.583926
69,0.8375,0.569769,0.75483,0.610764,0.569769


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.5035 - root_mean_squared_error: 2.7392
Epoch 1: val_root_mean_squared_error improved from inf to 0.35974, saving model to cache/ensemble_camembert-base/models/mlp/d22ea335e4934831e167bd1ef10707448c1473039f56f22e717ddcfd927c0b8e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9931 - root_mean_squared_error: 1.6569 - val_loss: 0.1294 - val_root_mean_squared_error: 0.3597
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2058 - root_mean_squared_error: 0.4536
Epoch 2: val_root_mean_squared_error did not improve from 0.35974
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4245 - root_mean_squared_error: 0.6468 - val_loss: 0.3017 - val_root_mean_squared_error: 0.5492
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2622,2.125537,1.457922,1.195487,2.125537
46,1.2549,0.798372,0.893517,0.735168,0.798372
69,0.8078,0.713595,0.844746,0.683833,0.713595


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 8.8390 - root_mean_squared_error: 2.9730
Epoch 1: val_root_mean_squared_error improved from inf to 0.65075, saving model to cache/ensemble_camembert-base/models/mlp/e9c2422714cfe0a6d39f46baed3692c9415db91a4dd36102761a41e0baf59773_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3527 - root_mean_squared_error: 1.7505 - val_loss: 0.4235 - val_root_mean_squared_error: 0.6508
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4066 - root_mean_squared_error: 0.6376
Epoch 2: val_root_mean_squared_error improved from 0.65075 to 0.55843, saving model to cache/ensemble_camembert-base/models/mlp/e9c2422714cfe0a6d39f46baed3692c9415db91a4dd36102761a41e0baf59773_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5757 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1423,2.749304,1.658103,1.393802,2.749304
46,1.3858,0.841363,0.917259,0.782375,0.841363
69,0.8914,0.790332,0.889006,0.75793,0.790332


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.8697 - root_mean_squared_error: 2.8053
Epoch 1: val_root_mean_squared_error improved from inf to 0.88971, saving model to cache/ensemble_camembert-base/models/mlp/058cb1a2e758203624fc67ba9ceea93eed789313292a126ee8cf4c0617055949_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0162 - root_mean_squared_error: 1.6618 - val_loss: 0.7916 - val_root_mean_squared_error: 0.8897
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8495 - root_mean_squared_error: 0.9217
Epoch 2: val_root_mean_squared_error improved from 0.88971 to 0.61810, saving model to cache/ensemble_camembert-base/models/mlp/058cb1a2e758203624fc67ba9ceea93eed789313292a126ee8cf4c0617055949_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6035 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3503,2.312668,1.520746,1.254863,2.312668
46,1.3932,0.815328,0.902955,0.734239,0.815328
69,0.7644,0.808358,0.899088,0.732729,0.808358


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 7.8408 - root_mean_squared_error: 2.8001
Epoch 1: val_root_mean_squared_error improved from inf to 0.62041, saving model to cache/ensemble_camembert-base/models/mlp/066a4f6b984a06f829388bd8df53caf4cb813c56fd9570026aef644b9818dd95_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3677 - root_mean_squared_error: 1.7606 - val_loss: 0.3849 - val_root_mean_squared_error: 0.6204
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 73ms/step - loss: 0.3890 - root_mean_squared_error: 0.6237
Epoch 2: val_root_mean_squared_error improved from 0.62041 to 0.46277, saving model to cache/ensemble_camembert-base/models/mlp/066a4f6b984a06f829388bd8df53caf4cb813c56fd9570026aef644b9818dd95_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4996 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7143,2.787953,1.669716,1.434161,2.787953
46,1.013,0.767303,0.875958,0.680882,0.767303
69,0.7287,0.700903,0.837199,0.680576,0.700903


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 5.4853 - root_mean_squared_error: 2.3421
Epoch 1: val_root_mean_squared_error improved from inf to 0.54947, saving model to cache/ensemble_camembert-base/models/mlp/e3b660e372e4d077143c9e676654c6672f576ae75ce1ab434158ac2ccee158c9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0580 - root_mean_squared_error: 1.6828 - val_loss: 0.3019 - val_root_mean_squared_error: 0.5495
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1184 - root_mean_squared_error: 0.3440
Epoch 2: val_root_mean_squared_error improved from 0.54947 to 0.50413, saving model to cache/ensemble_camembert-base/models/mlp/e3b660e372e4d077143c9e676654c6672f576ae75ce1ab434158ac2ccee158c9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3463 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5211,2.574396,1.604493,1.291874,2.574396
46,1.2554,0.946391,0.972826,0.756089,0.946391
69,0.8725,1.015993,1.007965,0.840862,1.015993


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 4.4567 - root_mean_squared_error: 2.1111
Epoch 1: val_root_mean_squared_error improved from inf to 0.74387, saving model to cache/ensemble_camembert-base/models/mlp/cbfba424f12833bfdba4732267b18c628a0f61a166093915d03a12039646bd9c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.8970 - root_mean_squared_error: 1.6413 - val_loss: 0.5533 - val_root_mean_squared_error: 0.7439
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3517 - root_mean_squared_error: 0.5930
Epoch 2: val_root_mean_squared_error improved from 0.74387 to 0.57473, saving model to cache/ensemble_camembert-base/models/mlp/cbfba424f12833bfdba4732267b18c628a0f61a166093915d03a12039646bd9c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4916 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4728,3.560113,1.886826,1.66008,3.560114
46,1.4662,0.874417,0.935103,0.749484,0.874417
69,0.885,0.819112,0.905048,0.739301,0.819112


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 8.9816 - root_mean_squared_error: 2.9969
Epoch 1: val_root_mean_squared_error improved from inf to 0.64729, saving model to cache/ensemble_camembert-base/models/mlp/d698182a979eeca2515e20dc777a4111637315aab4cb4ea6372e2e43bd6788cb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1487 - root_mean_squared_error: 1.6826 - val_loss: 0.4190 - val_root_mean_squared_error: 0.6473
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3764 - root_mean_squared_error: 0.6136
Epoch 2: val_root_mean_squared_error did not improve from 0.64729
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4925 - root_mean_squared_error: 0.6988 - val_loss: 0.4485 - val_root_mean_squared_error: 0.6697
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0788,3.42955,1.851904,1.556105,3.42955
46,1.4923,1.072906,1.035812,0.747648,1.072906
69,0.7917,1.197181,1.094158,0.817498,1.197181


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 9.9862 - root_mean_squared_error: 3.1601
Epoch 1: val_root_mean_squared_error improved from inf to 0.68040, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4810 - root_mean_squared_error: 1.7712 - val_loss: 0.4629 - val_root_mean_squared_error: 0.6804
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3785 - root_mean_squared_error: 0.6152
Epoch 2: val_root_mean_squared_error improved from 0.68040 to 0.50096, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5002 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7738,2.59349,1.610432,1.341841,2.59349
46,1.2655,0.800383,0.894641,0.715425,0.800383
69,0.848,0.707383,0.841061,0.653708,0.707383


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 5.8516 - root_mean_squared_error: 2.4190
Epoch 1: val_root_mean_squared_error improved from inf to 0.57270, saving model to cache/ensemble_camembert-base/models/mlp/b0fde418d37f22d0d5b5c110bf7c7c5fce90b5ca7dd94b521d5b08417a4cd725_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5275 - root_mean_squared_error: 1.5253 - val_loss: 0.3280 - val_root_mean_squared_error: 0.5727
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2617 - root_mean_squared_error: 0.5116
Epoch 2: val_root_mean_squared_error did not improve from 0.57270
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4198 - root_mean_squared_error: 0.6459 - val_loss: 0.4256 - val_root_mean_squared_error: 0.6524
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9285,2.885836,1.698775,1.47067,2.885836
46,1.6069,0.762561,0.873248,0.671734,0.762561
69,0.8484,0.636713,0.797943,0.618259,0.636713


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.9297 - root_mean_squared_error: 2.8160
Epoch 1: val_root_mean_squared_error improved from inf to 1.07442, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8481 - root_mean_squared_error: 1.6128 - val_loss: 1.1544 - val_root_mean_squared_error: 1.0744
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 1.2222 - root_mean_squared_error: 1.1055
Epoch 2: val_root_mean_squared_error improved from 1.07442 to 0.89942, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6740 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9215,2.169403,1.472889,1.226312,2.169403
46,1.6974,0.679673,0.824423,0.716143,0.679673
69,0.7291,0.520091,0.721173,0.634305,0.520091


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 7.2325 - root_mean_squared_error: 2.6893
Epoch 1: val_root_mean_squared_error improved from inf to 0.45237, saving model to cache/ensemble_camembert-base/models/mlp/3d3af924d189cfeb3f8d2ae7288a756ed8b116c4efc6a01f8122c8f3c9d40341_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3004 - root_mean_squared_error: 1.7391 - val_loss: 0.2046 - val_root_mean_squared_error: 0.4524
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1359 - root_mean_squared_error: 0.3687
Epoch 2: val_root_mean_squared_error did not improve from 0.45237
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6059 - root_mean_squared_error: 0.7615 - val_loss: 0.4004 - val_root_mean_squared_error: 0.6328
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2418,2.868127,1.693555,1.427656,2.868127
46,1.3248,0.844606,0.919024,0.724914,0.844606
69,0.8581,0.742912,0.861923,0.695052,0.742912


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 4.8494 - root_mean_squared_error: 2.2021
Epoch 1: val_root_mean_squared_error improved from inf to 0.74712, saving model to cache/ensemble_camembert-base/models/mlp/90d3c73e5c746e18a33d9d3dddcaf150f251aec4f94b7d7ac30e8364d4ec6116_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8576 - root_mean_squared_error: 1.6370 - val_loss: 0.5582 - val_root_mean_squared_error: 0.7471
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.1580 - root_mean_squared_error: 0.3975
Epoch 2: val_root_mean_squared_error did not improve from 0.74712
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5751 - root_mean_squared_error: 0.7480 - val_loss: 0.8258 - val_root_mean_squared_error: 0.9088
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7645,3.052167,1.747045,1.450793,3.052167
46,1.2923,0.940364,0.969724,0.813112,0.940364
69,0.7793,0.894009,0.945521,0.769088,0.894009


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 7.4957 - root_mean_squared_error: 2.7378
Epoch 1: val_root_mean_squared_error improved from inf to 1.04187, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1229 - root_mean_squared_error: 1.6961 - val_loss: 1.0855 - val_root_mean_squared_error: 1.0419
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7280 - root_mean_squared_error: 0.8532
Epoch 2: val_root_mean_squared_error improved from 1.04187 to 0.59865, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4952 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2079,3.681633,1.918758,1.633298,3.681633
46,1.5018,1.058296,1.028735,0.819153,1.058296
69,0.8307,0.989652,0.994813,0.806228,0.989652


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.9370 - root_mean_squared_error: 2.6338
Epoch 1: val_root_mean_squared_error improved from inf to 0.66210, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2134 - root_mean_squared_error: 1.7151 - val_loss: 0.4384 - val_root_mean_squared_error: 0.6621
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4100 - root_mean_squared_error: 0.6403
Epoch 2: val_root_mean_squared_error improved from 0.66210 to 0.58527, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5734 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1225,3.407422,1.84592,1.512519,3.407421
46,1.4365,1.144235,1.069689,0.875186,1.144235
69,0.6717,0.963655,0.981659,0.806101,0.963655


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 8.2289 - root_mean_squared_error: 2.8686
Epoch 1: val_root_mean_squared_error improved from inf to 0.54484, saving model to cache/ensemble_camembert-base/models/mlp/0ef4a28e534245723fd5781547e6471becf4f87dfb0793474aa234ec88389764_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 3.3514 - root_mean_squared_error: 1.7498 - val_loss: 0.2969 - val_root_mean_squared_error: 0.5448
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1329 - root_mean_squared_error: 0.3646
Epoch 2: val_root_mean_squared_error did not improve from 0.54484
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4190 - root_mean_squared_error: 0.6376 - val_loss: 0.7690 - val_root_mean_squared_error: 0.8769
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8708,3.400413,1.844021,1.469381,3.400413
46,1.2753,1.269003,1.126501,0.959453,1.269004
69,0.679,1.222467,1.105652,0.944146,1.222467


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 9.5915 - root_mean_squared_error: 3.0970
Epoch 1: val_root_mean_squared_error improved from inf to 0.64569, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9304 - root_mean_squared_error: 1.6300 - val_loss: 0.4169 - val_root_mean_squared_error: 0.6457
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2972 - root_mean_squared_error: 0.5452
Epoch 2: val_root_mean_squared_error improved from 0.64569 to 0.47381, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4565 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2873,2.653899,1.629079,1.344562,2.653899
46,1.2582,0.879003,0.937552,0.73832,0.879003
69,0.795,0.853866,0.924049,0.725324,0.853866


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 4.9015 - root_mean_squared_error: 2.2139
Epoch 1: val_root_mean_squared_error improved from inf to 0.62659, saving model to cache/ensemble_camembert-base/models/mlp/bb9e7b05d0b7844846c139c788f682a4587c64a7747caf3ae01f00ffaec74d0d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8582 - root_mean_squared_error: 1.6294 - val_loss: 0.3926 - val_root_mean_squared_error: 0.6266
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3492 - root_mean_squared_error: 0.5909
Epoch 2: val_root_mean_squared_error did not improve from 0.62659
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5285 - root_mean_squared_error: 0.7245 - val_loss: 1.4655 - val_root_mean_squared_error: 1.2106
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5341,2.198253,1.482651,1.25276,2.198253
46,1.2031,0.698722,0.835896,0.684681,0.698722
69,0.7936,0.652083,0.807516,0.663949,0.652083


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 5.4389 - root_mean_squared_error: 2.3321
Epoch 1: val_root_mean_squared_error improved from inf to 0.94164, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5293 - root_mean_squared_error: 1.5327 - val_loss: 0.8867 - val_root_mean_squared_error: 0.9416
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.4966 - root_mean_squared_error: 1.2233
Epoch 2: val_root_mean_squared_error improved from 0.94164 to 0.70329, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7684 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7361,2.293388,1.514394,1.282432,2.293388
46,1.1873,0.675234,0.821726,0.697785,0.675234
69,0.8688,0.642705,0.801689,0.68249,0.642705


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 4.9736 - root_mean_squared_error: 2.2302
Epoch 1: val_root_mean_squared_error improved from inf to 1.24533, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6272 - root_mean_squared_error: 1.5630 - val_loss: 1.5508 - val_root_mean_squared_error: 1.2453
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7259 - root_mean_squared_error: 0.8520
Epoch 2: val_root_mean_squared_error improved from 1.24533 to 0.51645, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4526 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9955,1.789841,1.337849,1.146156,1.78984
46,1.1774,0.62375,0.789778,0.636735,0.62375
69,0.9317,0.600804,0.775115,0.616615,0.600804


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 8.3758 - root_mean_squared_error: 2.8941
Epoch 1: val_root_mean_squared_error improved from inf to 0.61789, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1085 - root_mean_squared_error: 1.6897 - val_loss: 0.3818 - val_root_mean_squared_error: 0.6179
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3684 - root_mean_squared_error: 0.6070
Epoch 2: val_root_mean_squared_error improved from 0.61789 to 0.45844, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3918 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8807,2.493293,1.579016,1.357785,2.493293
46,1.3777,0.675157,0.82168,0.618186,0.675157
69,0.8218,0.67051,0.818847,0.622896,0.67051


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 5.0827 - root_mean_squared_error: 2.2545
Epoch 1: val_root_mean_squared_error improved from inf to 0.55926, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4487 - root_mean_squared_error: 1.5162 - val_loss: 0.3128 - val_root_mean_squared_error: 0.5593
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2981 - root_mean_squared_error: 0.5460
Epoch 2: val_root_mean_squared_error improved from 0.55926 to 0.42124, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7619 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5319,3.300085,1.816614,1.508812,3.300085
46,1.2005,1.037191,1.018426,0.789914,1.037191
69,0.6842,0.920088,0.959212,0.733788,0.920088


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 8.2392 - root_mean_squared_error: 2.8704
Epoch 1: val_root_mean_squared_error improved from inf to 1.01890, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0314 - root_mean_squared_error: 1.6629 - val_loss: 1.0382 - val_root_mean_squared_error: 1.0189
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.4955 - root_mean_squared_error: 1.2229
Epoch 2: val_root_mean_squared_error improved from 1.01890 to 0.69193, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5816 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7061,3.030437,1.740815,1.413456,3.030437
46,1.4281,1.074056,1.036367,0.869362,1.074056
69,0.9536,1.062195,1.030629,0.860453,1.062195


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 5.7776 - root_mean_squared_error: 2.4037
Epoch 1: val_root_mean_squared_error improved from inf to 0.54984, saving model to cache/ensemble_camembert-base/models/mlp/d03ef7687352d9539ad20a9489ef19acd31ec059a6862a9a512583b5ee3c9ffe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 2.8046 - root_mean_squared_error: 1.6150 - val_loss: 0.3023 - val_root_mean_squared_error: 0.5498
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2784 - root_mean_squared_error: 0.5277
Epoch 2: val_root_mean_squared_error did not improve from 0.54984
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6599 - root_mean_squared_error: 0.8033 - val_loss: 0.6676 - val_root_mean_squared_error: 0.8171
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9436,2.235935,1.495304,1.121411,2.235935
46,1.3798,1.134044,1.064915,0.875114,1.134044
69,0.877,1.243133,1.114959,0.934661,1.243133


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 7.4765 - root_mean_squared_error: 2.7343
Epoch 1: val_root_mean_squared_error improved from inf to 0.70715, saving model to cache/ensemble_camembert-base/models/mlp/6ee7a61e0c51edc963b5c4a3b46e67d88dfabcb9981508526cab3b47b7999ff3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9299 - root_mean_squared_error: 1.6388 - val_loss: 0.5001 - val_root_mean_squared_error: 0.7072
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.3360 - root_mean_squared_error: 0.5797
Epoch 2: val_root_mean_squared_error improved from 0.70715 to 0.67384, saving model to cache/ensemble_camembert-base/models/mlp/6ee7a61e0c51edc963b5c4a3b46e67d88dfabcb9981508526cab3b47b7999ff3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5721 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0864,2.477364,1.573964,1.31928,2.477364
46,1.3844,0.777422,0.881715,0.707578,0.777422
69,0.7701,0.733788,0.856614,0.691925,0.733788


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 698ms/step - loss: 6.8801 - root_mean_squared_error: 2.6230
Epoch 1: val_root_mean_squared_error improved from inf to 0.61763, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5272 - root_mean_squared_error: 1.5274 - val_loss: 0.3815 - val_root_mean_squared_error: 0.6176
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2996 - root_mean_squared_error: 0.5473
Epoch 2: val_root_mean_squared_error improved from 0.61763 to 0.53079, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4862 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4651,2.631625,1.622228,1.352767,2.631625
46,1.3895,0.815159,0.902861,0.76447,0.815159
69,0.8849,0.85682,0.925646,0.808422,0.85682


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 4.9381 - root_mean_squared_error: 2.2222
Epoch 1: val_root_mean_squared_error improved from inf to 0.66183, saving model to cache/ensemble_camembert-base/models/mlp/3fa44c0dd855d51bd227c7e0b227afbf92085597d6c3e2505bf6dc0731839066_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9413 - root_mean_squared_error: 1.6609 - val_loss: 0.4380 - val_root_mean_squared_error: 0.6618
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5182 - root_mean_squared_error: 0.7199
Epoch 2: val_root_mean_squared_error did not improve from 0.66183
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4256 - root_mean_squared_error: 0.6499 - val_loss: 0.5509 - val_root_mean_squared_error: 0.7422
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0486,2.570999,1.603434,1.375293,2.570999
46,1.3202,0.734112,0.856803,0.733276,0.734112
69,0.7691,0.704637,0.839427,0.71121,0.704637


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 9.8207 - root_mean_squared_error: 3.1338
Epoch 1: val_root_mean_squared_error improved from inf to 0.84210, saving model to cache/ensemble_camembert-base/models/mlp/07c721c138ec732f0fcf9a79f008d8a78607776f01781a6c00fcdd8f35514301_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4335 - root_mean_squared_error: 1.7668 - val_loss: 0.7091 - val_root_mean_squared_error: 0.8421
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.8112 - root_mean_squared_error: 0.9007
Epoch 2: val_root_mean_squared_error improved from 0.84210 to 0.52314, saving model to cache/ensemble_camembert-base/models/mlp/07c721c138ec732f0fcf9a79f008d8a78607776f01781a6c00fcdd8f35514301_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5118 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7689,1.98141,1.407626,1.134448,1.98141
46,1.0645,0.805517,0.897506,0.741212,0.805517
69,0.9479,0.761406,0.872586,0.713475,0.761406


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 5.0544 - root_mean_squared_error: 2.2482
Epoch 1: val_root_mean_squared_error improved from inf to 0.46263, saving model to cache/ensemble_camembert-base/models/mlp/fb49d75f95a3ac4425bb74a03058702b65ca3e49722e4cae29ce0591fc770417_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4069 - root_mean_squared_error: 1.5023 - val_loss: 0.2140 - val_root_mean_squared_error: 0.4626
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1813 - root_mean_squared_error: 0.4258
Epoch 2: val_root_mean_squared_error did not improve from 0.46263
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5594 - root_mean_squared_error: 0.7389 - val_loss: 0.6484 - val_root_mean_squared_error: 0.8052
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7512,1.790281,1.338014,1.148157,1.790282
46,1.3811,0.566613,0.752737,0.64141,0.566613
69,0.8242,0.456797,0.675867,0.584012,0.456797


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 7.5377 - root_mean_squared_error: 2.7455
Epoch 1: val_root_mean_squared_error improved from inf to 0.58380, saving model to cache/ensemble_camembert-base/models/mlp/88b26dbf5ebe0e276ce713f83c865118d1b202ee60e8cbc364ee5a252d0e8135_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0805 - root_mean_squared_error: 1.6794 - val_loss: 0.3408 - val_root_mean_squared_error: 0.5838
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4267 - root_mean_squared_error: 0.6532
Epoch 2: val_root_mean_squared_error did not improve from 0.58380
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5942 - root_mean_squared_error: 0.7639 - val_loss: 0.6057 - val_root_mean_squared_error: 0.7782
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6607,3.011881,1.735477,1.409568,3.011881
46,1.3613,1.061576,1.030328,0.843722,1.061576
69,0.7616,1.056525,1.027874,0.841616,1.056525


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 9.9987 - root_mean_squared_error: 3.1621
Epoch 1: val_root_mean_squared_error improved from inf to 0.58344, saving model to cache/ensemble_camembert-base/models/mlp/4f63c5812d7c65a626aa73fcd436ef60b6d4715b95e45bf09bf0bcd9fdd30d12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9241 - root_mean_squared_error: 1.6246 - val_loss: 0.3404 - val_root_mean_squared_error: 0.5834
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4327 - root_mean_squared_error: 0.6578
Epoch 2: val_root_mean_squared_error improved from 0.58344 to 0.58034, saving model to cache/ensemble_camembert-base/models/mlp/4f63c5812d7c65a626aa73fcd436ef60b6d4715b95e45bf09bf0bcd9fdd30d12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6816 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5757,2.962996,1.721336,1.456746,2.962996
46,1.1316,0.87469,0.935249,0.737164,0.87469
69,0.8744,0.865854,0.930513,0.736511,0.865854


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 8.3292 - root_mean_squared_error: 2.8860
Epoch 1: val_root_mean_squared_error improved from inf to 1.20137, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4501 - root_mean_squared_error: 1.7789 - val_loss: 1.4433 - val_root_mean_squared_error: 1.2014
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.2415 - root_mean_squared_error: 1.1142
Epoch 2: val_root_mean_squared_error improved from 1.20137 to 0.59506, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5644 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3022,2.030561,1.424978,1.17478,2.030561
46,1.3993,0.719555,0.848266,0.713157,0.719556
69,0.8672,0.737929,0.859028,0.733462,0.737929


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 7.4086 - root_mean_squared_error: 2.7219
Epoch 1: val_root_mean_squared_error improved from inf to 0.45675, saving model to cache/ensemble_camembert-base/models/mlp/5170136033579d50495d65ecbad40ce8b7f100136bc373ab352211343c2b1a12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4943 - root_mean_squared_error: 1.5128 - val_loss: 0.2086 - val_root_mean_squared_error: 0.4568
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3475 - root_mean_squared_error: 0.5895
Epoch 2: val_root_mean_squared_error did not improve from 0.45675
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4900 - root_mean_squared_error: 0.6983 - val_loss: 0.7399 - val_root_mean_squared_error: 0.8602
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5944,3.493049,1.86897,1.528318,3.493049
46,1.3612,1.186138,1.0891,0.844091,1.186138
69,0.8703,1.092567,1.045259,0.834503,1.092567


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 7.7638 - root_mean_squared_error: 2.7864
Epoch 1: val_root_mean_squared_error improved from inf to 0.56704, saving model to cache/ensemble_camembert-base/models/mlp/84acc7d4a7a28c8894cc67d8b6b14b7230685d24924d875239b8a2aee6de0aae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8496 - root_mean_squared_error: 1.6116 - val_loss: 0.3215 - val_root_mean_squared_error: 0.5670
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4847 - root_mean_squared_error: 0.6962
Epoch 2: val_root_mean_squared_error did not improve from 0.56704
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6533 - root_mean_squared_error: 0.8032 - val_loss: 0.5688 - val_root_mean_squared_error: 0.7542
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0218,2.489822,1.577917,1.324282,2.489822
46,1.4584,0.73906,0.859686,0.71363,0.73906
69,0.7083,0.725306,0.851649,0.705751,0.725306


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.4464 - root_mean_squared_error: 2.7288
Epoch 1: val_root_mean_squared_error improved from inf to 0.57109, saving model to cache/ensemble_camembert-base/models/mlp/07cf89bb1a4d57367addb8b2148820a0161dd4ddc5f99dc33bc0fa8a0f616415_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1486 - root_mean_squared_error: 1.7025 - val_loss: 0.3261 - val_root_mean_squared_error: 0.5711
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.2075 - root_mean_squared_error: 0.4555
Epoch 2: val_root_mean_squared_error did not improve from 0.57109
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4632 - root_mean_squared_error: 0.6739 - val_loss: 0.5697 - val_root_mean_squared_error: 0.7548
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6313,2.548288,1.596336,1.367135,2.548289
46,1.2006,0.682861,0.826354,0.728641,0.682861
69,0.7727,0.651006,0.806849,0.705587,0.651006


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 5.6255 - root_mean_squared_error: 2.3718
Epoch 1: val_root_mean_squared_error improved from inf to 0.82005, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4633 - root_mean_squared_error: 1.5141 - val_loss: 0.6725 - val_root_mean_squared_error: 0.8200
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5096 - root_mean_squared_error: 0.7139
Epoch 2: val_root_mean_squared_error improved from 0.82005 to 0.53018, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5598 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.406,3.093158,1.758738,1.515654,3.093158
46,1.0477,0.928765,0.963725,0.74646,0.928765
69,0.8005,0.836707,0.914717,0.671878,0.836707


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 647ms/step - loss: 6.5654 - root_mean_squared_error: 2.5623
Epoch 1: val_root_mean_squared_error improved from inf to 0.50266, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8999 - root_mean_squared_error: 1.6389 - val_loss: 0.2527 - val_root_mean_squared_error: 0.5027
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3250 - root_mean_squared_error: 0.5701
Epoch 2: val_root_mean_squared_error improved from 0.50266 to 0.37677, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3636 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5401,1.95623,1.398653,1.199756,1.95623
46,1.5908,0.594019,0.770726,0.61347,0.594019
69,0.8185,0.529103,0.727395,0.609298,0.529103


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 9.7702 - root_mean_squared_error: 3.1257
Epoch 1: val_root_mean_squared_error improved from inf to 0.59555, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5152 - root_mean_squared_error: 1.7896 - val_loss: 0.3547 - val_root_mean_squared_error: 0.5956
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2233 - root_mean_squared_error: 0.4725
Epoch 2: val_root_mean_squared_error improved from 0.59555 to 0.47827, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3912 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2671,2.539171,1.593478,1.361239,2.539171
46,1.4211,0.683761,0.826898,0.69326,0.683761
69,1.0038,0.69213,0.831944,0.711938,0.69213


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 5.4088 - root_mean_squared_error: 2.3257
Epoch 1: val_root_mean_squared_error improved from inf to 0.50939, saving model to cache/ensemble_camembert-base/models/mlp/90627c4cf927a73c683716766299bb210411545779725ff855a9709ed3e568aa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5128 - root_mean_squared_error: 1.5281 - val_loss: 0.2595 - val_root_mean_squared_error: 0.5094
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3926 - root_mean_squared_error: 0.6266
Epoch 2: val_root_mean_squared_error did not improve from 0.50939
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6543 - root_mean_squared_error: 0.8034 - val_loss: 0.5157 - val_root_mean_squared_error: 0.7182
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1465,2.962824,1.721286,1.467274,2.962824
46,1.2364,0.833349,0.912879,0.738381,0.833349
69,0.7488,0.789466,0.888519,0.721638,0.789466


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 738ms/step - loss: 4.9603 - root_mean_squared_error: 2.2272
Epoch 1: val_root_mean_squared_error improved from inf to 0.74629, saving model to cache/ensemble_camembert-base/models/mlp/484585ffd706500a7f7ace1488b27c7e2241b40b06102f345843c09c6fad1282_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4866 - root_mean_squared_error: 1.5229 - val_loss: 0.5569 - val_root_mean_squared_error: 0.7463
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1765 - root_mean_squared_error: 0.4201
Epoch 2: val_root_mean_squared_error improved from 0.74629 to 0.45999, saving model to cache/ensemble_camembert-base/models/mlp/484585ffd706500a7f7ace1488b27c7e2241b40b06102f345843c09c6fad1282_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4069 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8686,2.631603,1.622221,1.347485,2.631603
46,1.3324,0.843009,0.918155,0.795486,0.843009
69,0.8556,0.806072,0.897815,0.775277,0.806072


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 6.6234 - root_mean_squared_error: 2.5736
Epoch 1: val_root_mean_squared_error improved from inf to 0.81911, saving model to cache/ensemble_camembert-base/models/mlp/9c9b88e3d727ee7efcc7e46815c5a0506914622fdc2c6cace5dad8495e0d2070_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7393 - root_mean_squared_error: 1.5926 - val_loss: 0.6709 - val_root_mean_squared_error: 0.8191
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6973 - root_mean_squared_error: 0.8350
Epoch 2: val_root_mean_squared_error did not improve from 0.81911
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6439 - root_mean_squared_error: 0.7988 - val_loss: 0.9246 - val_root_mean_squared_error: 0.9616
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8008,2.67035,1.634121,1.390518,2.67035
46,1.3084,0.74141,0.861052,0.673982,0.74141
69,0.7683,0.707672,0.841233,0.663722,0.707672


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 5.7640 - root_mean_squared_error: 2.4008
Epoch 1: val_root_mean_squared_error improved from inf to 0.51734, saving model to cache/ensemble_camembert-base/models/mlp/c92f44848a2384af63d3cfbddba60d63334221abe02da31e9e1c586b1e2533ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7661 - root_mean_squared_error: 1.6066 - val_loss: 0.2676 - val_root_mean_squared_error: 0.5173
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2247 - root_mean_squared_error: 0.4740
Epoch 2: val_root_mean_squared_error did not improve from 0.51734
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5120 - root_mean_squared_error: 0.7112 - val_loss: 1.1018 - val_root_mean_squared_error: 1.0497
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3241,2.286326,1.51206,1.260699,2.286326
46,1.5182,0.693394,0.832703,0.654669,0.693394
69,0.7482,0.670735,0.818984,0.652473,0.670735


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 5.7611 - root_mean_squared_error: 2.4002
Epoch 1: val_root_mean_squared_error improved from inf to 0.63298, saving model to cache/ensemble_camembert-base/models/mlp/40166beeeec832cc93000864fa474708241e832c774bfc63085e6a64b143b22d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7623 - root_mean_squared_error: 1.6067 - val_loss: 0.4007 - val_root_mean_squared_error: 0.6330
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4323 - root_mean_squared_error: 0.6575
Epoch 2: val_root_mean_squared_error did not improve from 0.63298
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4667 - root_mean_squared_error: 0.6824 - val_loss: 0.4512 - val_root_mean_squared_error: 0.6717
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0168,3.193438,1.787019,1.473602,3.193438
46,1.1899,1.021343,1.010615,0.851568,1.021343
69,0.8147,0.979337,0.989614,0.833521,0.979337


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 6.0799 - root_mean_squared_error: 2.4657
Epoch 1: val_root_mean_squared_error improved from inf to 0.54659, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9833 - root_mean_squared_error: 1.6711 - val_loss: 0.2988 - val_root_mean_squared_error: 0.5466
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3119 - root_mean_squared_error: 0.5585
Epoch 2: val_root_mean_squared_error improved from 0.54659 to 0.38401, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4695 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0658,3.56035,1.886889,1.597737,3.560351
46,1.3769,1.075113,1.036877,0.883286,1.075113
69,1.0003,0.939451,0.969253,0.768394,0.939451


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 6.7428 - root_mean_squared_error: 2.5967
Epoch 1: val_root_mean_squared_error improved from inf to 1.16986, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9133 - root_mean_squared_error: 1.6406 - val_loss: 1.3686 - val_root_mean_squared_error: 1.1699
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.9911 - root_mean_squared_error: 0.9955
Epoch 2: val_root_mean_squared_error improved from 1.16986 to 0.73061, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4288 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0264,2.780087,1.667359,1.448773,2.780087
46,1.3495,0.702546,0.83818,0.649811,0.702546
69,1.0253,0.687084,0.828905,0.638991,0.687084


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.6722 - root_mean_squared_error: 2.9449
Epoch 1: val_root_mean_squared_error improved from inf to 1.10414, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0089 - root_mean_squared_error: 1.6495 - val_loss: 1.2191 - val_root_mean_squared_error: 1.1041
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9910 - root_mean_squared_error: 0.9955
Epoch 2: val_root_mean_squared_error improved from 1.10414 to 0.47561, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4841 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.918,2.045332,1.430151,1.166915,2.045332
46,1.2693,0.764755,0.874503,0.754074,0.764755
69,0.9589,0.757413,0.870295,0.743945,0.757413


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m13s[0m 765ms/step - loss: 9.1635 - root_mean_squared_error: 3.0271
Epoch 1: val_root_mean_squared_error improved from inf to 0.64250, saving model to cache/ensemble_camembert-base/models/mlp/b0610079c7dba3b8ee1400012003fc1bfb340015fdc7a5731526be9ee8f80bb7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.6773 - root_mean_squared_error: 1.8264 - val_loss: 0.4128 - val_root_mean_squared_error: 0.6425
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3889 - root_mean_squared_error: 0.6237
Epoch 2: val_root_mean_squared_error improved from 0.64250 to 0.59808, saving model to cache/ensemble_camembert-base/models/mlp/b0610079c7dba3b8ee1400012003fc1bfb340015fdc7a5731526be9ee8f80bb7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5406 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4033,1.85012,1.360191,1.148038,1.85012
46,1.4711,0.609591,0.780763,0.6188,0.609591
69,0.7814,0.52532,0.72479,0.572264,0.52532


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 647ms/step - loss: 6.9237 - root_mean_squared_error: 2.6313
Epoch 1: val_root_mean_squared_error improved from inf to 0.48211, saving model to cache/ensemble_camembert-base/models/mlp/1f19d39383ed69b8645ac611b534cdb8caa64f00d334c182edaa4880027af372_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9476 - root_mean_squared_error: 1.6501 - val_loss: 0.2324 - val_root_mean_squared_error: 0.4821
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2077 - root_mean_squared_error: 0.4558
Epoch 2: val_root_mean_squared_error improved from 0.48211 to 0.37332, saving model to cache/ensemble_camembert-base/models/mlp/1f19d39383ed69b8645ac611b534cdb8caa64f00d334c182edaa4880027af372_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4833 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3084,2.273573,1.507837,1.244495,2.273573
46,1.1501,0.751212,0.866725,0.709605,0.751212
69,0.8664,0.605423,0.778089,0.634582,0.605423


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 8.1633 - root_mean_squared_error: 2.8571
Epoch 1: val_root_mean_squared_error improved from inf to 0.43441, saving model to cache/ensemble_camembert-base/models/mlp/edf1d81ff65cd9505644899b7ca0ef35b4fd55e7e5af21c3be7c193deed234aa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8396 - root_mean_squared_error: 1.6127 - val_loss: 0.1887 - val_root_mean_squared_error: 0.4344
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2127 - root_mean_squared_error: 0.4612
Epoch 2: val_root_mean_squared_error improved from 0.43441 to 0.33570, saving model to cache/ensemble_camembert-base/models/mlp/edf1d81ff65cd9505644899b7ca0ef35b4fd55e7e5af21c3be7c193deed234aa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4924 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7521,2.199714,1.483143,1.314903,2.199714
46,1.4134,0.47225,0.687205,0.575924,0.47225
69,0.8701,0.463269,0.680639,0.575962,0.463269


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 690ms/step - loss: 5.9186 - root_mean_squared_error: 2.4328
Epoch 1: val_root_mean_squared_error improved from inf to 0.49083, saving model to cache/ensemble_camembert-base/models/mlp/9c22f300f9ac6d53ef5d8efec5b741b689fe7911c45cf702ff11b61406cb03b8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8383 - root_mean_squared_error: 1.6211 - val_loss: 0.2409 - val_root_mean_squared_error: 0.4908
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3132 - root_mean_squared_error: 0.5597
Epoch 2: val_root_mean_squared_error improved from 0.49083 to 0.29882, saving model to cache/ensemble_camembert-base/models/mlp/9c22f300f9ac6d53ef5d8efec5b741b689fe7911c45cf702ff11b61406cb03b8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5078 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5474,2.480582,1.574986,1.256168,2.480582
46,1.4327,0.924159,0.961332,0.791911,0.924159
69,0.8881,0.91845,0.958358,0.803557,0.91845


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 7.2194 - root_mean_squared_error: 2.6869
Epoch 1: val_root_mean_squared_error improved from inf to 0.88933, saving model to cache/ensemble_camembert-base/models/mlp/a3db82ad6662ce8b1ee6649e882df797a916e2c17db0410ee3b7b435ddcdcaa7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6045 - root_mean_squared_error: 1.5407 - val_loss: 0.7909 - val_root_mean_squared_error: 0.8893
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.4320 - root_mean_squared_error: 0.6573
Epoch 2: val_root_mean_squared_error did not improve from 0.88933
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4669 - root_mean_squared_error: 0.6823 - val_loss: 0.9070 - val_root_mean_squared_error: 0.9524
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.085,1.7157,1.309847,1.081791,1.7157
46,1.1943,0.754701,0.868735,0.697696,0.754701
69,0.949,0.672182,0.819867,0.650083,0.672182


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 7.9452 - root_mean_squared_error: 2.8187
Epoch 1: val_root_mean_squared_error improved from inf to 0.95063, saving model to cache/ensemble_camembert-base/models/mlp/edbf7ca8d0c25e0d62d7004349518e060f766b309fe0ea842f392bd0e6ab577e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0659 - root_mean_squared_error: 1.6721 - val_loss: 0.9037 - val_root_mean_squared_error: 0.9506
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6624 - root_mean_squared_error: 0.8139
Epoch 2: val_root_mean_squared_error improved from 0.95063 to 0.62464, saving model to cache/ensemble_camembert-base/models/mlp/edbf7ca8d0c25e0d62d7004349518e060f766b309fe0ea842f392bd0e6ab577e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5141 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.874,2.683516,1.638144,1.366732,2.683516
46,1.3997,0.851896,0.922982,0.808257,0.851896
69,0.7869,0.821662,0.906456,0.78087,0.821662


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 8.7193 - root_mean_squared_error: 2.9528
Epoch 1: val_root_mean_squared_error improved from inf to 0.59669, saving model to cache/ensemble_camembert-base/models/mlp/7f824c1bb617613e10b2fd49d55f9af7652784a002a3a20d756383dcdbdfb901_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7920 - root_mean_squared_error: 1.5980 - val_loss: 0.3560 - val_root_mean_squared_error: 0.5967
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3438 - root_mean_squared_error: 0.5863
Epoch 2: val_root_mean_squared_error did not improve from 0.59669
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3338 - root_mean_squared_error: 0.5774 - val_loss: 0.7604 - val_root_mean_squared_error: 0.8720
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6571,2.550068,1.596893,1.341741,2.550068
46,1.4107,0.786954,0.887104,0.714244,0.786954
69,0.7806,0.767562,0.876106,0.702618,0.767562


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 7.2125 - root_mean_squared_error: 2.6856
Epoch 1: val_root_mean_squared_error improved from inf to 0.82601, saving model to cache/ensemble_camembert-base/models/mlp/4e0064a8e78e8cdbf9ea0375b98dc5d9b967ed98ad5be5fd5343ba91613f3a74_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5223 - root_mean_squared_error: 1.5224 - val_loss: 0.6823 - val_root_mean_squared_error: 0.8260
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1427 - root_mean_squared_error: 1.0690
Epoch 2: val_root_mean_squared_error improved from 0.82601 to 0.67276, saving model to cache/ensemble_camembert-base/models/mlp/4e0064a8e78e8cdbf9ea0375b98dc5d9b967ed98ad5be5fd5343ba91613f3a74_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7487 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2175,1.894771,1.376507,1.147522,1.894771
46,1.4691,0.690076,0.830708,0.691805,0.690076
69,0.923,0.75115,0.866689,0.732581,0.75115


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.7908 - root_mean_squared_error: 2.6059
Epoch 1: val_root_mean_squared_error improved from inf to 0.81432, saving model to cache/ensemble_camembert-base/models/mlp/59182cd624876942ae124f2e6db16718b98fa8b0ddb41f79528c5e52d7ffd45b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8156 - root_mean_squared_error: 1.6126 - val_loss: 0.6631 - val_root_mean_squared_error: 0.8143
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4746 - root_mean_squared_error: 0.6889
Epoch 2: val_root_mean_squared_error improved from 0.81432 to 0.75152, saving model to cache/ensemble_camembert-base/models/mlp/59182cd624876942ae124f2e6db16718b98fa8b0ddb41f79528c5e52d7ffd45b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4162 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4558,3.933743,1.983367,1.715056,3.933744
46,1.0189,1.127415,1.061798,0.839581,1.127415
69,0.8315,1.330118,1.153307,0.885741,1.330118


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 5.6477 - root_mean_squared_error: 2.3765
Epoch 1: val_root_mean_squared_error improved from inf to 0.79484, saving model to cache/ensemble_camembert-base/models/mlp/ad90d34fe93338466f2f9bd3afe3a5cfe6e47861a420709211b8f1cc9ce9cf29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9200 - root_mean_squared_error: 1.6449 - val_loss: 0.6318 - val_root_mean_squared_error: 0.7948
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8487 - root_mean_squared_error: 0.9212
Epoch 2: val_root_mean_squared_error improved from 0.79484 to 0.43242, saving model to cache/ensemble_camembert-base/models/mlp/ad90d34fe93338466f2f9bd3afe3a5cfe6e47861a420709211b8f1cc9ce9cf29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.5834 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0566,2.161484,1.470199,1.279288,2.161484
46,1.1277,0.544642,0.737999,0.614453,0.544642
69,0.8237,0.554488,0.744639,0.620212,0.554488


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.3972 - root_mean_squared_error: 2.5293
Epoch 1: val_root_mean_squared_error improved from inf to 0.65262, saving model to cache/ensemble_camembert-base/models/mlp/7d3c0c6328afdbc748e177a561d1f7d819f226c29f0f2f9558c66b94238f315d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0171 - root_mean_squared_error: 1.6725 - val_loss: 0.4259 - val_root_mean_squared_error: 0.6526
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3952 - root_mean_squared_error: 0.6286
Epoch 2: val_root_mean_squared_error did not improve from 0.65262
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5785 - root_mean_squared_error: 0.7578 - val_loss: 0.6344 - val_root_mean_squared_error: 0.7965
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.696,2.713398,1.64724,1.287346,2.713398
46,1.4192,1.058067,1.028624,0.893471,1.058067
69,0.6723,0.868258,0.931803,0.798654,0.868258


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 647ms/step - loss: 5.0561 - root_mean_squared_error: 2.2486
Epoch 1: val_root_mean_squared_error improved from inf to 0.58699, saving model to cache/ensemble_camembert-base/models/mlp/5f92f14e53dfa2673f9ec0b50420c519858e2aa4d8d9e494c3e81c2c426a4fe9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5772 - root_mean_squared_error: 1.5526 - val_loss: 0.3446 - val_root_mean_squared_error: 0.5870
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2075 - root_mean_squared_error: 0.4556
Epoch 2: val_root_mean_squared_error did not improve from 0.58699
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5924 - root_mean_squared_error: 0.7608 - val_loss: 0.7075 - val_root_mean_squared_error: 0.8411
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9504,2.071867,1.439398,1.259076,2.071867
46,1.0171,0.514036,0.716963,0.594741,0.514036
69,0.8575,0.478179,0.691505,0.582197,0.478179


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 9.4610 - root_mean_squared_error: 3.0759
Epoch 1: val_root_mean_squared_error improved from inf to 0.52614, saving model to cache/ensemble_camembert-base/models/mlp/6862601ef66b511658c1c0ce354b99e4bb4211d061d2a32cb8c2cc333c7d57cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7286 - root_mean_squared_error: 1.5672 - val_loss: 0.2768 - val_root_mean_squared_error: 0.5261
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6810 - root_mean_squared_error: 0.8252
Epoch 2: val_root_mean_squared_error did not improve from 0.52614
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7178 - root_mean_squared_error: 0.8370 - val_loss: 0.7658 - val_root_mean_squared_error: 0.8751
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5182,2.795032,1.671835,1.387496,2.795032
46,1.2194,0.893264,0.945127,0.709972,0.893264
69,0.7766,0.886625,0.941607,0.722879,0.886625


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.5406 - root_mean_squared_error: 2.5575
Epoch 1: val_root_mean_squared_error improved from inf to 0.89935, saving model to cache/ensemble_camembert-base/models/mlp/968d1459737c3c8c7e762028e541387395a8c5bf3762f130a20f765abbe81865_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3579 - root_mean_squared_error: 1.4718 - val_loss: 0.8088 - val_root_mean_squared_error: 0.8994
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 1.0712 - root_mean_squared_error: 1.0350
Epoch 2: val_root_mean_squared_error improved from 0.89935 to 0.84688, saving model to cache/ensemble_camembert-base/models/mlp/968d1459737c3c8c7e762028e541387395a8c5bf3762f130a20f765abbe81865_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6032 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4426,2.962296,1.721132,1.492092,2.962296
46,1.4141,0.769004,0.876929,0.71662,0.769004
69,0.8249,0.744587,0.862895,0.701896,0.744587


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.4547 - root_mean_squared_error: 2.5406
Epoch 1: val_root_mean_squared_error improved from inf to 0.57352, saving model to cache/ensemble_camembert-base/models/mlp/2234f53ac1fbcc4c2acb7bb3e0088cff79de93fdaa13fa592ad9317737fa378e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0237 - root_mean_squared_error: 1.6778 - val_loss: 0.3289 - val_root_mean_squared_error: 0.5735
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3254 - root_mean_squared_error: 0.5704
Epoch 2: val_root_mean_squared_error did not improve from 0.57352
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4839 - root_mean_squared_error: 0.6940 - val_loss: 0.5177 - val_root_mean_squared_error: 0.7195
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3499,2.667492,1.633246,1.423728,2.667492
46,1.3335,0.644931,0.803076,0.663525,0.644931
69,0.8141,0.634461,0.79653,0.649495,0.634461


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.1867 - root_mean_squared_error: 2.6808
Epoch 1: val_root_mean_squared_error improved from inf to 0.44129, saving model to cache/ensemble_camembert-base/models/mlp/48f1f6fc14b3de91c86c80b8f23c432fc07ea2cc7a51f03b28b743296411b248_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9674 - root_mean_squared_error: 1.6506 - val_loss: 0.1947 - val_root_mean_squared_error: 0.4413
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2333 - root_mean_squared_error: 0.4830
Epoch 2: val_root_mean_squared_error improved from 0.44129 to 0.37021, saving model to cache/ensemble_camembert-base/models/mlp/48f1f6fc14b3de91c86c80b8f23c432fc07ea2cc7a51f03b28b743296411b248_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3897 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0073,2.002517,1.415103,1.208863,2.002517
46,1.2599,0.625279,0.790746,0.650029,0.625279
69,0.8603,0.572768,0.756814,0.620021,0.572768


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.6611 - root_mean_squared_error: 2.7679
Epoch 1: val_root_mean_squared_error improved from inf to 0.56570, saving model to cache/ensemble_camembert-base/models/mlp/436dbe684b7a5fb248ccfa5510592944a68e1d97b9091e58fcdd68aea785589c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9538 - root_mean_squared_error: 1.6455 - val_loss: 0.3200 - val_root_mean_squared_error: 0.5657
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 102ms/step - loss: 0.5302 - root_mean_squared_error: 0.7282
Epoch 2: val_root_mean_squared_error improved from 0.56570 to 0.35983, saving model to cache/ensemble_camembert-base/models/mlp/436dbe684b7a5fb248ccfa5510592944a68e1d97b9091e58fcdd68aea785589c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6334 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0246,2.552873,1.597771,1.298443,2.552873
46,1.3386,0.914177,0.956126,0.843914,0.914177
69,0.861,0.832214,0.912258,0.803789,0.832214


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.7018 - root_mean_squared_error: 2.7752
Epoch 1: val_root_mean_squared_error improved from inf to 0.77413, saving model to cache/ensemble_camembert-base/models/mlp/89a1b7ca90a7ae3ec5997d0b36bff37f5050a9c3287a72bd8891183bf6951e04_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7813 - root_mean_squared_error: 1.5946 - val_loss: 0.5993 - val_root_mean_squared_error: 0.7741
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4779 - root_mean_squared_error: 0.6913
Epoch 2: val_root_mean_squared_error improved from 0.77413 to 0.41595, saving model to cache/ensemble_camembert-base/models/mlp/89a1b7ca90a7ae3ec5997d0b36bff37f5050a9c3287a72bd8891183bf6951e04_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5530 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3209,2.065887,1.437319,1.177054,2.065887
46,1.3582,0.774552,0.880086,0.735156,0.774552
69,0.7493,0.802433,0.895786,0.74619,0.802433


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 7.5947 - root_mean_squared_error: 2.7559
Epoch 1: val_root_mean_squared_error improved from inf to 0.51655, saving model to cache/ensemble_camembert-base/models/mlp/39b40efd35157c83271fce2258d3fb7922ba1bd330144fbd18911e62e894b850_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3193 - root_mean_squared_error: 1.7448 - val_loss: 0.2668 - val_root_mean_squared_error: 0.5165
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1637 - root_mean_squared_error: 0.4047
Epoch 2: val_root_mean_squared_error did not improve from 0.51655
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5544 - root_mean_squared_error: 0.7378 - val_loss: 0.5435 - val_root_mean_squared_error: 0.7372
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7629,2.667145,1.63314,1.388239,2.667145
46,1.1311,0.768821,0.876824,0.695951,0.768821
69,0.7934,0.557718,0.746805,0.559893,0.557718


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 4.9410 - root_mean_squared_error: 2.2228
Epoch 1: val_root_mean_squared_error improved from inf to 0.76546, saving model to cache/ensemble_camembert-base/models/mlp/ae2e1a66e05ced8912f85c816ab9b17f97c1b30e2fd5f110c260782495713c07_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7142 - root_mean_squared_error: 1.5837 - val_loss: 0.5859 - val_root_mean_squared_error: 0.7655
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4174 - root_mean_squared_error: 0.6460
Epoch 2: val_root_mean_squared_error improved from 0.76546 to 0.42738, saving model to cache/ensemble_camembert-base/models/mlp/ae2e1a66e05ced8912f85c816ab9b17f97c1b30e2fd5f110c260782495713c07_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4298 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8207,2.717161,1.648381,1.371153,2.717161
46,1.2047,0.872097,0.933861,0.743123,0.872097
69,0.973,0.87497,0.935399,0.764398,0.87497


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 4.6873 - root_mean_squared_error: 2.1650
Epoch 1: val_root_mean_squared_error improved from inf to 0.75720, saving model to cache/ensemble_camembert-base/models/mlp/c08f9ed9655309ab0802cddf294509a9d64a3b892a5781350a2d52828796b019_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8394 - root_mean_squared_error: 1.6301 - val_loss: 0.5734 - val_root_mean_squared_error: 0.7572
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3844 - root_mean_squared_error: 0.6200
Epoch 2: val_root_mean_squared_error improved from 0.75720 to 0.55604, saving model to cache/ensemble_camembert-base/models/mlp/c08f9ed9655309ab0802cddf294509a9d64a3b892a5781350a2d52828796b019_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4203 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3867,3.716712,1.927878,1.716113,3.716713
46,1.3077,0.878932,0.937514,0.74879,0.878932
69,0.9811,0.866757,0.930998,0.739139,0.866757


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 695ms/step - loss: 8.8930 - root_mean_squared_error: 2.9821
Epoch 1: val_root_mean_squared_error improved from inf to 0.59011, saving model to cache/ensemble_camembert-base/models/mlp/d5583e4c67c48a228f35c2e9f0729b39b80ad67c846c06e6d62c20a532191b8c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0617 - root_mean_squared_error: 1.6637 - val_loss: 0.3482 - val_root_mean_squared_error: 0.5901
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4230 - root_mean_squared_error: 0.6504
Epoch 2: val_root_mean_squared_error did not improve from 0.59011
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3364 - root_mean_squared_error: 0.5754 - val_loss: 1.1403 - val_root_mean_squared_error: 1.0678
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9515,3.506286,1.872508,1.589762,3.506286
46,1.6138,1.064613,1.031801,0.762711,1.064613
69,0.8674,0.891178,0.944023,0.742572,0.891178


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 8.8456 - root_mean_squared_error: 2.9742
Epoch 1: val_root_mean_squared_error improved from inf to 0.69613, saving model to cache/ensemble_camembert-base/models/mlp/046669bded4eb526f94b634a704da991e82ceafe1170f0bbd45079b8b8116890_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8894 - root_mean_squared_error: 1.6132 - val_loss: 0.4846 - val_root_mean_squared_error: 0.6961
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5918 - root_mean_squared_error: 0.7693
Epoch 2: val_root_mean_squared_error did not improve from 0.69613
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7626 - root_mean_squared_error: 0.8632 - val_loss: 0.5956 - val_root_mean_squared_error: 0.7717
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0181,2.624449,1.620015,1.372852,2.624449
46,1.4661,0.743912,0.862503,0.67316,0.743912
69,0.9054,0.697766,0.835324,0.659296,0.697766


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 698ms/step - loss: 5.2428 - root_mean_squared_error: 2.2897
Epoch 1: val_root_mean_squared_error improved from inf to 0.74693, saving model to cache/ensemble_camembert-base/models/mlp/b7b0b74bd389cd4600fb1c924840f539de51173128915e150c52fc7a7d53c2fd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5298 - root_mean_squared_error: 1.5322 - val_loss: 0.5579 - val_root_mean_squared_error: 0.7469
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2003 - root_mean_squared_error: 0.4475
Epoch 2: val_root_mean_squared_error improved from 0.74693 to 0.37326, saving model to cache/ensemble_camembert-base/models/mlp/b7b0b74bd389cd4600fb1c924840f539de51173128915e150c52fc7a7d53c2fd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3562 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3143,2.884698,1.69844,1.46775,2.884698
46,1.7386,0.769158,0.877017,0.679327,0.769158
69,0.8615,0.664657,0.815265,0.639348,0.664657


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.3867 - root_mean_squared_error: 2.7178
Epoch 1: val_root_mean_squared_error improved from inf to 0.99743, saving model to cache/ensemble_camembert-base/models/mlp/095265363eb408ee772168200c9599c4ca2b8b3c5ca57df55a4c507092b76fca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7217 - root_mean_squared_error: 1.5764 - val_loss: 0.9949 - val_root_mean_squared_error: 0.9974
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.2206 - root_mean_squared_error: 1.1048
Epoch 2: val_root_mean_squared_error improved from 0.99743 to 0.86846, saving model to cache/ensemble_camembert-base/models/mlp/095265363eb408ee772168200c9599c4ca2b8b3c5ca57df55a4c507092b76fca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.9882 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.03,1.903634,1.379722,1.13482,1.903634
46,1.8689,0.682017,0.825843,0.713422,0.682017
69,0.791,0.536283,0.732314,0.614753,0.536283


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.2993 - root_mean_squared_error: 2.7017
Epoch 1: val_root_mean_squared_error improved from inf to 0.50705, saving model to cache/ensemble_camembert-base/models/mlp/27fa6663f4946e24e468ade7e23ac63fc914c31bdc4cb045372b9aa696326b46_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4578 - root_mean_squared_error: 1.7806 - val_loss: 0.2571 - val_root_mean_squared_error: 0.5071
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1454 - root_mean_squared_error: 0.3813
Epoch 2: val_root_mean_squared_error did not improve from 0.50705
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5895 - root_mean_squared_error: 0.7478 - val_loss: 0.6511 - val_root_mean_squared_error: 0.8069
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2944,2.726176,1.651114,1.37612,2.726176
46,1.3945,0.799563,0.894183,0.697312,0.799563
69,0.7855,0.764399,0.874299,0.692712,0.764399


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 4.6728 - root_mean_squared_error: 2.1617
Epoch 1: val_root_mean_squared_error improved from inf to 0.59949, saving model to cache/ensemble_camembert-base/models/mlp/fc9839c805b360b0fa9ae962b9afac00a945b4cf8d9a2d068df9712894d0fba4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6806 - root_mean_squared_error: 1.5882 - val_loss: 0.3594 - val_root_mean_squared_error: 0.5995
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3346 - root_mean_squared_error: 0.5784
Epoch 2: val_root_mean_squared_error did not improve from 0.59949
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7756 - root_mean_squared_error: 0.8703 - val_loss: 0.5450 - val_root_mean_squared_error: 0.7382
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8619,2.836382,1.684156,1.362076,2.836382
46,1.3783,0.985372,0.992659,0.842229,0.985372
69,0.7592,0.838076,0.915465,0.748431,0.838076


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 7.4592 - root_mean_squared_error: 2.7312
Epoch 1: val_root_mean_squared_error improved from inf to 0.91113, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3670 - root_mean_squared_error: 1.7591 - val_loss: 0.8302 - val_root_mean_squared_error: 0.9111
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4979 - root_mean_squared_error: 0.7056
Epoch 2: val_root_mean_squared_error improved from 0.91113 to 0.75578, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4391 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3038,3.490531,1.868296,1.596755,3.490531
46,1.5712,0.973495,0.986658,0.754021,0.973495
69,0.8256,0.894531,0.945797,0.768661,0.894531


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 6.1321 - root_mean_squared_error: 2.4763
Epoch 1: val_root_mean_squared_error improved from inf to 0.65210, saving model to cache/ensemble_camembert-base/models/mlp/49bec529ad8d68925f64afc706996070e79d54e98bd3ecd07704d2fc81a3d290_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3790 - root_mean_squared_error: 1.7650 - val_loss: 0.4252 - val_root_mean_squared_error: 0.6521
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5344 - root_mean_squared_error: 0.7310
Epoch 2: val_root_mean_squared_error improved from 0.65210 to 0.44602, saving model to cache/ensemble_camembert-base/models/mlp/49bec529ad8d68925f64afc706996070e79d54e98bd3ecd07704d2fc81a3d290_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3964 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3669,3.412091,1.847184,1.559143,3.41209
46,1.4417,1.010337,1.005155,0.817063,1.010337
69,0.6036,0.731136,0.855065,0.670124,0.731136


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 8.3791 - root_mean_squared_error: 2.8947
Epoch 1: val_root_mean_squared_error improved from inf to 0.75524, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1993 - root_mean_squared_error: 1.7117 - val_loss: 0.5704 - val_root_mean_squared_error: 0.7552
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2665 - root_mean_squared_error: 0.5163
Epoch 2: val_root_mean_squared_error improved from 0.75524 to 0.40290, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4586 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2742,3.013041,1.735811,1.33712,3.013041
46,1.3851,1.226885,1.107648,0.926819,1.226885
69,0.7174,1.189632,1.090702,0.931979,1.189632


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 646ms/step - loss: 8.5110 - root_mean_squared_error: 2.9174
Epoch 1: val_root_mean_squared_error improved from inf to 0.63460, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8198 - root_mean_squared_error: 1.6008 - val_loss: 0.4027 - val_root_mean_squared_error: 0.6346
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5185 - root_mean_squared_error: 0.7200
Epoch 2: val_root_mean_squared_error improved from 0.63460 to 0.45676, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5155 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3844,2.457908,1.567772,1.260497,2.457908
46,1.4915,0.943306,0.97124,0.763121,0.943306
69,0.7868,0.907949,0.952863,0.740006,0.907949


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 6.0069 - root_mean_squared_error: 2.4509
Epoch 1: val_root_mean_squared_error improved from inf to 0.58372, saving model to cache/ensemble_camembert-base/models/mlp/c24bae4a232597417afa0883aaca76b9267952ce314546f0e463c4442e56b3f8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6951 - root_mean_squared_error: 1.5728 - val_loss: 0.3407 - val_root_mean_squared_error: 0.5837
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2390 - root_mean_squared_error: 0.4889
Epoch 2: val_root_mean_squared_error did not improve from 0.58372
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.8423 - root_mean_squared_error: 0.8999 - val_loss: 0.7896 - val_root_mean_squared_error: 0.8886
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7198,2.132649,1.460359,1.238055,2.13265
46,1.1777,0.681618,0.825602,0.66823,0.681618
69,0.8325,0.625325,0.790775,0.633238,0.625325


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 643ms/step - loss: 5.8886 - root_mean_squared_error: 2.4266
Epoch 1: val_root_mean_squared_error improved from inf to 0.74208, saving model to cache/ensemble_camembert-base/models/mlp/9dd0bcbfe8a9d1b0e3ced789f2aed0abb5ac05077df263d2b45578c2971a8b64_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4090 - root_mean_squared_error: 1.4915 - val_loss: 0.5507 - val_root_mean_squared_error: 0.7421
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9845 - root_mean_squared_error: 0.9922
Epoch 2: val_root_mean_squared_error did not improve from 0.74208
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7077 - root_mean_squared_error: 0.8230 - val_loss: 0.7010 - val_root_mean_squared_error: 0.8373
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5114,2.213567,1.487806,1.249376,2.213568
46,1.3725,0.68623,0.82839,0.683158,0.68623
69,0.8772,0.634126,0.79632,0.658272,0.634126


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 4.7224 - root_mean_squared_error: 2.1731
Epoch 1: val_root_mean_squared_error improved from inf to 0.61973, saving model to cache/ensemble_camembert-base/models/mlp/f4de0cb00be1f6c7f3b79a2cefa1cc53db11c0fbda5c94979f4191552b2b9143_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5916 - root_mean_squared_error: 1.5565 - val_loss: 0.3841 - val_root_mean_squared_error: 0.6197
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.3056 - root_mean_squared_error: 0.5528
Epoch 2: val_root_mean_squared_error did not improve from 0.61973
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4747 - root_mean_squared_error: 0.6863 - val_loss: 0.4820 - val_root_mean_squared_error: 0.6943
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9644,1.681509,1.29673,1.101862,1.681509
46,1.3915,0.637103,0.798187,0.637002,0.637103
69,0.9381,0.511094,0.714908,0.576645,0.511094


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 687ms/step - loss: 8.5803 - root_mean_squared_error: 2.9292
Epoch 1: val_root_mean_squared_error improved from inf to 0.71251, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9861 - root_mean_squared_error: 1.6526 - val_loss: 0.5077 - val_root_mean_squared_error: 0.7125
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.4136 - root_mean_squared_error: 0.6431
Epoch 2: val_root_mean_squared_error improved from 0.71251 to 0.42971, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5426 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6464,2.696533,1.642112,1.427345,2.696533
46,1.2149,0.697868,0.835385,0.641829,0.697868
69,0.8639,0.685894,0.828187,0.661627,0.685894


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 5.1967 - root_mean_squared_error: 2.2796
Epoch 1: val_root_mean_squared_error improved from inf to 0.64274, saving model to cache/ensemble_camembert-base/models/mlp/997254962ddefe227569a42e24e51898dc13590b4207bff0d24ccc1b3571687d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3983 - root_mean_squared_error: 1.4986 - val_loss: 0.4131 - val_root_mean_squared_error: 0.6427
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3956 - root_mean_squared_error: 0.6290
Epoch 2: val_root_mean_squared_error did not improve from 0.64274
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.8034 - root_mean_squared_error: 0.8815 - val_loss: 0.5727 - val_root_mean_squared_error: 0.7567
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4949,3.464577,1.861337,1.536276,3.464576
46,1.2539,1.151912,1.073271,0.886054,1.151912
69,0.6578,1.237711,1.112525,0.883993,1.237711


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 643ms/step - loss: 8.4777 - root_mean_squared_error: 2.9116
Epoch 1: val_root_mean_squared_error improved from inf to 0.63493, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8449 - root_mean_squared_error: 1.6051 - val_loss: 0.4031 - val_root_mean_squared_error: 0.6349
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2326 - root_mean_squared_error: 0.4823
Epoch 2: val_root_mean_squared_error improved from 0.63493 to 0.56437, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4552 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9472,2.831243,1.68263,1.366274,2.831243
46,1.4493,1.006334,1.003162,0.825694,1.006334
69,0.888,1.037348,1.018503,0.834965,1.037348


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 6.0345 - root_mean_squared_error: 2.4565
Epoch 1: val_root_mean_squared_error improved from inf to 0.59447, saving model to cache/ensemble_camembert-base/models/mlp/f0814ebe8f8e9e305b9f5c6b641236468675b4eb3d8137449aa7dbb284e02454_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6221 - root_mean_squared_error: 1.5591 - val_loss: 0.3534 - val_root_mean_squared_error: 0.5945
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2337 - root_mean_squared_error: 0.4834
Epoch 2: val_root_mean_squared_error did not improve from 0.59447
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5044 - root_mean_squared_error: 0.7024 - val_loss: 1.3399 - val_root_mean_squared_error: 1.1575
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.003,2.472684,1.572477,1.255715,2.472684
46,1.4031,0.959273,0.979425,0.766709,0.959273
69,0.9372,1.071175,1.034976,0.841985,1.071175


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 6.8922 - root_mean_squared_error: 2.6253
Epoch 1: val_root_mean_squared_error improved from inf to 0.48681, saving model to cache/ensemble_camembert-base/models/mlp/53e3cfbc2042fca9cdd32768a3cbd77c2f5a9a784a0b77a99cf4a19f30c5c78c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9117 - root_mean_squared_error: 1.6343 - val_loss: 0.2370 - val_root_mean_squared_error: 0.4868
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1903 - root_mean_squared_error: 0.4363
Epoch 2: val_root_mean_squared_error did not improve from 0.48681
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5440 - root_mean_squared_error: 0.7264 - val_loss: 0.5066 - val_root_mean_squared_error: 0.7117
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4439,2.041652,1.428864,1.223613,2.041652
46,1.3954,0.590611,0.768512,0.605373,0.590611
69,0.8436,0.627038,0.791858,0.651578,0.627038


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 643ms/step - loss: 7.2457 - root_mean_squared_error: 2.6918
Epoch 1: val_root_mean_squared_error improved from inf to 0.62879, saving model to cache/ensemble_camembert-base/models/mlp/83b1a39db8964db9bd4d59b269f94b00f1017352da17ce37a917e023cae3122a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7502 - root_mean_squared_error: 1.5874 - val_loss: 0.3954 - val_root_mean_squared_error: 0.6288
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4422 - root_mean_squared_error: 0.6650
Epoch 2: val_root_mean_squared_error did not improve from 0.62879
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4596 - root_mean_squared_error: 0.6736 - val_loss: 0.4959 - val_root_mean_squared_error: 0.7042
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.368,3.220251,1.794506,1.541983,3.220251
46,1.2883,0.900634,0.949017,0.774524,0.900634
69,0.9139,0.818695,0.904818,0.745585,0.818695


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 4.1988 - root_mean_squared_error: 2.0491
Epoch 1: val_root_mean_squared_error improved from inf to 0.74994, saving model to cache/ensemble_camembert-base/models/mlp/8dbb243a50642c7b195106006eea75546940ee11e734d41779ff9d55f561a856_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9161 - root_mean_squared_error: 1.6558 - val_loss: 0.5624 - val_root_mean_squared_error: 0.7499
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.9679 - root_mean_squared_error: 0.9838
Epoch 2: val_root_mean_squared_error improved from 0.74994 to 0.44268, saving model to cache/ensemble_camembert-base/models/mlp/8dbb243a50642c7b195106006eea75546940ee11e734d41779ff9d55f561a856_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6243 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1492,2.200164,1.483295,1.240269,2.200163
46,1.3895,0.72551,0.851769,0.712572,0.72551
69,0.7336,0.756106,0.869543,0.736905,0.756106


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 10.7099 - root_mean_squared_error: 3.2726
Epoch 1: val_root_mean_squared_error improved from inf to 0.93705, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.6390 - root_mean_squared_error: 1.8166 - val_loss: 0.8781 - val_root_mean_squared_error: 0.9370
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7560 - root_mean_squared_error: 0.8695
Epoch 2: val_root_mean_squared_error improved from 0.93705 to 0.70806, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6190 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5844,2.624824,1.620131,1.323621,2.624824
46,1.1534,0.880558,0.93838,0.804534,0.880558
69,0.9268,0.801816,0.895442,0.763924,0.801816


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.7859 - root_mean_squared_error: 2.4054
Epoch 1: val_root_mean_squared_error improved from inf to 0.80536, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5034 - root_mean_squared_error: 1.5247 - val_loss: 0.6486 - val_root_mean_squared_error: 0.8054
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4371 - root_mean_squared_error: 0.6611
Epoch 2: val_root_mean_squared_error improved from 0.80536 to 0.58681, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4960 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9151,2.07966,1.442103,1.213858,2.07966
46,1.3876,0.657884,0.8111,0.672073,0.657884
69,0.8838,0.561564,0.749376,0.620314,0.561564


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 7.5239 - root_mean_squared_error: 2.7430
Epoch 1: val_root_mean_squared_error improved from inf to 0.41868, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9694 - root_mean_squared_error: 1.6497 - val_loss: 0.1753 - val_root_mean_squared_error: 0.4187
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7901 - root_mean_squared_error: 0.8889
Epoch 2: val_root_mean_squared_error improved from 0.41868 to 0.33160, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5539 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0404,2.857891,1.69053,1.343118,2.857891
46,1.5768,1.114744,1.055814,0.863356,1.114744
69,0.6715,1.129571,1.062813,0.86847,1.129571


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 705ms/step - loss: 9.9718 - root_mean_squared_error: 3.1578
Epoch 1: val_root_mean_squared_error improved from inf to 0.55167, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9217 - root_mean_squared_error: 1.6243 - val_loss: 0.3043 - val_root_mean_squared_error: 0.5517
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6059 - root_mean_squared_error: 0.7784
Epoch 2: val_root_mean_squared_error improved from 0.55167 to 0.42166, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.8685 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9769,3.058767,1.748933,1.472105,3.058767
46,1.1478,0.920278,0.959311,0.777829,0.920279
69,0.8676,0.917517,0.957871,0.777215,0.917517


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.8576 - root_mean_squared_error: 2.8031
Epoch 1: val_root_mean_squared_error improved from inf to 1.00074, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2080 - root_mean_squared_error: 1.7154 - val_loss: 1.0015 - val_root_mean_squared_error: 1.0007
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7614 - root_mean_squared_error: 0.8726
Epoch 2: val_root_mean_squared_error improved from 1.00074 to 0.54327, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4874 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2547,2.133232,1.460559,1.208928,2.133232
46,1.4429,0.739497,0.85994,0.70741,0.739497
69,0.8987,0.768239,0.876493,0.719284,0.768239


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.9726 - root_mean_squared_error: 2.8236
Epoch 1: val_root_mean_squared_error improved from inf to 0.54721, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6129 - root_mean_squared_error: 1.5473 - val_loss: 0.2994 - val_root_mean_squared_error: 0.5472
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4470 - root_mean_squared_error: 0.6686
Epoch 2: val_root_mean_squared_error improved from 0.54721 to 0.44364, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5285 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7055,3.422017,1.849869,1.549979,3.422017
46,1.5089,1.054337,1.026809,0.793629,1.054337
69,0.8782,0.9709,0.985343,0.776898,0.9709


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 6.9329 - root_mean_squared_error: 2.6330
Epoch 1: val_root_mean_squared_error improved from inf to 0.53816, saving model to cache/ensemble_camembert-base/models/mlp/b87fc93c444beb317690a0cdff38ec95e97539572aea9d8fd1d1493a9a70c464_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9634 - root_mean_squared_error: 1.6502 - val_loss: 0.2896 - val_root_mean_squared_error: 0.5382
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3342 - root_mean_squared_error: 0.5781
Epoch 2: val_root_mean_squared_error improved from 0.53816 to 0.44662, saving model to cache/ensemble_camembert-base/models/mlp/b87fc93c444beb317690a0cdff38ec95e97539572aea9d8fd1d1493a9a70c464_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4600 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2954,2.886232,1.698891,1.449437,2.886232
46,1.4172,0.792328,0.890128,0.718831,0.792328
69,0.7184,0.769933,0.877458,0.71015,0.769933


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 7.4189 - root_mean_squared_error: 2.7238
Epoch 1: val_root_mean_squared_error improved from inf to 0.73729, saving model to cache/ensemble_camembert-base/models/mlp/1be993e12967a8ba1c83c27ebea1aee09566abf310f07a6a7cab6b310970de73_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2371 - root_mean_squared_error: 1.7247 - val_loss: 0.5436 - val_root_mean_squared_error: 0.7373
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4865 - root_mean_squared_error: 0.6975
Epoch 2: val_root_mean_squared_error improved from 0.73729 to 0.59915, saving model to cache/ensemble_camembert-base/models/mlp/1be993e12967a8ba1c83c27ebea1aee09566abf310f07a6a7cab6b310970de73_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4978 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8595,2.765173,1.662881,1.436559,2.765173
46,1.2515,0.70435,0.839256,0.716276,0.70435
69,0.8056,0.66364,0.814641,0.690701,0.66364


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.1106 - root_mean_squared_error: 2.4720
Epoch 1: val_root_mean_squared_error improved from inf to 0.60028, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4816 - root_mean_squared_error: 1.5175 - val_loss: 0.3603 - val_root_mean_squared_error: 0.6003
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1863 - root_mean_squared_error: 0.4316
Epoch 2: val_root_mean_squared_error improved from 0.60028 to 0.58540, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4458 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.445,3.215996,1.79332,1.556468,3.215996
46,1.1245,0.883431,0.93991,0.749978,0.883431
69,0.7874,0.839534,0.916261,0.733839,0.839534


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 7.4482 - root_mean_squared_error: 2.7291
Epoch 1: val_root_mean_squared_error improved from inf to 0.57052, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 2.9778 - root_mean_squared_error: 1.6522 - val_loss: 0.3255 - val_root_mean_squared_error: 0.5705
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.3701 - root_mean_squared_error: 0.6084
Epoch 2: val_root_mean_squared_error improved from 0.57052 to 0.45927, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4614 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5506,2.085042,1.443967,1.245541,2.085042
46,1.6499,0.593513,0.770398,0.618546,0.593513
69,0.8201,0.543436,0.737181,0.596412,0.543436


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 642ms/step - loss: 9.4096 - root_mean_squared_error: 3.0675
Epoch 1: val_root_mean_squared_error improved from inf to 0.62541, saving model to cache/ensemble_camembert-base/models/mlp/9086824a215aa3a754fbd8efbae63ad1d6264fb63944d6b33c452d433dc8a2bd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3377 - root_mean_squared_error: 1.7452 - val_loss: 0.3911 - val_root_mean_squared_error: 0.6254
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3791 - root_mean_squared_error: 0.6157
Epoch 2: val_root_mean_squared_error did not improve from 0.62541
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4855 - root_mean_squared_error: 0.6919 - val_loss: 1.3780 - val_root_mean_squared_error: 1.1739
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9911,2.095151,1.447464,1.266051,2.095151
46,1.5426,0.494065,0.702897,0.587007,0.494065
69,1.0242,0.506339,0.711575,0.596909,0.506339


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 6.4633 - root_mean_squared_error: 2.5423
Epoch 1: val_root_mean_squared_error improved from inf to 0.54149, saving model to cache/ensemble_camembert-base/models/mlp/1bc5b333d0ae5230beb38025c6ecd92a1cce88dcef8bc965126ea107ea365691_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7192 - root_mean_squared_error: 1.5851 - val_loss: 0.2932 - val_root_mean_squared_error: 0.5415
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2355 - root_mean_squared_error: 0.4853
Epoch 2: val_root_mean_squared_error improved from 0.54149 to 0.38684, saving model to cache/ensemble_camembert-base/models/mlp/1bc5b333d0ae5230beb38025c6ecd92a1cce88dcef8bc965126ea107ea365691_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5152 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3574,3.209159,1.791413,1.521539,3.209159
46,1.1569,0.923253,0.96086,0.788198,0.923253
69,0.7152,0.878024,0.937029,0.765213,0.878024


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 4.7616 - root_mean_squared_error: 2.1821
Epoch 1: val_root_mean_squared_error improved from inf to 0.83747, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 2.1723 - root_mean_squared_error: 1.4205 - val_loss: 0.7014 - val_root_mean_squared_error: 0.8375
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2230 - root_mean_squared_error: 0.4723
Epoch 2: val_root_mean_squared_error improved from 0.83747 to 0.35293, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5042 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1362,2.404794,1.55074,1.239951,2.404794
46,1.4208,0.915711,0.956928,0.833176,0.915711
69,0.7985,0.90235,0.949921,0.829702,0.90235


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 6.6042 - root_mean_squared_error: 2.5699
Epoch 1: val_root_mean_squared_error improved from inf to 0.64908, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8131 - root_mean_squared_error: 1.6170 - val_loss: 0.4213 - val_root_mean_squared_error: 0.6491
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1640 - root_mean_squared_error: 0.4050
Epoch 2: val_root_mean_squared_error improved from 0.64908 to 0.50528, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7077 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0573,2.266391,1.505454,1.293326,2.266391
46,1.4922,0.58803,0.766831,0.63224,0.58803
69,0.7615,0.483519,0.695356,0.586725,0.483519


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 6.2767 - root_mean_squared_error: 2.5053
Epoch 1: val_root_mean_squared_error improved from inf to 0.55014, saving model to cache/ensemble_camembert-base/models/mlp/af0b1614d42172262aa063f83af5042bafb9d87d5762dfe48ef50b307aaafe7f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8614 - root_mean_squared_error: 1.6276 - val_loss: 0.3027 - val_root_mean_squared_error: 0.5501
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1393 - root_mean_squared_error: 0.3733
Epoch 2: val_root_mean_squared_error did not improve from 0.55014
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4312 - root_mean_squared_error: 0.6496 - val_loss: 0.4620 - val_root_mean_squared_error: 0.6797
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2472,2.752789,1.659153,1.393388,2.752789
46,1.4824,0.836307,0.914498,0.70619,0.836307
69,0.7144,0.803373,0.896311,0.702499,0.803373


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 5.9710 - root_mean_squared_error: 2.4436
Epoch 1: val_root_mean_squared_error improved from inf to 0.57661, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7024 - root_mean_squared_error: 1.5835 - val_loss: 0.3325 - val_root_mean_squared_error: 0.5766
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2994 - root_mean_squared_error: 0.5472
Epoch 2: val_root_mean_squared_error improved from 0.57661 to 0.51404, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4085 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0592,2.868878,1.693776,1.424565,2.868878
46,1.2216,0.836055,0.914361,0.762202,0.836055
69,0.867,0.77819,0.882151,0.73214,0.77819


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 6.5477 - root_mean_squared_error: 2.5589
Epoch 1: val_root_mean_squared_error improved from inf to 0.56878, saving model to cache/ensemble_camembert-base/models/mlp/a25f26fe7618185237b5994dac4a484254d3e981ed2b9bc9c4cd88ae67097ae3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0340 - root_mean_squared_error: 1.6793 - val_loss: 0.3235 - val_root_mean_squared_error: 0.5688
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5249 - root_mean_squared_error: 0.7245
Epoch 2: val_root_mean_squared_error did not improve from 0.56878
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5944 - root_mean_squared_error: 0.7688 - val_loss: 0.5512 - val_root_mean_squared_error: 0.7424
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0242,3.859268,1.964502,1.65675,3.859268
46,1.4746,1.207165,1.098711,0.929598,1.207165
69,0.961,1.139046,1.067261,0.948429,1.139046


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 5.4069 - root_mean_squared_error: 2.3253
Epoch 1: val_root_mean_squared_error improved from inf to 0.88429, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4899 - root_mean_squared_error: 1.5281 - val_loss: 0.7820 - val_root_mean_squared_error: 0.8843
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4897 - root_mean_squared_error: 0.6998
Epoch 2: val_root_mean_squared_error improved from 0.88429 to 0.67349, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2926 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3936,2.584641,1.607682,1.420633,2.584641
46,1.3661,0.592128,0.769499,0.601366,0.592128
69,1.0355,0.577987,0.760255,0.60123,0.577987


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 9.4321 - root_mean_squared_error: 3.0712
Epoch 1: val_root_mean_squared_error improved from inf to 1.00911, saving model to cache/ensemble_camembert-base/models/mlp/af52127018501ffc43aa59e690e964b2abcaa234084168b6a40cd735844600f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2638 - root_mean_squared_error: 1.7209 - val_loss: 1.0183 - val_root_mean_squared_error: 1.0091
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1276 - root_mean_squared_error: 1.0619
Epoch 2: val_root_mean_squared_error did not improve from 1.00911
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5036 - root_mean_squared_error: 0.7028 - val_loss: 1.0531 - val_root_mean_squared_error: 1.0262
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1357,2.244538,1.498178,1.219115,2.244539
46,1.3452,0.791515,0.889672,0.769105,0.791516
69,0.8553,0.8354,0.914002,0.795936,0.8354


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 9.0047 - root_mean_squared_error: 3.0008
Epoch 1: val_root_mean_squared_error improved from inf to 0.59191, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4517 - root_mean_squared_error: 1.7705 - val_loss: 0.3504 - val_root_mean_squared_error: 0.5919
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3949 - root_mean_squared_error: 0.6284
Epoch 2: val_root_mean_squared_error improved from 0.59191 to 0.47510, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4522 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6185,1.900262,1.3785,1.183866,1.900262
46,1.3938,0.570605,0.755384,0.584929,0.570605
69,0.8485,0.534166,0.730867,0.570613,0.534166


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.6078 - root_mean_squared_error: 2.5706
Epoch 1: val_root_mean_squared_error improved from inf to 0.61866, saving model to cache/ensemble_camembert-base/models/mlp/183f903ea3ace4c6c84b94fc7c5326c54cf755f66e7808187f741cb6ab8c9ad4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6468 - root_mean_squared_error: 1.5585 - val_loss: 0.3827 - val_root_mean_squared_error: 0.6187
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.3616 - root_mean_squared_error: 0.6014
Epoch 2: val_root_mean_squared_error did not improve from 0.61866
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5420 - root_mean_squared_error: 0.7324 - val_loss: 0.6448 - val_root_mean_squared_error: 0.8030
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3405,2.481993,1.575434,1.313217,2.481993
46,1.2479,0.770603,0.87784,0.750482,0.770603
69,0.8327,0.738751,0.859506,0.744502,0.738751


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.4517 - root_mean_squared_error: 2.5400
Epoch 1: val_root_mean_squared_error improved from inf to 0.55605, saving model to cache/ensemble_camembert-base/models/mlp/fc5bd52730716253769bd35ff76ee947e93964b90cf3ea4092759da568d035bb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3985 - root_mean_squared_error: 1.4875 - val_loss: 0.3092 - val_root_mean_squared_error: 0.5560
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4020 - root_mean_squared_error: 0.6340
Epoch 2: val_root_mean_squared_error did not improve from 0.55605
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5207 - root_mean_squared_error: 0.7206 - val_loss: 1.6201 - val_root_mean_squared_error: 1.2728
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9194,2.111877,1.45323,1.259493,2.111877
46,1.391,0.529192,0.727456,0.573411,0.529192
69,0.8625,0.573697,0.757428,0.629871,0.573697


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.0949 - root_mean_squared_error: 2.4688
Epoch 1: val_root_mean_squared_error improved from inf to 0.41745, saving model to cache/ensemble_camembert-base/models/mlp/39e47ce1314fad017ecccf6898186e710c6cea4ffc5dc820e052535d93d01a59_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8851 - root_mean_squared_error: 1.6330 - val_loss: 0.1743 - val_root_mean_squared_error: 0.4175
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4776 - root_mean_squared_error: 0.6911
Epoch 2: val_root_mean_squared_error did not improve from 0.41745
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5646 - root_mean_squared_error: 0.7484 - val_loss: 0.2057 - val_root_mean_squared_error: 0.4536
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6777,3.324423,1.8233,1.517377,3.324423
46,1.4066,1.063695,1.031356,0.851535,1.063695
69,0.8286,1.005753,1.002872,0.838234,1.005753


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 742ms/step - loss: 7.2883 - root_mean_squared_error: 2.6997
Epoch 1: val_root_mean_squared_error improved from inf to 0.49237, saving model to cache/ensemble_camembert-base/models/mlp/816e258b2199cfb978c52b9728c87d4692506200e2d81ad098e5abbe8be5bbb8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6800 - root_mean_squared_error: 1.5608 - val_loss: 0.2424 - val_root_mean_squared_error: 0.4924
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2478 - root_mean_squared_error: 0.4978
Epoch 2: val_root_mean_squared_error did not improve from 0.49237
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5763 - root_mean_squared_error: 0.7487 - val_loss: 0.7702 - val_root_mean_squared_error: 0.8776
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1427,2.455872,1.567122,1.288366,2.455872
46,1.17,0.906557,0.952133,0.749107,0.906557
69,0.9588,0.873756,0.934749,0.734606,0.873756


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 7.5584 - root_mean_squared_error: 2.7493
Epoch 1: val_root_mean_squared_error improved from inf to 0.45791, saving model to cache/ensemble_camembert-base/models/mlp/89971e217ad85a0448ee2ae73dfd959b000a05e022c78916b28e6aa14de21cbc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1529 - root_mean_squared_error: 1.6982 - val_loss: 0.2097 - val_root_mean_squared_error: 0.4579
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2911 - root_mean_squared_error: 0.5396
Epoch 2: val_root_mean_squared_error did not improve from 0.45791
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4113 - root_mean_squared_error: 0.6395 - val_loss: 0.5191 - val_root_mean_squared_error: 0.7205
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9552,3.05442,1.74769,1.42888,3.054421
46,1.477,1.062636,1.030842,0.881748,1.062636
69,0.7497,1.03357,1.016646,0.869087,1.03357


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 8.2803 - root_mean_squared_error: 2.8775
Epoch 1: val_root_mean_squared_error improved from inf to 0.80104, saving model to cache/ensemble_camembert-base/models/mlp/ce665f87fc6d71288aebd76ec739052e1f4ceb4f23d7e30bb781588a814c9ec4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8703 - root_mean_squared_error: 1.6238 - val_loss: 0.6417 - val_root_mean_squared_error: 0.8010
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.8982 - root_mean_squared_error: 0.9477
Epoch 2: val_root_mean_squared_error improved from 0.80104 to 0.44872, saving model to cache/ensemble_camembert-base/models/mlp/ce665f87fc6d71288aebd76ec739052e1f4ceb4f23d7e30bb781588a814c9ec4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5199 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7557,2.326468,1.525276,1.270141,2.326468
46,1.4401,0.771133,0.878142,0.715549,0.771133
69,0.7613,0.752527,0.867483,0.701994,0.752527


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 7.6624 - root_mean_squared_error: 2.7681
Epoch 1: val_root_mean_squared_error improved from inf to 0.59158, saving model to cache/ensemble_camembert-base/models/mlp/b0f284afa9d562d9c4539803b308383af04883a4832001487658a4a1b6fc92fb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7015 - root_mean_squared_error: 1.5729 - val_loss: 0.3500 - val_root_mean_squared_error: 0.5916
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5488 - root_mean_squared_error: 0.7408
Epoch 2: val_root_mean_squared_error improved from 0.59158 to 0.57770, saving model to cache/ensemble_camembert-base/models/mlp/b0f284afa9d562d9c4539803b308383af04883a4832001487658a4a1b6fc92fb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5001 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.027,2.546017,1.595624,1.326966,2.546017
46,1.4101,0.825262,0.908439,0.756513,0.825262
69,0.892,0.862877,0.928912,0.800148,0.862877


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 8.5504 - root_mean_squared_error: 2.9241
Epoch 1: val_root_mean_squared_error improved from inf to 0.67268, saving model to cache/ensemble_camembert-base/models/mlp/bf02187b9cbbee36fdc4f8716b1d4126e9818375300825574e11dfef1aeb3baf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8581 - root_mean_squared_error: 1.6190 - val_loss: 0.4525 - val_root_mean_squared_error: 0.6727
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2845 - root_mean_squared_error: 0.5334
Epoch 2: val_root_mean_squared_error improved from 0.67268 to 0.44172, saving model to cache/ensemble_camembert-base/models/mlp/bf02187b9cbbee36fdc4f8716b1d4126e9818375300825574e11dfef1aeb3baf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3405 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7634,3.383698,1.839483,1.51991,3.383698
46,1.2284,1.106846,1.052068,0.846753,1.106846
69,0.8019,1.08219,1.040284,0.82377,1.08219


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 6.9466 - root_mean_squared_error: 2.6356
Epoch 1: val_root_mean_squared_error improved from inf to 0.77428, saving model to cache/ensemble_camembert-base/models/mlp/b2265a49fa68d8f4f0a730cd87021f99e63b63d9544c41d4ca541f5ac8d00ecf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1549 - root_mean_squared_error: 1.7029 - val_loss: 0.5995 - val_root_mean_squared_error: 0.7743
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3912 - root_mean_squared_error: 0.6254
Epoch 2: val_root_mean_squared_error improved from 0.77428 to 0.45839, saving model to cache/ensemble_camembert-base/models/mlp/b2265a49fa68d8f4f0a730cd87021f99e63b63d9544c41d4ca541f5ac8d00ecf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4224 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8686,2.334597,1.527939,1.331167,2.334597
46,1.2638,0.554162,0.744421,0.628647,0.554162
69,0.8132,0.501474,0.708148,0.602373,0.501474


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 6.6041 - root_mean_squared_error: 2.5698
Epoch 1: val_root_mean_squared_error improved from inf to 0.63011, saving model to cache/ensemble_camembert-base/models/mlp/5b2151a00682234686528282d75a71a95b006354dc4523b68a7491603b839ad6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6896 - root_mean_squared_error: 1.5764 - val_loss: 0.3970 - val_root_mean_squared_error: 0.6301
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6556 - root_mean_squared_error: 0.8097
Epoch 2: val_root_mean_squared_error did not improve from 0.63011
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4910 - root_mean_squared_error: 0.6978 - val_loss: 0.7530 - val_root_mean_squared_error: 0.8677
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.912,2.90622,1.704764,1.366227,2.90622
46,1.4979,1.035571,1.01763,0.87658,1.035571
69,0.7042,1.013717,1.006835,0.860199,1.013717


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 6.1906 - root_mean_squared_error: 2.4881
Epoch 1: val_root_mean_squared_error improved from inf to 0.70166, saving model to cache/ensemble_camembert-base/models/mlp/ed05f8a35355ddf02d3e1cf37893c0d0d47285228164bcdf7a5a14ce0031091e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2300 - root_mean_squared_error: 1.7288 - val_loss: 0.4923 - val_root_mean_squared_error: 0.7017
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3591 - root_mean_squared_error: 0.5992
Epoch 2: val_root_mean_squared_error improved from 0.70166 to 0.53694, saving model to cache/ensemble_camembert-base/models/mlp/ed05f8a35355ddf02d3e1cf37893c0d0d47285228164bcdf7a5a14ce0031091e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5743 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2352,1.99754,1.413343,1.225699,1.99754
46,1.1463,0.545783,0.738771,0.626704,0.545783
69,0.8823,0.503756,0.709758,0.614593,0.503756


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 9.7176 - root_mean_squared_error: 3.1173
Epoch 1: val_root_mean_squared_error improved from inf to 0.64290, saving model to cache/ensemble_camembert-base/models/mlp/b8a3aaa608bd277a7adcc8c1d91110a8e79f821167f31d2bbb24e84d680ffc90_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9016 - root_mean_squared_error: 1.6184 - val_loss: 0.4133 - val_root_mean_squared_error: 0.6429
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3793 - root_mean_squared_error: 0.6159
Epoch 2: val_root_mean_squared_error improved from 0.64290 to 0.58838, saving model to cache/ensemble_camembert-base/models/mlp/b8a3aaa608bd277a7adcc8c1d91110a8e79f821167f31d2bbb24e84d680ffc90_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5057 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.569,2.834424,1.683575,1.426628,2.834424
46,1.3709,0.80618,0.897875,0.651944,0.80618
69,0.8307,0.79752,0.89304,0.666693,0.79752


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.8602 - root_mean_squared_error: 2.6192
Epoch 1: val_root_mean_squared_error improved from inf to 0.74233, saving model to cache/ensemble_camembert-base/models/mlp/9c9de70bdd3b496af76ca5b9681c9a0a072a3ec02da5132d7ee40d69d9326dae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5154 - root_mean_squared_error: 1.5217 - val_loss: 0.5511 - val_root_mean_squared_error: 0.7423
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0867 - root_mean_squared_error: 1.0425
Epoch 2: val_root_mean_squared_error improved from 0.74233 to 0.43918, saving model to cache/ensemble_camembert-base/models/mlp/9c9de70bdd3b496af76ca5b9681c9a0a072a3ec02da5132d7ee40d69d9326dae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6225 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5538,2.692078,1.640755,1.376309,2.692078
46,1.5885,0.79967,0.894242,0.718474,0.799669
69,0.7902,0.790118,0.888886,0.736007,0.790118


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.6721 - root_mean_squared_error: 2.5830
Epoch 1: val_root_mean_squared_error improved from inf to 0.82475, saving model to cache/ensemble_camembert-base/models/mlp/3d1ef24b6d4f35c08b717910579ac88707b4603031d63524fb84a13630c617b3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9291 - root_mean_squared_error: 1.6501 - val_loss: 0.6802 - val_root_mean_squared_error: 0.8248
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9475 - root_mean_squared_error: 0.9734
Epoch 2: val_root_mean_squared_error did not improve from 0.82475
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6784 - root_mean_squared_error: 0.8158 - val_loss: 0.8858 - val_root_mean_squared_error: 0.9412
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3533,2.732747,1.653102,1.412847,2.732748
46,1.4161,0.738883,0.859583,0.703185,0.738883
69,0.7599,0.743442,0.862231,0.702351,0.743442


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 6.4557 - root_mean_squared_error: 2.5408
Epoch 1: val_root_mean_squared_error improved from inf to 0.55856, saving model to cache/ensemble_camembert-base/models/mlp/309293f22669c166a20005ee416077a82f9097ab35409833f2955eee597b4148_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8955 - root_mean_squared_error: 1.6368 - val_loss: 0.3120 - val_root_mean_squared_error: 0.5586
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1193 - root_mean_squared_error: 0.3454
Epoch 2: val_root_mean_squared_error improved from 0.55856 to 0.39151, saving model to cache/ensemble_camembert-base/models/mlp/309293f22669c166a20005ee416077a82f9097ab35409833f2955eee597b4148_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4010 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3339,2.209976,1.486599,1.221649,2.209976
46,1.2557,0.799246,0.894006,0.725206,0.799246
69,0.866,0.766635,0.875577,0.717651,0.766635


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 7.6106 - root_mean_squared_error: 2.7587
Epoch 1: val_root_mean_squared_error improved from inf to 0.84116, saving model to cache/ensemble_camembert-base/models/mlp/b4f8f925de208ff636b31f14be6666236d9eeb902b6e1206fb9be60d390714fc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8188 - root_mean_squared_error: 1.6012 - val_loss: 0.7075 - val_root_mean_squared_error: 0.8412
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 1.1852 - root_mean_squared_error: 1.0886
Epoch 2: val_root_mean_squared_error did not improve from 0.84116
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8129 - root_mean_squared_error: 0.8856 - val_loss: 0.7613 - val_root_mean_squared_error: 0.8725
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9957,2.679048,1.63678,1.310665,2.679048
46,1.3456,1.013747,1.00685,0.893455,1.013747
69,0.8478,1.003165,1.001581,0.887595,1.003165


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.6307 - root_mean_squared_error: 2.7624
Epoch 1: val_root_mean_squared_error improved from inf to 0.64116, saving model to cache/ensemble_camembert-base/models/mlp/c9d75fe5be2648efa233cbeb716ac7fa798248719ffb14e756f73c971e4a9159_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5930 - root_mean_squared_error: 1.5375 - val_loss: 0.4111 - val_root_mean_squared_error: 0.6412
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3546 - root_mean_squared_error: 0.5955
Epoch 2: val_root_mean_squared_error improved from 0.64116 to 0.44433, saving model to cache/ensemble_camembert-base/models/mlp/c9d75fe5be2648efa233cbeb716ac7fa798248719ffb14e756f73c971e4a9159_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4950 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4354,1.909237,1.381751,1.119262,1.909237
46,1.3341,0.783877,0.885368,0.741604,0.783877
69,0.7508,0.809171,0.899539,0.751823,0.809171


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.0047 - root_mean_squared_error: 2.8293
Epoch 1: val_root_mean_squared_error improved from inf to 0.91330, saving model to cache/ensemble_camembert-base/models/mlp/c8bfae2ccd69338972a7a14eeda05d3a0c63cdb958288d303d4689cac2c30054_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2127 - root_mean_squared_error: 1.7148 - val_loss: 0.8341 - val_root_mean_squared_error: 0.9133
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5105 - root_mean_squared_error: 0.7145
Epoch 2: val_root_mean_squared_error improved from 0.91330 to 0.41900, saving model to cache/ensemble_camembert-base/models/mlp/c8bfae2ccd69338972a7a14eeda05d3a0c63cdb958288d303d4689cac2c30054_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6431 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7861,2.689888,1.640088,1.390248,2.689887
46,1.0871,0.839144,0.916048,0.723135,0.839144
69,0.879,0.724012,0.850889,0.637458,0.724013


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 3.4663 - root_mean_squared_error: 1.8618
Epoch 1: val_root_mean_squared_error improved from inf to 1.04933, saving model to cache/ensemble_camembert-base/models/mlp/fce04785d20e070d6450df4b6b6bc0ceb59ec5b2204d7f233804b25d3e31ddd1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3598 - root_mean_squared_error: 1.4846 - val_loss: 1.1011 - val_root_mean_squared_error: 1.0493
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5742 - root_mean_squared_error: 0.7578
Epoch 2: val_root_mean_squared_error improved from 1.04933 to 0.55346, saving model to cache/ensemble_camembert-base/models/mlp/fce04785d20e070d6450df4b6b6bc0ceb59ec5b2204d7f233804b25d3e31ddd1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4736 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0239,2.658519,1.630496,1.347994,2.658519
46,1.221,0.882362,0.939341,0.736048,0.882362
69,0.9583,0.877862,0.936943,0.758001,0.877862


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.3357 - root_mean_squared_error: 2.3099
Epoch 1: val_root_mean_squared_error improved from inf to 0.49254, saving model to cache/ensemble_camembert-base/models/mlp/b593633b359c2358a568a0b4a680e9318d9bd21e6ea3cf686a8dc1d6a764ee56_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9061 - root_mean_squared_error: 1.6450 - val_loss: 0.2426 - val_root_mean_squared_error: 0.4925
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2376 - root_mean_squared_error: 0.4875
Epoch 2: val_root_mean_squared_error improved from 0.49254 to 0.31486, saving model to cache/ensemble_camembert-base/models/mlp/b593633b359c2358a568a0b4a680e9318d9bd21e6ea3cf686a8dc1d6a764ee56_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4758 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.685,4.127666,2.031666,1.847218,4.127665
46,1.2968,0.917753,0.957994,0.747174,0.917753
69,0.9816,0.84481,0.919136,0.736538,0.84481


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 7.9817 - root_mean_squared_error: 2.8252
Epoch 1: val_root_mean_squared_error improved from inf to 0.54603, saving model to cache/ensemble_camembert-base/models/mlp/dd5416e2647d916f62b2dab98fa0b72cfc8afef514602a983e580d18b899c8fe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8821 - root_mean_squared_error: 1.6178 - val_loss: 0.2981 - val_root_mean_squared_error: 0.5460
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.9046 - root_mean_squared_error: 0.9511
Epoch 2: val_root_mean_squared_error did not improve from 0.54603
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5288 - root_mean_squared_error: 0.7155 - val_loss: 2.1722 - val_root_mean_squared_error: 1.4739
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7559,3.968971,1.992228,1.715644,3.968971
46,1.5388,1.205793,1.098086,0.82228,1.205793
69,0.8504,1.045836,1.022661,0.79498,1.045836


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 8.8265 - root_mean_squared_error: 2.9709
Epoch 1: val_root_mean_squared_error improved from inf to 0.91637, saving model to cache/ensemble_camembert-base/models/mlp/f11d0165c931292c15a0e4fae4dcbd0290b167f5587ec7e68c07053b7a23e905_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9148 - root_mean_squared_error: 1.6232 - val_loss: 0.8397 - val_root_mean_squared_error: 0.9164
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6051 - root_mean_squared_error: 0.7779
Epoch 2: val_root_mean_squared_error improved from 0.91637 to 0.78871, saving model to cache/ensemble_camembert-base/models/mlp/f11d0165c931292c15a0e4fae4dcbd0290b167f5587ec7e68c07053b7a23e905_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5822 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1783,1.866276,1.366117,1.127915,1.866276
46,1.5162,0.699528,0.836378,0.676697,0.699528
69,0.9117,0.719466,0.848213,0.696828,0.719466


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.2886 - root_mean_squared_error: 2.2997
Epoch 1: val_root_mean_squared_error improved from inf to 0.86762, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5656 - root_mean_squared_error: 1.5406 - val_loss: 0.7528 - val_root_mean_squared_error: 0.8676
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3954 - root_mean_squared_error: 0.6288
Epoch 2: val_root_mean_squared_error improved from 0.86762 to 0.76227, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5432 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.899,3.024019,1.73897,1.448528,3.024018
46,1.4833,0.958499,0.97903,0.750023,0.958499
69,0.8392,0.921982,0.960199,0.766978,0.921982


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.9073 - root_mean_squared_error: 2.4305
Epoch 1: val_root_mean_squared_error improved from inf to 0.85610, saving model to cache/ensemble_camembert-base/models/mlp/71fdcc3b8122b6ee1919855932e05d927b9bc8c6ff8b5c392de6af933844a3ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6318 - root_mean_squared_error: 1.5601 - val_loss: 0.7329 - val_root_mean_squared_error: 0.8561
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6428 - root_mean_squared_error: 0.8018
Epoch 2: val_root_mean_squared_error did not improve from 0.85610
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9520 - root_mean_squared_error: 0.9590 - val_loss: 0.9543 - val_root_mean_squared_error: 0.9769
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8499,1.805091,1.343537,1.085362,1.805091
46,1.8507,0.7161,0.846227,0.730237,0.7161
69,0.8237,0.674281,0.821146,0.69797,0.674281


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 5.2965 - root_mean_squared_error: 2.3014
Epoch 1: val_root_mean_squared_error improved from inf to 0.52135, saving model to cache/ensemble_camembert-base/models/mlp/a1fb1df38bb1083567f4a912152ec6e66ae90613a9eb44b35adf1b59163a5294_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3221 - root_mean_squared_error: 1.7537 - val_loss: 0.2718 - val_root_mean_squared_error: 0.5213
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1762 - root_mean_squared_error: 0.4197
Epoch 2: val_root_mean_squared_error did not improve from 0.52135
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5044 - root_mean_squared_error: 0.7020 - val_loss: 0.4507 - val_root_mean_squared_error: 0.6714
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1524,2.868426,1.693643,1.41808,2.868426
46,1.3896,0.803258,0.896247,0.727246,0.803258
69,0.7775,0.937174,0.968078,0.790635,0.937174


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 4.7121 - root_mean_squared_error: 2.1707
Epoch 1: val_root_mean_squared_error improved from inf to 0.86743, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5892 - root_mean_squared_error: 1.5587 - val_loss: 0.7524 - val_root_mean_squared_error: 0.8674
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1167 - root_mean_squared_error: 0.3416
Epoch 2: val_root_mean_squared_error improved from 0.86743 to 0.79087, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6891 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7443,2.652233,1.628568,1.306608,2.652233
46,1.3417,0.944935,0.972078,0.821634,0.944935
69,0.7284,0.82948,0.910758,0.753863,0.82948


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 6.9736 - root_mean_squared_error: 2.6408
Epoch 1: val_root_mean_squared_error improved from inf to 0.66625, saving model to cache/ensemble_camembert-base/models/mlp/f6a0475238b2f72e8e97fa597d3d5e5fe89dac0adf4d5c4e7f07494fa5007fbe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2502 - root_mean_squared_error: 1.7280 - val_loss: 0.4439 - val_root_mean_squared_error: 0.6663
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3072 - root_mean_squared_error: 0.5543
Epoch 2: val_root_mean_squared_error improved from 0.66625 to 0.45916, saving model to cache/ensemble_camembert-base/models/mlp/f6a0475238b2f72e8e97fa597d3d5e5fe89dac0adf4d5c4e7f07494fa5007fbe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4872 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1235,3.39229,1.841817,1.540842,3.39229
46,1.5317,1.035338,1.017515,0.816996,1.035338
69,0.8892,0.993737,0.996864,0.820714,0.993737


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.7321 - root_mean_squared_error: 2.3942
Epoch 1: val_root_mean_squared_error improved from inf to 0.47943, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3131 - root_mean_squared_error: 1.7595 - val_loss: 0.2299 - val_root_mean_squared_error: 0.4794
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2039 - root_mean_squared_error: 0.4516
Epoch 2: val_root_mean_squared_error improved from 0.47943 to 0.40121, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4196 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2088,3.429423,1.85187,1.604255,3.429423
46,1.3764,0.908535,0.953171,0.760045,0.908534
69,0.72,0.652026,0.807481,0.632002,0.652026


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 8.5370 - root_mean_squared_error: 2.9218
Epoch 1: val_root_mean_squared_error improved from inf to 0.72128, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4998 - root_mean_squared_error: 1.7861 - val_loss: 0.5202 - val_root_mean_squared_error: 0.7213
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5587 - root_mean_squared_error: 0.7475
Epoch 2: val_root_mean_squared_error improved from 0.72128 to 0.35420, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4567 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2763,2.685216,1.638663,1.294802,2.685216
46,1.2962,1.013596,1.006775,0.82618,1.013596
69,0.7439,0.979509,0.989701,0.82541,0.979509


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 8.5711 - root_mean_squared_error: 2.9276
Epoch 1: val_root_mean_squared_error improved from inf to 0.85557, saving model to cache/ensemble_camembert-base/models/mlp/5eed6be817ccc55cb4f9507e59170cfe8070fcb1aee8bc32fa5e487cad137e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9185 - root_mean_squared_error: 1.6323 - val_loss: 0.7320 - val_root_mean_squared_error: 0.8556
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.1247 - root_mean_squared_error: 1.0605
Epoch 2: val_root_mean_squared_error improved from 0.85557 to 0.50640, saving model to cache/ensemble_camembert-base/models/mlp/5eed6be817ccc55cb4f9507e59170cfe8070fcb1aee8bc32fa5e487cad137e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5484 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0146,2.680495,1.637222,1.335969,2.680495
46,1.357,0.948229,0.97377,0.786806,0.948229
69,0.762,0.934846,0.966874,0.780931,0.934846


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 5.0738 - root_mean_squared_error: 2.2525
Epoch 1: val_root_mean_squared_error improved from inf to 0.85516, saving model to cache/ensemble_camembert-base/models/mlp/0f9da934e850fc9ddd6a84688029dc634de43ddcea5499b59d4fabc29432cdd2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5972 - root_mean_squared_error: 1.5523 - val_loss: 0.7313 - val_root_mean_squared_error: 0.8552
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4113 - root_mean_squared_error: 0.6413
Epoch 2: val_root_mean_squared_error did not improve from 0.85516
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4759 - root_mean_squared_error: 0.6885 - val_loss: 0.8574 - val_root_mean_squared_error: 0.9260
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.3523,2.097614,1.448314,1.228657,2.097614
46,1.1948,0.625414,0.790831,0.624826,0.625414
69,0.7833,0.575587,0.758674,0.566638,0.575587


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 7.5850 - root_mean_squared_error: 2.7541
Epoch 1: val_root_mean_squared_error improved from inf to 0.59421, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 2.8021 - root_mean_squared_error: 1.6006 - val_loss: 0.3531 - val_root_mean_squared_error: 0.5942
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4036 - root_mean_squared_error: 0.6353
Epoch 2: val_root_mean_squared_error improved from 0.59421 to 0.44640, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3389 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7696,2.064426,1.436811,1.205585,2.064426
46,1.3353,0.642409,0.801504,0.63816,0.642409
69,0.9523,0.69037,0.830885,0.688041,0.69037


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 4.5568 - root_mean_squared_error: 2.1347
Epoch 1: val_root_mean_squared_error improved from inf to 0.69145, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5493 - root_mean_squared_error: 1.5440 - val_loss: 0.4781 - val_root_mean_squared_error: 0.6915
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2921 - root_mean_squared_error: 0.5404
Epoch 2: val_root_mean_squared_error improved from 0.69145 to 0.40615, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5218 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5863,2.020355,1.421392,1.188557,2.020354
46,1.2684,0.708296,0.841603,0.705135,0.708296
69,0.851,0.66388,0.814788,0.68476,0.66388


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 8.9947 - root_mean_squared_error: 2.9991
Epoch 1: val_root_mean_squared_error improved from inf to 0.51741, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0996 - root_mean_squared_error: 1.6831 - val_loss: 0.2677 - val_root_mean_squared_error: 0.5174
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4321 - root_mean_squared_error: 0.6573
Epoch 2: val_root_mean_squared_error improved from 0.51741 to 0.36362, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5698 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4837,2.325274,1.524885,1.342656,2.325273
46,1.0946,0.548331,0.740494,0.60091,0.548331
69,0.8735,0.560655,0.748769,0.615065,0.560655


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.0050 - root_mean_squared_error: 2.2372
Epoch 1: val_root_mean_squared_error improved from inf to 1.20995, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.1581 - root_mean_squared_error: 1.4215 - val_loss: 1.4640 - val_root_mean_squared_error: 1.2099
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.9267 - root_mean_squared_error: 1.3881
Epoch 2: val_root_mean_squared_error improved from 1.20995 to 1.17590, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.9132 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5969,3.285701,1.81265,1.527739,3.285701
46,1.286,0.989924,0.994949,0.789368,0.989924
69,0.6538,0.988014,0.993989,0.783934,0.988014


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 9.0384 - root_mean_squared_error: 3.0064
Epoch 1: val_root_mean_squared_error improved from inf to 0.84207, saving model to cache/ensemble_camembert-base/models/mlp/fe709aa942371b132a95782966e8e7efe2e93dca80ac9ef37d14fcdca605aabe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8537 - root_mean_squared_error: 1.6063 - val_loss: 0.7091 - val_root_mean_squared_error: 0.8421
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6091 - root_mean_squared_error: 0.7804
Epoch 2: val_root_mean_squared_error did not improve from 0.84207
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7528 - root_mean_squared_error: 0.8563 - val_loss: 0.9491 - val_root_mean_squared_error: 0.9742
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8952,2.053731,1.433084,1.159183,2.053731
46,1.5168,0.803161,0.896192,0.745323,0.803161
69,0.9645,0.91347,0.955756,0.785895,0.91347


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 5.6280 - root_mean_squared_error: 2.3723
Epoch 1: val_root_mean_squared_error improved from inf to 0.83870, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5556 - root_mean_squared_error: 1.5385 - val_loss: 0.7034 - val_root_mean_squared_error: 0.8387
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3891 - root_mean_squared_error: 0.6238
Epoch 2: val_root_mean_squared_error improved from 0.83870 to 0.59544, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6563 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8287,2.218397,1.489428,1.208345,2.218397
46,1.3732,0.846362,0.919979,0.745714,0.846362
69,0.9267,0.904348,0.950972,0.769597,0.904348


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.6375 - root_mean_squared_error: 2.7636
Epoch 1: val_root_mean_squared_error improved from inf to 0.63101, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9815 - root_mean_squared_error: 1.6527 - val_loss: 0.3982 - val_root_mean_squared_error: 0.6310
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3205 - root_mean_squared_error: 0.5662
Epoch 2: val_root_mean_squared_error improved from 0.63101 to 0.36208, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4609 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2946,2.28688,1.512243,1.326792,2.28688
46,1.306,0.552115,0.743044,0.557147,0.552115
69,0.7993,0.537675,0.733263,0.552329,0.537675


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 690ms/step - loss: 6.4263 - root_mean_squared_error: 2.5350
Epoch 1: val_root_mean_squared_error improved from inf to 0.41236, saving model to cache/ensemble_camembert-base/models/mlp/43eed69246bd9282f4a4e2b07e3e0e39980b6ea0c81608dd9085d6cd9597d69b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8090 - root_mean_squared_error: 1.6124 - val_loss: 0.1700 - val_root_mean_squared_error: 0.4124
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2092 - root_mean_squared_error: 0.4574
Epoch 2: val_root_mean_squared_error did not improve from 0.41236
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3584 - root_mean_squared_error: 0.5953 - val_loss: 0.2034 - val_root_mean_squared_error: 0.4510
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0444,3.330018,1.824834,1.551828,3.330018
46,1.214,0.980377,0.99014,0.785599,0.980377
69,0.9357,0.933258,0.966053,0.781084,0.933258


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 4.7705 - root_mean_squared_error: 2.1841
Epoch 1: val_root_mean_squared_error improved from inf to 0.73691, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6394 - root_mean_squared_error: 1.5773 - val_loss: 0.5430 - val_root_mean_squared_error: 0.7369
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2950 - root_mean_squared_error: 0.5431
Epoch 2: val_root_mean_squared_error improved from 0.73691 to 0.57489, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5699 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8237,2.11787,1.455291,1.19188,2.11787
46,1.1991,0.774332,0.879962,0.72703,0.774332
69,0.7365,0.799464,0.894127,0.752782,0.799464


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 642ms/step - loss: 9.0166 - root_mean_squared_error: 3.0028
Epoch 1: val_root_mean_squared_error improved from inf to 0.92862, saving model to cache/ensemble_camembert-base/models/mlp/183458d6ef231572363d79c807b3a5168d5b76c59da1f13459a03e2ee3816770_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5889 - root_mean_squared_error: 1.8110 - val_loss: 0.8623 - val_root_mean_squared_error: 0.9286
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6072 - root_mean_squared_error: 0.7792
Epoch 2: val_root_mean_squared_error improved from 0.92862 to 0.55836, saving model to cache/ensemble_camembert-base/models/mlp/183458d6ef231572363d79c807b3a5168d5b76c59da1f13459a03e2ee3816770_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7377 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6792,2.788431,1.66986,1.37959,2.788431
46,1.1564,0.892171,0.944548,0.833479,0.892171
69,0.943,0.869362,0.932396,0.821628,0.869362


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 5.6559 - root_mean_squared_error: 2.3782
Epoch 1: val_root_mean_squared_error improved from inf to 0.56878, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4031 - root_mean_squared_error: 1.4907 - val_loss: 0.3235 - val_root_mean_squared_error: 0.5688
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4694 - root_mean_squared_error: 0.6851
Epoch 2: val_root_mean_squared_error improved from 0.56878 to 0.44870, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5706 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6969,2.01919,1.420982,1.177111,2.019189
46,1.3313,0.650885,0.806774,0.673243,0.650885
69,0.8903,0.657292,0.810736,0.675525,0.657292


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.0236 - root_mean_squared_error: 2.6502
Epoch 1: val_root_mean_squared_error improved from inf to 0.45067, saving model to cache/ensemble_camembert-base/models/mlp/d8dc3a3bcd119757161db5929b64dac089221b5ac587519629bfc3de8ed7c4cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 3.0684 - root_mean_squared_error: 1.6808 - val_loss: 0.2031 - val_root_mean_squared_error: 0.4507
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5832 - root_mean_squared_error: 0.7637
Epoch 2: val_root_mean_squared_error improved from 0.45067 to 0.33525, saving model to cache/ensemble_camembert-base/models/mlp/d8dc3a3bcd119757161db5929b64dac089221b5ac587519629bfc3de8ed7c4cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5111 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2297,1.708945,1.307266,1.039711,1.708945
46,1.6341,0.816724,0.903728,0.772654,0.816724
69,0.6771,0.91703,0.957617,0.812272,0.91703


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 10.8382 - root_mean_squared_error: 3.2921
Epoch 1: val_root_mean_squared_error improved from inf to 0.40284, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0661 - root_mean_squared_error: 1.6597 - val_loss: 0.1623 - val_root_mean_squared_error: 0.4028
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4757 - root_mean_squared_error: 0.6897
Epoch 2: val_root_mean_squared_error improved from 0.40284 to 0.36342, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5973 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.055,2.039347,1.428057,1.150791,2.039347
46,1.0577,0.813392,0.901882,0.744904,0.813392
69,0.8692,0.779986,0.883168,0.728994,0.779986


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 8.0758 - root_mean_squared_error: 2.8418
Epoch 1: val_root_mean_squared_error improved from inf to 0.67638, saving model to cache/ensemble_camembert-base/models/mlp/1f0680205112bab0fa887c0a2a33810a05bec5e449ce8aaa3aecb1affeb4c30c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4024 - root_mean_squared_error: 1.7648 - val_loss: 0.4575 - val_root_mean_squared_error: 0.6764
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7398 - root_mean_squared_error: 0.8601
Epoch 2: val_root_mean_squared_error improved from 0.67638 to 0.35394, saving model to cache/ensemble_camembert-base/models/mlp/1f0680205112bab0fa887c0a2a33810a05bec5e449ce8aaa3aecb1affeb4c30c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4819 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8925,2.214107,1.487988,1.257815,2.214107
46,1.4384,0.67489,0.821517,0.672953,0.67489
69,0.9191,0.688762,0.829917,0.67981,0.688762


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 7.3907 - root_mean_squared_error: 2.7186
Epoch 1: val_root_mean_squared_error improved from inf to 0.43679, saving model to cache/ensemble_camembert-base/models/mlp/23d74afe30d4b952316e803e78a46796dc8cb76e6f7a3a8e71ebbc1cbafaef9e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5690 - root_mean_squared_error: 1.5351 - val_loss: 0.1908 - val_root_mean_squared_error: 0.4368
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4183 - root_mean_squared_error: 0.6467
Epoch 2: val_root_mean_squared_error did not improve from 0.43679
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4661 - root_mean_squared_error: 0.6811 - val_loss: 0.3592 - val_root_mean_squared_error: 0.5994
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4831,2.703057,1.644098,1.36527,2.703057
46,1.6215,0.838668,0.915788,0.704736,0.838668
69,0.8447,0.813243,0.9018,0.736587,0.813243


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 8.0169 - root_mean_squared_error: 2.8314
Epoch 1: val_root_mean_squared_error improved from inf to 0.55435, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 3.1260 - root_mean_squared_error: 1.6908 - val_loss: 0.3073 - val_root_mean_squared_error: 0.5544
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3118 - root_mean_squared_error: 0.5584
Epoch 2: val_root_mean_squared_error improved from 0.55435 to 0.46892, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4801 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2213,2.822486,1.680026,1.443826,2.822486
46,1.2024,0.745018,0.863144,0.694848,0.745018
69,0.7571,0.699322,0.836255,0.672594,0.699322


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.3233 - root_mean_squared_error: 2.5146
Epoch 1: val_root_mean_squared_error improved from inf to 0.61379, saving model to cache/ensemble_camembert-base/models/mlp/483b33af5f90d2d7949f7aa3288319156264cd8b86a042df746190742285050b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1165 - root_mean_squared_error: 1.6975 - val_loss: 0.3767 - val_root_mean_squared_error: 0.6138
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2477 - root_mean_squared_error: 0.4977
Epoch 2: val_root_mean_squared_error improved from 0.61379 to 0.44774, saving model to cache/ensemble_camembert-base/models/mlp/483b33af5f90d2d7949f7aa3288319156264cd8b86a042df746190742285050b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3553 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9177,2.493871,1.579199,1.319048,2.493871
46,1.2646,0.782398,0.884533,0.765455,0.782398
69,0.8598,0.733429,0.856405,0.737805,0.73343


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m51s[0m 3s/step - loss: 6.7628 - root_mean_squared_error: 2.6005
Epoch 1: val_root_mean_squared_error improved from inf to 0.68238, saving model to cache/ensemble_camembert-base/models/mlp/a98874d3af321769dacd345fef988fba1d8e5abf878391234b1670475bec4d29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 111ms/step - loss: 2.6885 - root_mean_squared_error: 1.5766 - val_loss: 0.4656 - val_root_mean_squared_error: 0.6824
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2284 - root_mean_squared_error: 0.4780
Epoch 2: val_root_mean_squared_error improved from 0.68238 to 0.61518, saving model to cache/ensemble_camembert-base/models/mlp/a98874d3af321769dacd345fef988fba1d8e5abf878391234b1670475bec4d29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5036 - root_mean_squared_error: 0.701

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.2253,3.29308,1.814684,1.578819,3.293079
46,1.0724,0.924352,0.961432,0.73284,0.924352
69,0.797,0.888805,0.942765,0.718964,0.888805


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 6.8022 - root_mean_squared_error: 2.6081
Epoch 1: val_root_mean_squared_error improved from inf to 0.66565, saving model to cache/ensemble_camembert-base/models/mlp/4e0c0e453d7907eb14cdeff662e022ed234097c96d59df1b7f89bf57b8b09c82_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9045 - root_mean_squared_error: 1.6382 - val_loss: 0.4431 - val_root_mean_squared_error: 0.6656
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2716 - root_mean_squared_error: 0.5212
Epoch 2: val_root_mean_squared_error improved from 0.66565 to 0.55787, saving model to cache/ensemble_camembert-base/models/mlp/4e0c0e453d7907eb14cdeff662e022ed234097c96d59df1b7f89bf57b8b09c82_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4787 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9033,2.085995,1.444297,1.210268,2.085995
46,1.5768,0.724399,0.851116,0.695666,0.724399
69,0.7808,0.700954,0.83723,0.671678,0.700954


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 8.7731 - root_mean_squared_error: 2.9619
Epoch 1: val_root_mean_squared_error improved from inf to 0.59385, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3199 - root_mean_squared_error: 1.7400 - val_loss: 0.3527 - val_root_mean_squared_error: 0.5938
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4377 - root_mean_squared_error: 0.6616
Epoch 2: val_root_mean_squared_error improved from 0.59385 to 0.55575, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4743 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9821,2.270997,1.506983,1.296639,2.270997
46,1.5571,0.591264,0.768937,0.617523,0.591264
69,0.962,0.615146,0.784313,0.64057,0.615146


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 6.6712 - root_mean_squared_error: 2.5829
Epoch 1: val_root_mean_squared_error improved from inf to 0.48295, saving model to cache/ensemble_camembert-base/models/mlp/3faa95308897e7acf1cd78779ed6236072a67a0a737c4f0dda5f4c0d29bb6a37_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7858 - root_mean_squared_error: 1.5999 - val_loss: 0.2332 - val_root_mean_squared_error: 0.4829
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3220 - root_mean_squared_error: 0.5675
Epoch 2: val_root_mean_squared_error did not improve from 0.48295
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5741 - root_mean_squared_error: 0.7522 - val_loss: 0.4023 - val_root_mean_squared_error: 0.6343
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.097,3.013164,1.735847,1.456892,3.013163
46,1.1751,0.909434,0.953643,0.744612,0.909434
69,0.7129,0.893025,0.945,0.738607,0.893025


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 4.9810 - root_mean_squared_error: 2.2318
Epoch 1: val_root_mean_squared_error improved from inf to 0.61388, saving model to cache/ensemble_camembert-base/models/mlp/824f87d6c7c60f9781842265a1024888cb76f930850c07e83abbaf20e904038a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5587 - root_mean_squared_error: 1.5409 - val_loss: 0.3769 - val_root_mean_squared_error: 0.6139
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1740 - root_mean_squared_error: 0.4171
Epoch 2: val_root_mean_squared_error improved from 0.61388 to 0.50095, saving model to cache/ensemble_camembert-base/models/mlp/824f87d6c7c60f9781842265a1024888cb76f930850c07e83abbaf20e904038a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4347 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.5939,2.434378,1.560249,1.223751,2.434378
46,1.181,0.990007,0.994991,0.853051,0.990007
69,0.8374,0.96334,0.981499,0.840131,0.96334


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 5.0304 - root_mean_squared_error: 2.2428
Epoch 1: val_root_mean_squared_error improved from inf to 0.69007, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7415 - root_mean_squared_error: 1.6036 - val_loss: 0.4762 - val_root_mean_squared_error: 0.6901
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5231 - root_mean_squared_error: 0.7233
Epoch 2: val_root_mean_squared_error improved from 0.69007 to 0.42940, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7427 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7542,2.316176,1.521899,1.286873,2.316176
46,1.449,0.683735,0.826882,0.688775,0.683735
69,0.7567,0.610214,0.781162,0.648972,0.610214


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 6.2337 - root_mean_squared_error: 2.4967
Epoch 1: val_root_mean_squared_error improved from inf to 0.50597, saving model to cache/ensemble_camembert-base/models/mlp/7f65c1db0bf5816778ccfac18dbf377cb824894ecf1f586c7be43ac5a083472a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9343 - root_mean_squared_error: 1.6457 - val_loss: 0.2560 - val_root_mean_squared_error: 0.5060
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1421 - root_mean_squared_error: 0.3770
Epoch 2: val_root_mean_squared_error improved from 0.50597 to 0.41015, saving model to cache/ensemble_camembert-base/models/mlp/7f65c1db0bf5816778ccfac18dbf377cb824894ecf1f586c7be43ac5a083472a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5938 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9326,2.190499,1.480033,1.201656,2.190499
46,1.6371,0.781508,0.88403,0.728373,0.781508
69,0.6488,0.747746,0.864723,0.712947,0.747746


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.2874 - root_mean_squared_error: 2.6995
Epoch 1: val_root_mean_squared_error improved from inf to 0.56360, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7412 - root_mean_squared_error: 1.5890 - val_loss: 0.3176 - val_root_mean_squared_error: 0.5636
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1264 - root_mean_squared_error: 0.3555
Epoch 2: val_root_mean_squared_error improved from 0.56360 to 0.46604, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3519 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0508,2.477451,1.573992,1.313433,2.477452
46,1.0707,0.752508,0.867472,0.715785,0.752508
69,0.9395,0.712897,0.844332,0.686179,0.712897


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 7.0709 - root_mean_squared_error: 2.6591
Epoch 1: val_root_mean_squared_error improved from inf to 0.59921, saving model to cache/ensemble_camembert-base/models/mlp/474df8b6e3907935850fd01c78e7384e00b4dd9a1d64a3fdd8ebdc32fcadfdf8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2304 - root_mean_squared_error: 1.7251 - val_loss: 0.3591 - val_root_mean_squared_error: 0.5992
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5764 - root_mean_squared_error: 0.7592
Epoch 2: val_root_mean_squared_error did not improve from 0.59921
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5924 - root_mean_squared_error: 0.7649 - val_loss: 1.4040 - val_root_mean_squared_error: 1.1849
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.879,3.600366,1.897463,1.59289,3.600366
46,1.438,1.132927,1.06439,0.897464,1.132927
69,0.8872,1.078662,1.038587,0.904543,1.078662


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.3144 - root_mean_squared_error: 2.3053
Epoch 1: val_root_mean_squared_error improved from inf to 1.08183, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5730 - root_mean_squared_error: 1.5485 - val_loss: 1.1704 - val_root_mean_squared_error: 1.0818
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6058 - root_mean_squared_error: 0.7783
Epoch 2: val_root_mean_squared_error improved from 1.08183 to 0.36749, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3567 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2915,2.292363,1.514055,1.310261,2.292363
46,1.2153,0.615046,0.784248,0.617569,0.615046
69,0.9716,0.59093,0.76872,0.608215,0.59093


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 10.3879 - root_mean_squared_error: 3.2230
Epoch 1: val_root_mean_squared_error improved from inf to 0.65397, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3811 - root_mean_squared_error: 1.7481 - val_loss: 0.4277 - val_root_mean_squared_error: 0.6540
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5709 - root_mean_squared_error: 0.7556
Epoch 2: val_root_mean_squared_error improved from 0.65397 to 0.39205, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4069 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9232,1.822824,1.35012,1.087166,1.822824
46,1.2951,0.712925,0.844349,0.699938,0.712925
69,0.9023,0.766999,0.875785,0.750565,0.766999


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 10.1034 - root_mean_squared_error: 3.1786
Epoch 1: val_root_mean_squared_error improved from inf to 0.48157, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.8105 - root_mean_squared_error: 1.8544 - val_loss: 0.2319 - val_root_mean_squared_error: 0.4816
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4945 - root_mean_squared_error: 0.7032
Epoch 2: val_root_mean_squared_error improved from 0.48157 to 0.39060, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5566 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6556,1.951262,1.396876,1.181477,1.951262
46,1.2946,0.630231,0.793871,0.632358,0.630231
69,0.824,0.551923,0.742915,0.572921,0.551923


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 5.1211 - root_mean_squared_error: 2.2630
Epoch 1: val_root_mean_squared_error improved from inf to 0.60983, saving model to cache/ensemble_camembert-base/models/mlp/a26faa62580a71129dba42331db1036b03fb5a20d296815b9af286c97f35a242_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2067 - root_mean_squared_error: 1.4341 - val_loss: 0.3719 - val_root_mean_squared_error: 0.6098
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2476 - root_mean_squared_error: 0.4976
Epoch 2: val_root_mean_squared_error did not improve from 0.60983
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5183 - root_mean_squared_error: 0.7118 - val_loss: 0.9104 - val_root_mean_squared_error: 0.9542
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9271,2.865278,1.692713,1.481907,2.865278
46,1.1715,0.663785,0.81473,0.674112,0.663785
69,0.7588,0.615953,0.784827,0.657108,0.615953


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.6052 - root_mean_squared_error: 2.5701
Epoch 1: val_root_mean_squared_error improved from inf to 0.65922, saving model to cache/ensemble_camembert-base/models/mlp/2ababbbb45c050468583c2425018c72226390d26da4dec07fee2953baf734031_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5550 - root_mean_squared_error: 1.5365 - val_loss: 0.4346 - val_root_mean_squared_error: 0.6592
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.9237 - root_mean_squared_error: 0.9611
Epoch 2: val_root_mean_squared_error did not improve from 0.65922
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6909 - root_mean_squared_error: 0.8222 - val_loss: 0.6528 - val_root_mean_squared_error: 0.8079
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8787,2.07361,1.440004,1.238489,2.07361
46,1.2823,0.550887,0.742218,0.557833,0.550887
69,0.8432,0.522536,0.722867,0.534441,0.522536


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 6.8475 - root_mean_squared_error: 2.6168
Epoch 1: val_root_mean_squared_error improved from inf to 0.63214, saving model to cache/ensemble_camembert-base/models/mlp/ff2a65cacce63aeb7c2854a9ae5f4836168fb211c8e959317999816dff00f166_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0759 - root_mean_squared_error: 1.6801 - val_loss: 0.3996 - val_root_mean_squared_error: 0.6321
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5163 - root_mean_squared_error: 0.7185
Epoch 2: val_root_mean_squared_error improved from 0.63214 to 0.40418, saving model to cache/ensemble_camembert-base/models/mlp/ff2a65cacce63aeb7c2854a9ae5f4836168fb211c8e959317999816dff00f166_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6054 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6014,3.716692,1.927872,1.629726,3.716692
46,1.4031,1.149393,1.072097,0.873664,1.149393
69,0.7154,1.074484,1.036573,0.856225,1.074484


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.5312 - root_mean_squared_error: 2.7443
Epoch 1: val_root_mean_squared_error improved from inf to 0.49930, saving model to cache/ensemble_camembert-base/models/mlp/f079113d8c72a0c2b294bf13e8fa99ecdec163af6fb1c0e7b61d541ab3c0b50c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6444 - root_mean_squared_error: 1.5478 - val_loss: 0.2493 - val_root_mean_squared_error: 0.4993
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2256 - root_mean_squared_error: 0.4749
Epoch 2: val_root_mean_squared_error improved from 0.49930 to 0.34645, saving model to cache/ensemble_camembert-base/models/mlp/f079113d8c72a0c2b294bf13e8fa99ecdec163af6fb1c0e7b61d541ab3c0b50c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5049 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0513,2.691774,1.640663,1.372421,2.691774
46,1.2019,0.916294,0.957232,0.764595,0.916294
69,0.9233,0.880277,0.938231,0.74691,0.880277


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 7.8014 - root_mean_squared_error: 2.7931
Epoch 1: val_root_mean_squared_error improved from inf to 0.67788, saving model to cache/ensemble_camembert-base/models/mlp/240d66e1cea4d06fbe149315f1e4c9620b2522b8507382030e2f974aaa6768f4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2738 - root_mean_squared_error: 1.7241 - val_loss: 0.4595 - val_root_mean_squared_error: 0.6779
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6330 - root_mean_squared_error: 0.7956
Epoch 2: val_root_mean_squared_error did not improve from 0.67788
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6179 - root_mean_squared_error: 0.7824 - val_loss: 0.5930 - val_root_mean_squared_error: 0.7701
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8611,2.238914,1.4963,1.180896,2.238914
46,1.6161,0.909268,0.953555,0.797001,0.909268
69,0.7102,0.945613,0.972426,0.823876,0.945613


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 10.0020 - root_mean_squared_error: 3.1626
Epoch 1: val_root_mean_squared_error improved from inf to 0.80985, saving model to cache/ensemble_camembert-base/models/mlp/8aea381324eb3df6a3b42f259354fde1157b5bcd6514734139e5ea7e3724bce1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2181 - root_mean_squared_error: 1.7078 - val_loss: 0.6559 - val_root_mean_squared_error: 0.8098
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9510 - root_mean_squared_error: 0.9752
Epoch 2: val_root_mean_squared_error improved from 0.80985 to 0.67908, saving model to cache/ensemble_camembert-base/models/mlp/8aea381324eb3df6a3b42f259354fde1157b5bcd6514734139e5ea7e3724bce1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6334 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4872,2.370769,1.53973,1.28738,2.370769
46,1.3666,0.760418,0.872019,0.719949,0.760418
69,0.765,0.753379,0.867974,0.723997,0.753379


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 8.2703 - root_mean_squared_error: 2.8758
Epoch 1: val_root_mean_squared_error improved from inf to 0.83381, saving model to cache/ensemble_camembert-base/models/mlp/3ab23158f933f07c9d26e1a0bb8808f4dc6e7c165f331c7c4ac24c811a09acc7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7805 - root_mean_squared_error: 1.5839 - val_loss: 0.6952 - val_root_mean_squared_error: 0.8338
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.8418 - root_mean_squared_error: 0.9175
Epoch 2: val_root_mean_squared_error improved from 0.83381 to 0.64170, saving model to cache/ensemble_camembert-base/models/mlp/3ab23158f933f07c9d26e1a0bb8808f4dc6e7c165f331c7c4ac24c811a09acc7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7181 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7918,2.927464,1.710983,1.464774,2.927463
46,1.3658,0.804798,0.897105,0.76218,0.804798
69,0.9275,0.788189,0.8878,0.77627,0.788189


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 8.9586 - root_mean_squared_error: 2.9931
Epoch 1: val_root_mean_squared_error improved from inf to 0.85846, saving model to cache/ensemble_camembert-base/models/mlp/766345ef46e6d5a2425bb88b9d30b2995b01188292bbee2f59923a4098eafe0c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0092 - root_mean_squared_error: 1.6553 - val_loss: 0.7369 - val_root_mean_squared_error: 0.8585
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5436 - root_mean_squared_error: 0.7373
Epoch 2: val_root_mean_squared_error did not improve from 0.85846
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3294 - root_mean_squared_error: 0.5716 - val_loss: 0.8677 - val_root_mean_squared_error: 0.9315
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7381,2.649909,1.627854,1.309052,2.649909
46,1.1523,0.951668,0.975535,0.812596,0.951668
69,0.8609,0.945666,0.972453,0.756777,0.945666


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.5593 - root_mean_squared_error: 2.7494
Epoch 1: val_root_mean_squared_error improved from inf to 0.71926, saving model to cache/ensemble_camembert-base/models/mlp/f8374adf54f854a9ffa79d69409571f6ce24274b3c886dfb5901f7dc049e4de5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1935 - root_mean_squared_error: 1.7062 - val_loss: 0.5173 - val_root_mean_squared_error: 0.7193
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3057 - root_mean_squared_error: 0.5529
Epoch 2: val_root_mean_squared_error did not improve from 0.71926
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4894 - root_mean_squared_error: 0.6959 - val_loss: 0.8371 - val_root_mean_squared_error: 0.9149
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8587,2.737128,1.654427,1.367368,2.737128
46,1.3268,0.887994,0.942334,0.771614,0.887994
69,0.7545,0.822942,0.907162,0.736358,0.822942


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.3125 - root_mean_squared_error: 2.5125
Epoch 1: val_root_mean_squared_error improved from inf to 0.81172, saving model to cache/ensemble_camembert-base/models/mlp/537412fa7ddcdcf39abdc1b52fb1d1b492361c30b6f934dc9bebf761ad05f51c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5652 - root_mean_squared_error: 1.5417 - val_loss: 0.6589 - val_root_mean_squared_error: 0.8117
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8310 - root_mean_squared_error: 0.9116
Epoch 2: val_root_mean_squared_error improved from 0.81172 to 0.78902, saving model to cache/ensemble_camembert-base/models/mlp/537412fa7ddcdcf39abdc1b52fb1d1b492361c30b6f934dc9bebf761ad05f51c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5073 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.6844,2.2374,1.495794,1.197877,2.2374
46,1.4714,0.838933,0.915933,0.787237,0.838933
69,0.722,0.860859,0.927825,0.787253,0.860859


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.8357 - root_mean_squared_error: 2.6145
Epoch 1: val_root_mean_squared_error improved from inf to 0.57312, saving model to cache/ensemble_camembert-base/models/mlp/90881b0c9ac8b07035517726acaa38bc35198e6bb03aa3c734de6d40cd1812fd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 3.2085 - root_mean_squared_error: 1.7198 - val_loss: 0.3285 - val_root_mean_squared_error: 0.5731
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2439 - root_mean_squared_error: 0.4938
Epoch 2: val_root_mean_squared_error did not improve from 0.57312
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4678 - root_mean_squared_error: 0.6761 - val_loss: 0.3857 - val_root_mean_squared_error: 0.6211
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9274,1.938255,1.392212,1.180689,1.938255
46,1.3328,0.607307,0.779299,0.691696,0.607307
69,0.8473,0.622393,0.788919,0.699176,0.622393


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 9.9119 - root_mean_squared_error: 3.1483
Epoch 1: val_root_mean_squared_error improved from inf to 0.90507, saving model to cache/ensemble_camembert-base/models/mlp/5c88cf3b0896e33ec3d6a3d38846142c2a17f32e4547eb2283c5751d66bc0d49_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9923 - root_mean_squared_error: 1.6376 - val_loss: 0.8191 - val_root_mean_squared_error: 0.9051
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4909 - root_mean_squared_error: 0.7007
Epoch 2: val_root_mean_squared_error improved from 0.90507 to 0.60117, saving model to cache/ensemble_camembert-base/models/mlp/5c88cf3b0896e33ec3d6a3d38846142c2a17f32e4547eb2283c5751d66bc0d49_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4854 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.4232,3.02604,1.739552,1.459687,3.02604
46,1.289,0.902223,0.949854,0.727508,0.902223
69,0.7555,0.886232,0.941399,0.726654,0.886232


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 7.8646 - root_mean_squared_error: 2.8044
Epoch 1: val_root_mean_squared_error improved from inf to 0.52435, saving model to cache/ensemble_camembert-base/models/mlp/320e69484e06dd407d99937704d8bdc511bb47bba6effc28c75d38d40fa306a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4735 - root_mean_squared_error: 1.4994 - val_loss: 0.2749 - val_root_mean_squared_error: 0.5243
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5248 - root_mean_squared_error: 0.7244
Epoch 2: val_root_mean_squared_error improved from 0.52435 to 0.38446, saving model to cache/ensemble_camembert-base/models/mlp/320e69484e06dd407d99937704d8bdc511bb47bba6effc28c75d38d40fa306a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6117 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.3917,2.977894,1.725658,1.430625,2.977894
46,1.5098,0.936213,0.967581,0.789553,0.936213
69,0.7603,0.911996,0.954985,0.793751,0.911996


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 7.8408 - root_mean_squared_error: 2.8001
Epoch 1: val_root_mean_squared_error improved from inf to 0.98263, saving model to cache/ensemble_camembert-base/models/mlp/e57078a0c5faec94ad466edfd806d32721606ab1fa5dbbbd9dae6e344c5356b6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3435 - root_mean_squared_error: 1.7525 - val_loss: 0.9656 - val_root_mean_squared_error: 0.9826
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0393 - root_mean_squared_error: 1.0195
Epoch 2: val_root_mean_squared_error improved from 0.98263 to 0.42125, saving model to cache/ensemble_camembert-base/models/mlp/e57078a0c5faec94ad466edfd806d32721606ab1fa5dbbbd9dae6e344c5356b6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5533 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0918,2.395242,1.547657,1.313116,2.395242
46,1.3097,0.675136,0.821667,0.66355,0.675136
69,0.748,0.677065,0.82284,0.662791,0.677065


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.4055 - root_mean_squared_error: 2.5309
Epoch 1: val_root_mean_squared_error improved from inf to 0.45480, saving model to cache/ensemble_camembert-base/models/mlp/24cac461bf27a7a91afc20ff9c5686604e054186cde4e184e0d53d70a3effb70_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8400 - root_mean_squared_error: 1.6183 - val_loss: 0.2068 - val_root_mean_squared_error: 0.4548
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.2304 - root_mean_squared_error: 0.4800
Epoch 2: val_root_mean_squared_error did not improve from 0.45480
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5226 - root_mean_squared_error: 0.7135 - val_loss: 0.3750 - val_root_mean_squared_error: 0.6123
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9488,2.496523,1.580039,1.306035,2.496523
46,1.1613,0.810855,0.900475,0.738104,0.810855
69,0.8362,0.74423,0.862688,0.689531,0.74423


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 10.0711 - root_mean_squared_error: 3.1735
Epoch 1: val_root_mean_squared_error improved from inf to 0.68287, saving model to cache/ensemble_camembert-base/models/mlp/62d348d3e187f1e4f25cc0a0a6c96bb5fd0c3a081aaded6df0765e542b5dc291_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.5424 - root_mean_squared_error: 1.7874 - val_loss: 0.4663 - val_root_mean_squared_error: 0.6829
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5851 - root_mean_squared_error: 0.7649
Epoch 2: val_root_mean_squared_error improved from 0.68287 to 0.45892, saving model to cache/ensemble_camembert-base/models/mlp/62d348d3e187f1e4f25cc0a0a6c96bb5fd0c3a081aaded6df0765e542b5dc291_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5307 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9075,2.155137,1.468038,1.16089,2.155137
46,1.3668,0.906176,0.951933,0.817136,0.906176
69,0.9139,0.844036,0.918714,0.793443,0.844036


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 6.4911 - root_mean_squared_error: 2.5478
Epoch 1: val_root_mean_squared_error improved from inf to 0.85837, saving model to cache/ensemble_camembert-base/models/mlp/b42318f94e536085f45735c550c3139d3ed2fa9f0c63baf51c66352cbfe594d2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2904 - root_mean_squared_error: 1.4491 - val_loss: 0.7368 - val_root_mean_squared_error: 0.8584
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5040 - root_mean_squared_error: 0.7099
Epoch 2: val_root_mean_squared_error improved from 0.85837 to 0.72854, saving model to cache/ensemble_camembert-base/models/mlp/b42318f94e536085f45735c550c3139d3ed2fa9f0c63baf51c66352cbfe594d2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5999 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3909,2.202404,1.48405,1.220548,2.202405
46,1.2869,0.774296,0.879941,0.74977,0.774296
69,0.7441,0.79642,0.892424,0.752459,0.79642


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.3026 - root_mean_squared_error: 2.8814
Epoch 1: val_root_mean_squared_error improved from inf to 0.51208, saving model to cache/ensemble_camembert-base/models/mlp/2f7e21a3d2496bad13dd4d94d85ef053ee7527b63a4b7e0915caeec7f359da0e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0181 - root_mean_squared_error: 1.6611 - val_loss: 0.2622 - val_root_mean_squared_error: 0.5121
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3214 - root_mean_squared_error: 0.5669
Epoch 2: val_root_mean_squared_error did not improve from 0.51208
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5811 - root_mean_squared_error: 0.7556 - val_loss: 0.3939 - val_root_mean_squared_error: 0.6276
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.7181,1.717888,1.310682,1.121314,1.717888
46,1.1418,0.567319,0.753206,0.615277,0.567319
69,0.9387,0.530571,0.728403,0.568471,0.530571


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 3.5118 - root_mean_squared_error: 1.8740
Epoch 1: val_root_mean_squared_error improved from inf to 0.70434, saving model to cache/ensemble_camembert-base/models/mlp/07c5a62a62bdfc9945151511d58b9468f7cf90a70bd05a72ec6dab40701a17e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5730 - root_mean_squared_error: 1.5429 - val_loss: 0.4961 - val_root_mean_squared_error: 0.7043
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4256 - root_mean_squared_error: 0.6524
Epoch 2: val_root_mean_squared_error improved from 0.70434 to 0.36182, saving model to cache/ensemble_camembert-base/models/mlp/07c5a62a62bdfc9945151511d58b9468f7cf90a70bd05a72ec6dab40701a17e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5799 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8434,2.28496,1.511608,1.250725,2.28496
46,1.2426,0.776355,0.88111,0.725312,0.776355
69,0.9527,0.83088,0.911526,0.764833,0.83088


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 5.5171 - root_mean_squared_error: 2.3489
Epoch 1: val_root_mean_squared_error improved from inf to 0.58776, saving model to cache/ensemble_camembert-base/models/mlp/dc4cd7688ecfa828f75c5cb8a1f90da9979d39194747880c8ea602c4e9c91a66_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8878 - root_mean_squared_error: 1.6373 - val_loss: 0.3455 - val_root_mean_squared_error: 0.5878
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1740 - root_mean_squared_error: 0.4172
Epoch 2: val_root_mean_squared_error improved from 0.58776 to 0.43455, saving model to cache/ensemble_camembert-base/models/mlp/dc4cd7688ecfa828f75c5cb8a1f90da9979d39194747880c8ea602c4e9c91a66_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5216 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2667,3.970064,1.992502,1.771314,3.970064
46,1.3011,0.96694,0.983331,0.736698,0.96694
69,1.0087,0.988499,0.994233,0.739777,0.988498


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.3846 - root_mean_squared_error: 2.7175
Epoch 1: val_root_mean_squared_error improved from inf to 0.54942, saving model to cache/ensemble_camembert-base/models/mlp/303499d53bb8bf3d67c7b9909d20171c8ee4aa9e68718d774a60e37b8371aa02_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7058 - root_mean_squared_error: 1.5703 - val_loss: 0.3019 - val_root_mean_squared_error: 0.5494
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8199 - root_mean_squared_error: 0.9055
Epoch 2: val_root_mean_squared_error improved from 0.54942 to 0.37190, saving model to cache/ensemble_camembert-base/models/mlp/303499d53bb8bf3d67c7b9909d20171c8ee4aa9e68718d774a60e37b8371aa02_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6258 - root_mean_squared_error: 0.7

  df_macro_ensemble_scores = pd.concat([df_macro_ensemble_scores, new_row], ignore_index=True)


In [None]:
%rm -rf cache

In [None]:
from google.colab import files
!zip -r /content/logs.zip /content/training/cache/ensemble_camembert-base/logs

files.download('/content/logs.zip')

  adding: content/training/cache/ensemble_camembert-base/logs/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/events.out.tfevents.1725250844.d432371e2147.788.169 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/events.out.tfevents.1725247587.d432371e2147.788.49 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/events.out.tfevents.1725252511.d432371e2147.788.229 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/events.out.tfevents.1725254269.d432371e2147.788.289 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_49/events.out.tfevents.1725249198.d432371e2147.788.109 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!cp -r /content/training/cache/ensemble_camembert-base/models /content/drive/MyDrive/Models0109

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
# Any cell that reads or writes under /content/drive needs this to have run.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
