## A. Installation

### A.1. Structure

Pour réinitialiser la structure (from scratch) :

In [1]:
!cd /content
!rm -rf /content/Merval
!git clone https://github.com/mervealgan/Merval

Cloning into 'Merval'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 35 (delta 12), reused 29 (delta 11), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 366.01 KiB | 33.27 MiB/s, done.
Resolving deltas: 100% (12/12), done.


In [2]:
!rm -rf /content/training/data
!mkdir -p /content/training/data/features

!cp -r /content/Merval/data/features/* /content/training/data/features
!cp /content/Merval/data/test_set.csv /content/training/data/test_set.csv
!cp /content/Merval/data/training_set.csv /content/training/data/training_set.csv
!cp /content/Merval/data/valid_set.csv /content/training/data/valid_set.csv

In [3]:
%cd /content/training/

/content/training


### A.2. imports

In [4]:
!pip install tbparse
!pip install syntok
!pip install stanza
!pip install textcomplexity
!pip install transformers[torch]
!pip install accelerate -U

Collecting tbparse
  Downloading tbparse-0.0.9-py3-none-any.whl.metadata (8.7 kB)
Downloading tbparse-0.0.9-py3-none-any.whl (19 kB)
Installing collected packages: tbparse
Successfully installed tbparse-0.0.9
Collecting syntok
  Downloading syntok-1.4.4-py3-none-any.whl.metadata (10 kB)
Downloading syntok-1.4.4-py3-none-any.whl (24 kB)
Installing collected packages: syntok
Successfully installed syntok-1.4.4
Collecting stanza
  Downloading stanza-1.8.2-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.12.1-py3-none-any.whl.metadata (5.4 kB)
Downloading stanza-1.8.2-py3-none-any.whl (990 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.1/990.1 kB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading emoji-2.12.1-py3-none-any.whl (431 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.4/431.4 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji, stanza
Successfully in

## B. Entrainement

In [5]:
import hashlib
import os
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import (Sequential, load_model)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from transformers import (AutoTokenizer, TrainingArguments, AutoModelForSequenceClassification, set_seed, Trainer,
                          EarlyStoppingCallback, )


class TCCDataset(torch.utils.data.Dataset):
    def __init__(self, tokens, labels):
        self.tokens = tokens
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # return tensor
        item = {key: val[idx].clone().detach() for key, val in self.tokens.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item


class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.MSELoss()
        loss = loss_fct(
            logits.view(-1, self.model.config.num_labels),
            labels.float().view(-1, self.model.config.num_labels),
        )
        return (loss, outputs) if return_outputs else loss


class OptimizedESCallback(EarlyStoppingCallback):
    def __init__(self, patience, initial_steps_wo_save):
        super().__init__(early_stopping_patience=patience)
        self.initial_steps_wo_save = initial_steps_wo_save

    def check_metric_value(self, args, state, control, metric_value):
        super().check_metric_value(args, state, control, metric_value)
        if self.early_stopping_patience_counter == 0:
            control.should_save = True

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        if state.global_step < self.initial_steps_wo_save:
            return
        super().on_evaluate(args, state, control, metrics, **kwargs)


def compute_metrics(y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def compute_metrics_for_regression(eval_pred):
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)

    rmse = mean_squared_error(labels, logits, squared=False)
    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def get_hugging_face_name(name):
    if name == "camembert-base":
        return "almanach/camembert-base"
    if name == "camembert-large":
        return "almanach/camembert-large"
    return ""


def load_dataset(path, encoding="utf-8", shuffle=True):
    df = pd.read_csv(path, encoding=encoding)
    df.drop_duplicates(inplace=True)
    if shuffle:
        df = df.sample(frac=1, random_state=9).reset_index(drop=True)
    return df


def load_dataset_with_features_fr(dataset, data_root_path='data_fr'):
    df = load_dataset(os.path.join(data_root_path, f'{dataset}_set.csv'))
    df_features = pd.read_csv(os.path.join(data_root_path, 'features', f'features_{dataset}_readability_fr.csv'))
    df_merged = df.merge(df_features, on='ID', suffixes=('', '_df2'))

    # drop or ignore some columns
    ignore_columns = ['sentence_df2', 'paragraphs', 'sentences_per_paragraph']
    df_merged.drop(columns=ignore_columns, inplace=True)

    # add some of our own features
    df_merged['max_word_length'] = df_merged['sentence'].apply(lambda x: max([len(w) for w in x.split()]))

    for i in range(5, 10):
        df_merged['num_word_longer_than_' + str(i)] = df_merged['sentence'].apply(
            lambda x: sum([len(w) > i for w in x.split()]))

    feature_columns = df_merged.columns.to_list()[df_merged.columns.to_list().index('sentence') + 1:]

    return df_merged, feature_columns


os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

BOOTSTRAP_SIZE = 1000  # 1000
MAX_ENSEMBLE_SIZE = 35  # 60
ENSEMBLE_POOL_SIZE = 40  # 100
N_FOLDS = 5
MODEL_NAME = 'camembert-base'  # ['gbert', 'gelectra', 'gottbert', 'gerpt']
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
N_EVAL_STEPS = 23

EXPERIMENT_NAME = f'ensemble_{MODEL_NAME}'
EXPERIMENT_DIR = f'cache/{EXPERIMENT_NAME}'

from tensorflow.keras.callbacks import TensorBoard
log_dir = f'{EXPERIMENT_DIR}/logs/mlp/'
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

df_train, feature_columns = load_dataset_with_features_fr('training', data_root_path='data')


def get_predictions(
        df_train_folds,
        df_val_fold,
        n_epochs=5,
        n_log_steps=10,
):
    tf.debugging.disable_traceback_filtering()
    # storing predictions in dataframe
    # columns: Sentence, Prediction of Model 1, Prediction of Model 2, ...
    df_predictions_val_fold = df_val_fold[['ID', 'sentence']].copy()

    # get tokenizer
    tokenizer = AutoTokenizer.from_pretrained(get_hugging_face_name(MODEL_NAME))

    X_val_fold = df_val_fold['sentence'].values
    X_val_fold_features = df_val_fold[feature_columns].values

    # tokenize
    tokens_val_fold = tokenizer(X_val_fold.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                max_length=128)

    for k in range(ENSEMBLE_POOL_SIZE):
        df_early_stopping = df_train_folds.sample(frac=0.1, random_state=k)
        df_train_no_es = df_train_folds.drop(
            df_train_folds[
                df_train_folds['ID'].isin(df_early_stopping['ID'])
            ].index
        )

        ## or use this simplified code to drop rows whose 'ID' is in df_early_stopping['ID']
        # df_train_no_es = df_train_folds[~df_train_folds['ID'].isin(df_early_stopping['ID'])]

        X_early_stopping = df_early_stopping['sentence'].values
        X_early_stopping_features = df_early_stopping[feature_columns].values
        y_early_stopping = df_early_stopping['MOS'].values

        X_training = df_train_no_es['sentence'].values
        X_training_features = df_train_no_es[feature_columns].values
        y_training = df_train_no_es['MOS'].values

        # tokenize
        tokens_early_stopping = tokenizer(X_early_stopping.tolist(), padding='max_length', return_tensors='pt',
                                          truncation=True, max_length=128)

        tokens_training = tokenizer(X_training.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                    max_length=128)

        hash = (
                hashlib.sha256(
                    pd.util.hash_pandas_object(df_train_no_es['ID'], index=True).values
                ).hexdigest()
                + '_'
                + get_hugging_face_name(MODEL_NAME)[
                  get_hugging_face_name(MODEL_NAME).find('/') + 1:
                  ]
        )

        # load model and, if necessary, train it
        try:
            print(f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}')
            model = AutoModelForSequenceClassification.from_pretrained(
                f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}', local_files_only=True, num_labels=1
            )
        except EnvironmentError:
            # create training dataset
            early_stopping_dataset = TCCDataset(tokens_early_stopping, y_early_stopping)
            training_dataset = TCCDataset(tokens_training, y_training)

            training_args = TrainingArguments(
                output_dir=f'{EXPERIMENT_DIR}/{MODEL_NAME}_trainer/',
                num_train_epochs=n_epochs,
                per_device_train_batch_size=TRAIN_BATCH_SIZE,
                per_device_eval_batch_size=VALID_BATCH_SIZE,
                warmup_ratio=0.3,
                learning_rate=3e-5, # Changed from 5e-5
                no_cuda=False,
                metric_for_best_model='root_mean_squared_error',
                greater_is_better=False,
                load_best_model_at_end=True,
                save_steps=N_EVAL_STEPS * 100_000,
                # we never want to save a model through this function, but the parameter must be set, because of load_best_model_at_end=True
                save_total_limit=1,  # can be 1, because we only save, when we find a better model
                eval_steps=N_EVAL_STEPS,
                # `evaluation_strategy` is deprecated, Use `eval_strategy` instead
                eval_strategy='steps',
                seed=k,
                logging_steps=n_log_steps,
                logging_dir=f'{EXPERIMENT_DIR}/logs/member_{k}',
                logging_strategy='steps',
            )

            set_seed(training_args.seed)
            model = AutoModelForSequenceClassification.from_pretrained(
                get_hugging_face_name(MODEL_NAME), num_labels=1
            )

            trainer = RegressionTrainer(
                model=model,
                args=training_args,
                train_dataset=training_dataset,
                eval_dataset=early_stopping_dataset,
                compute_metrics=compute_metrics_for_regression,
                callbacks=[OptimizedESCallback(patience=5, initial_steps_wo_save=300)],
            )
            # training
            trainer.train()

            # save model
            model.save_pretrained(f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}')

        # load hidden states of model for validation and test data
        hidden_state_val_fold = extract_hidden_state(model, tokens_val_fold)

        # normalize data with StandardScaler
        scaler = StandardScaler()
        scaler.fit(df_train_folds[feature_columns].values)
        X_val_fold_features_scaled = scaler.transform(X_val_fold_features)
        X_val_fold_with_features = np.concatenate((hidden_state_val_fold.detach().numpy(), X_val_fold_features_scaled),
                                                  axis=1)

        # load MLP model and, if necessary, train it
        try:
            mlp = load_model(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')
        except Exception:
            hidden_state_train = extract_hidden_state(model, tokens_training)
            hidden_state_early_stopping = extract_hidden_state(model, tokens_early_stopping)

            np.random.seed(k)
            mlp = Sequential(
                [
                    Input(shape=(model.config.hidden_size + len(feature_columns),), name='input'),
                    Dense(model.config.hidden_size, activation='relu', name='layer1'),
                    Dense(1, activation='linear', name='layer2'),
                ]
            )

            mlp.compile(
                optimizer='rmsprop',
                loss=tf.keras.losses.MeanSquaredError(),
                metrics=[tf.keras.metrics.RootMeanSquaredError()],
            )
            es = EarlyStopping(monitor='val_root_mean_squared_error', mode='min', verbose=1, patience=100)
            mc = ModelCheckpoint(
                f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras',
                 monitor='val_root_mean_squared_error',
                 mode='min',
                 verbose=1,
                 save_best_only=True
            )

            # normalize data with StandardScaler
            scaler = StandardScaler()
            scaler.fit(X_training_features)
            X_train_features_scaled = scaler.transform(X_training_features)
            X_es_features_scaled = scaler.transform(X_early_stopping_features)

            X_train_with_features = np.concatenate((hidden_state_train.detach().numpy(), X_train_features_scaled),
                                                   axis=1)
            X_es_with_features = np.concatenate((hidden_state_early_stopping.detach().numpy(), X_es_features_scaled),
                                                axis=1)

            mlp.fit(X_train_with_features, y_training,
                    validation_data=(X_es_with_features, y_early_stopping),
                    batch_size=TRAIN_BATCH_SIZE,
                    #epochs=5000, callbacks=[es, mc])
                    epochs=10, callbacks=[tensorboard_callback, es, mc])

        # Manually save the model after training
        mlp.save(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')

        mlp = tf.keras.models.load_model(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')

        # predict MLP on validation and test sets
        prediction_val_fold = mlp.predict(X_val_fold_with_features, batch_size=VALID_BATCH_SIZE)

        df_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'] = prediction_val_fold

    return df_predictions_val_fold


def extract_hidden_state(model, tokens, batch_size=16):
    last_last_hidden_state = torch.zeros((len(tokens.input_ids), model.config.hidden_size))
    model = model.cuda().eval()
    with torch.no_grad():
        for i in range(0, len(tokens.input_ids), batch_size):
            if i + batch_size > len(tokens.input_ids):
                input_i = tokens.input_ids[i:]
            else:
                input_i = tokens.input_ids[i:i + batch_size]
            output = model(input_i.cuda(), output_hidden_states=True)
            last_hidden_state = output.hidden_states[-1].cpu()
            idx_last_token = torch.zeros(len(input_i)).long()
            last_last_hidden_state[i:i + len(idx_last_token)] = last_hidden_state[
                torch.arange(len(idx_last_token)), idx_last_token]
    return last_last_hidden_state


# dataframe for each metric for each model for each ensemble size
# 3d array: [ensemble_size, model_index, metric_index]
df_macro_ensemble_scores = pd.DataFrame(
    columns=[
        'ensemble_size',
        'model_name',
        'mean_absolute_error_mean',
        'mean_absolute_error_std',
        'mean_squared_error_mean',
        'mean_squared_error_std',
        'root_mean_squared_error_mean',
        'root_mean_squared_error_std',
    ]
)

for fold, (train_idx, val_idx) in enumerate(KFold(n_splits=N_FOLDS).split(df_train)):
    df_train_folds = df_train.loc[train_idx]
    df_val_fold = df_train.loc[val_idx]
    # fill na with mean of columns of train data
    df_train_folds = df_train_folds.fillna(df_train_folds.mean(numeric_only=True))
    df_val_fold = df_val_fold.fillna(df_train_folds.mean(numeric_only=True))

    y_val_fold = df_val_fold['MOS'].values

    pool_predictions_val_fold = get_predictions(df_train_folds, df_val_fold)

    for current_ensemble_size in range(1, MAX_ENSEMBLE_SIZE + 1):
        np.random.seed(current_ensemble_size)
        idx = np.random.choice(
            ENSEMBLE_POOL_SIZE,
            size=(BOOTSTRAP_SIZE, current_ensemble_size),
        )

        idx_mapped = np.array(
            [
                np.array(
                    [pool_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'] for k in j]
                )
                for j in idx
            ]
        )

        ensemble_predictions = np.array(
            [np.sum(j, axis=0) / len(j) for j in idx_mapped]
        )

        ensemble_scores = [
            compute_metrics(y_val_fold, pred) for pred in ensemble_predictions
        ]

        df_ensemble_scores = pd.DataFrame(ensemble_scores).sort_index(axis=1)

        # add to dataframe
        new_row = pd.DataFrame(
            {
                'ensemble_size': [current_ensemble_size],
                'model_name': [MODEL_NAME],
                'mean_absolute_error_mean': [df_ensemble_scores['mean_absolute_error'].mean()],
                'mean_absolute_error_std': [df_ensemble_scores['mean_absolute_error'].std()],
                'mean_squared_error_mean': [df_ensemble_scores['mean_squared_error'].mean()],
                'mean_squared_error_std': [df_ensemble_scores['mean_squared_error'].std()],
                'root_mean_squared_error_mean': [df_ensemble_scores['root_mean_squared_error'].mean()],
                'root_mean_squared_error_std': [df_ensemble_scores['root_mean_squared_error'].std()],
            })

df_macro_ensemble_scores = pd.concat([df_macro_ensemble_scores, new_row], ignore_index=True)

df_macro_ensemble_scores[
    df_macro_ensemble_scores['model_name'] == MODEL_NAME
    ].to_csv(
    f'ensemble_scores_{MODEL_NAME}.csv', index=False, sep=';', encoding='utf-8'
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/508 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/811k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

cache/ensemble_camembert-base/models/camembert-base/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3695,3.44752,1.85675,1.662486,3.44752
46,2.7211,1.175776,1.084332,0.78467,1.175776
69,1.1146,0.733297,0.856328,0.662076,0.733297


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 1s/step - loss: 7.7134 - root_mean_squared_error: 2.7773
Epoch 1: val_root_mean_squared_error improved from inf to 0.78206, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - loss: 2.7962 - root_mean_squared_error: 1.5926 - val_loss: 0.6116 - val_root_mean_squared_error: 0.7821
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2306 - root_mean_squared_error: 0.4802
Epoch 2: val_root_mean_squared_error improved from 0.78206 to 0.33249, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3564 - root_mean_squared_error: 0.5890

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1458,4.11469,2.02847,1.857979,4.11469
46,2.1924,1.281992,1.132251,0.922616,1.281992
69,1.0792,0.724388,0.85111,0.655561,0.724388


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 6.8395 - root_mean_squared_error: 2.6152
Epoch 1: val_root_mean_squared_error improved from inf to 0.69566, saving model to cache/ensemble_camembert-base/models/mlp/c91b46895596b42df4e1385f135441ffc449535d6e19f5b434fe664fa6596880_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5124 - root_mean_squared_error: 1.5157 - val_loss: 0.4839 - val_root_mean_squared_error: 0.6957
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2434 - root_mean_squared_error: 0.4933
Epoch 2: val_root_mean_squared_error improved from 0.69566 to 0.43481, saving model to cache/ensemble_camembert-base/models/mlp/c91b46895596b42df4e1385f135441ffc449535d6e19f5b434fe664fa6596880_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3216 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6322,3.321396,1.82247,1.645032,3.321396
46,2.335,1.008486,1.004234,0.798392,1.008486
69,1.1727,0.6232,0.78943,0.593352,0.6232


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 9.3547 - root_mean_squared_error: 3.0585
Epoch 1: val_root_mean_squared_error improved from inf to 0.93277, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9783 - root_mean_squared_error: 1.6447 - val_loss: 0.8701 - val_root_mean_squared_error: 0.9328
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8099 - root_mean_squared_error: 0.8999
Epoch 2: val_root_mean_squared_error improved from 0.93277 to 0.40529, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5058 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5943,3.278216,1.810584,1.630146,3.278216
46,2.2713,1.001374,1.000687,0.767274,1.001374
69,0.8595,0.628307,0.792658,0.633865,0.628307


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 6.6171 - root_mean_squared_error: 2.5724
Epoch 1: val_root_mean_squared_error improved from inf to 0.97094, saving model to cache/ensemble_camembert-base/models/mlp/8aef8ffb6ad3e8ad051fa6ca45595b1683b1810a3df6a0d40d82dadfc03b83cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8876 - root_mean_squared_error: 1.6261 - val_loss: 0.9427 - val_root_mean_squared_error: 0.9709
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.8029 - root_mean_squared_error: 0.8961
Epoch 2: val_root_mean_squared_error did not improve from 0.97094
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5264 - root_mean_squared_error: 0.7208 - val_loss: 1.0056 - val_root_mean_squared_error: 1.0028
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7086,2.805853,1.675068,1.497005,2.805853
46,2.1451,0.743931,0.862514,0.691009,0.743931
69,1.1747,0.552664,0.743414,0.582102,0.552664


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 6.8635 - root_mean_squared_error: 2.6198
Epoch 1: val_root_mean_squared_error improved from inf to 0.43810, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.6440 - root_mean_squared_error: 1.5579 - val_loss: 0.1919 - val_root_mean_squared_error: 0.4381
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2243 - root_mean_squared_error: 0.4736
Epoch 2: val_root_mean_squared_error improved from 0.43810 to 0.31394, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6419 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5164,3.845048,1.960879,1.774795,3.845048
46,2.0301,1.08924,1.043666,0.844713,1.08924
69,0.776,0.711982,0.84379,0.700062,0.711982


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 7.5330 - root_mean_squared_error: 2.7446
Epoch 1: val_root_mean_squared_error improved from inf to 0.84342, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0247 - root_mean_squared_error: 1.6676 - val_loss: 0.7114 - val_root_mean_squared_error: 0.8434
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5376 - root_mean_squared_error: 0.7332
Epoch 2: val_root_mean_squared_error improved from 0.84342 to 0.49901, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3662 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4277,4.236801,2.058349,1.88355,4.236801
46,2.209,1.419514,1.191434,0.95427,1.419514
69,1.0206,0.806127,0.897846,0.655788,0.806127


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 6.4497 - root_mean_squared_error: 2.5396
Epoch 1: val_root_mean_squared_error improved from inf to 0.57730, saving model to cache/ensemble_camembert-base/models/mlp/5e4f0c82c76c1de4ecc7bac97603b913aac8745333016a7294efc52160b5c5d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6187 - root_mean_squared_error: 1.5471 - val_loss: 0.3333 - val_root_mean_squared_error: 0.5773
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.4330 - root_mean_squared_error: 0.6580
Epoch 2: val_root_mean_squared_error improved from 0.57730 to 0.53629, saving model to cache/ensemble_camembert-base/models/mlp/5e4f0c82c76c1de4ecc7bac97603b913aac8745333016a7294efc52160b5c5d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6700 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0111,3.906209,1.976413,1.796587,3.906209
46,2.1865,1.220993,1.104985,0.895035,1.220993
69,0.8225,0.733465,0.856425,0.666228,0.733465


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 5.6215 - root_mean_squared_error: 2.3710
Epoch 1: val_root_mean_squared_error improved from inf to 0.71824, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6460 - root_mean_squared_error: 1.5599 - val_loss: 0.5159 - val_root_mean_squared_error: 0.7182
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7000 - root_mean_squared_error: 0.8367
Epoch 2: val_root_mean_squared_error improved from 0.71824 to 0.37291, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6450 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3881,4.070811,2.017625,1.735697,4.070812
46,2.2355,1.564189,1.250675,0.947855,1.564189
69,1.0458,1.095725,1.046769,0.847265,1.095725


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 9.3092 - root_mean_squared_error: 3.0511
Epoch 1: val_root_mean_squared_error improved from inf to 0.68424, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7656 - root_mean_squared_error: 1.5753 - val_loss: 0.4682 - val_root_mean_squared_error: 0.6842
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3846 - root_mean_squared_error: 0.6201
Epoch 2: val_root_mean_squared_error improved from 0.68424 to 0.43643, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4343 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8155,2.9155,1.707483,1.53485,2.9155
46,2.2283,0.787965,0.887674,0.703827,0.787965
69,0.9137,0.557687,0.746784,0.582517,0.557687


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 5.0696 - root_mean_squared_error: 2.2516
Epoch 1: val_root_mean_squared_error improved from inf to 0.59260, saving model to cache/ensemble_camembert-base/models/mlp/8a4814e4aec03a6479b9ec24494843818b497ad7e712337f8943760b856168f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8216 - root_mean_squared_error: 1.6184 - val_loss: 0.3512 - val_root_mean_squared_error: 0.5926
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2610 - root_mean_squared_error: 0.5108
Epoch 2: val_root_mean_squared_error did not improve from 0.59260
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4130 - root_mean_squared_error: 0.6404 - val_loss: 0.4909 - val_root_mean_squared_error: 0.7006
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7475,2.458515,1.567965,1.326021,2.458515
46,1.9624,0.768131,0.876431,0.716417,0.768131
69,0.8652,0.756325,0.869669,0.752357,0.756325


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 646ms/step - loss: 6.0375 - root_mean_squared_error: 2.4571
Epoch 1: val_root_mean_squared_error improved from inf to 0.69185, saving model to cache/ensemble_camembert-base/models/mlp/091c75152f6299a2a53f8c4354dbe816ca45e6e866b8e88328b36ab63710f859_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4920 - root_mean_squared_error: 1.5118 - val_loss: 0.4787 - val_root_mean_squared_error: 0.6919
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5737 - root_mean_squared_error: 0.7574
Epoch 2: val_root_mean_squared_error improved from 0.69185 to 0.38034, saving model to cache/ensemble_camembert-base/models/mlp/091c75152f6299a2a53f8c4354dbe816ca45e6e866b8e88328b36ab63710f859_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6403 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3826,3.603707,1.898343,1.651021,3.603707
46,1.97,1.27727,1.130164,0.929268,1.27727
69,0.9227,0.882685,0.939513,0.813675,0.882685


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 5.2586 - root_mean_squared_error: 2.2932
Epoch 1: val_root_mean_squared_error improved from inf to 0.54893, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6497 - root_mean_squared_error: 1.5679 - val_loss: 0.3013 - val_root_mean_squared_error: 0.5489
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.3279 - root_mean_squared_error: 0.5726
Epoch 2: val_root_mean_squared_error improved from 0.54893 to 0.42400, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4260 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7438,2.948416,1.717095,1.534636,2.948416
46,1.898,0.766021,0.875226,0.674385,0.766021
69,1.0017,0.599254,0.774115,0.637413,0.599254


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 5.8890 - root_mean_squared_error: 2.4267
Epoch 1: val_root_mean_squared_error improved from inf to 1.04836, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6229 - root_mean_squared_error: 1.5585 - val_loss: 1.0991 - val_root_mean_squared_error: 1.0484
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1571 - root_mean_squared_error: 1.0757
Epoch 2: val_root_mean_squared_error improved from 1.04836 to 0.74665, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6693 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.537,3.631567,1.905667,1.740428,3.631567
46,2.2352,1.124814,1.060572,0.816774,1.124814
69,1.1392,0.650967,0.806825,0.62361,0.650967


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 6.8228 - root_mean_squared_error: 2.6121
Epoch 1: val_root_mean_squared_error improved from inf to 0.50318, saving model to cache/ensemble_camembert-base/models/mlp/82dea812ed409f71bd10886113d2c12dcbf6cf29487bc1a7021b481570f51114_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0153 - root_mean_squared_error: 1.6608 - val_loss: 0.2532 - val_root_mean_squared_error: 0.5032
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4263 - root_mean_squared_error: 0.6529
Epoch 2: val_root_mean_squared_error did not improve from 0.50318
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6659 - root_mean_squared_error: 0.8110 - val_loss: 0.2790 - val_root_mean_squared_error: 0.5282
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.093,3.149517,1.774688,1.60915,3.149517
46,2.0309,0.87797,0.937001,0.732825,0.87797
69,0.9518,0.558577,0.74738,0.536316,0.558577


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 644ms/step - loss: 8.5033 - root_mean_squared_error: 2.9160
Epoch 1: val_root_mean_squared_error improved from inf to 1.22734, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8261 - root_mean_squared_error: 1.5977 - val_loss: 1.5064 - val_root_mean_squared_error: 1.2273
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.3293 - root_mean_squared_error: 1.1529
Epoch 2: val_root_mean_squared_error improved from 1.22734 to 0.87964, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6298 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2475,3.665974,1.914673,1.706954,3.665974
46,2.3184,1.241681,1.114307,0.826002,1.241681
69,1.3528,0.792091,0.889995,0.680941,0.792091


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 7.4676 - root_mean_squared_error: 2.7327
Epoch 1: val_root_mean_squared_error improved from inf to 0.58597, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0997 - root_mean_squared_error: 1.6820 - val_loss: 0.3434 - val_root_mean_squared_error: 0.5860
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3068 - root_mean_squared_error: 0.5539
Epoch 2: val_root_mean_squared_error improved from 0.58597 to 0.32779, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4934 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9407,2.898256,1.702426,1.458519,2.898256
46,2.1699,0.971524,0.985659,0.730354,0.971524
69,1.0776,0.774039,0.879795,0.653653,0.774039


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 7.5874 - root_mean_squared_error: 2.7545
Epoch 1: val_root_mean_squared_error improved from inf to 0.46172, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8931 - root_mean_squared_error: 1.6288 - val_loss: 0.2132 - val_root_mean_squared_error: 0.4617
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1995 - root_mean_squared_error: 0.4466
Epoch 2: val_root_mean_squared_error improved from 0.46172 to 0.34320, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3359 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6204,4.108561,2.026958,1.827285,4.10856
46,2.3697,1.337858,1.156658,0.956538,1.337858
69,0.9403,0.833088,0.912736,0.757934,0.833088


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 5.9076 - root_mean_squared_error: 2.4306
Epoch 1: val_root_mean_squared_error improved from inf to 0.61003, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4807 - root_mean_squared_error: 1.5130 - val_loss: 0.3721 - val_root_mean_squared_error: 0.6100
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2174 - root_mean_squared_error: 0.4662
Epoch 2: val_root_mean_squared_error improved from 0.61003 to 0.50923, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4837 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1433,4.707786,2.169743,1.984031,4.707786
46,2.1926,1.762171,1.327468,1.091879,1.762171
69,0.9491,1.031544,1.01565,0.819463,1.031544


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.3596 - root_mean_squared_error: 2.3151
Epoch 1: val_root_mean_squared_error improved from inf to 0.48328, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5443 - root_mean_squared_error: 1.5399 - val_loss: 0.2336 - val_root_mean_squared_error: 0.4833
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3206 - root_mean_squared_error: 0.5662
Epoch 2: val_root_mean_squared_error improved from 0.48328 to 0.36578, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4155 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3138,4.28266,2.069459,1.880774,4.28266
46,1.9102,1.366916,1.169152,0.959324,1.366916
69,0.8806,0.820016,0.905547,0.7695,0.820016


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 9.9054 - root_mean_squared_error: 3.1473
Epoch 1: val_root_mean_squared_error improved from inf to 0.55664, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1134 - root_mean_squared_error: 1.6789 - val_loss: 0.3099 - val_root_mean_squared_error: 0.5566
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.5221 - root_mean_squared_error: 0.7226
Epoch 2: val_root_mean_squared_error improved from 0.55664 to 0.40783, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5375 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8211,2.521299,1.58786,1.367841,2.521299
46,1.9123,0.746935,0.864254,0.605525,0.746935
69,0.9557,0.669315,0.818117,0.629563,0.669315


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 6.7950 - root_mean_squared_error: 2.6067
Epoch 1: val_root_mean_squared_error improved from inf to 0.46980, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4747 - root_mean_squared_error: 1.5039 - val_loss: 0.2207 - val_root_mean_squared_error: 0.4698
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2755 - root_mean_squared_error: 0.5249
Epoch 2: val_root_mean_squared_error improved from 0.46980 to 0.35276, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3397 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1727,2.808821,1.675954,1.489496,2.808821
46,2.2663,0.805205,0.897332,0.706821,0.805204
69,0.9022,0.579869,0.761491,0.655613,0.579869


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 6.4292 - root_mean_squared_error: 2.5356
Epoch 1: val_root_mean_squared_error improved from inf to 0.55777, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5954 - root_mean_squared_error: 1.5429 - val_loss: 0.3111 - val_root_mean_squared_error: 0.5578
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7533 - root_mean_squared_error: 0.8679
Epoch 2: val_root_mean_squared_error improved from 0.55777 to 0.32383, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6121 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2719,5.357275,2.314579,2.145357,5.357275
46,2.103,1.970075,1.403594,1.156953,1.970075
69,1.0809,1.101659,1.049599,0.830306,1.101659


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 9.6828 - root_mean_squared_error: 3.1117
Epoch 1: val_root_mean_squared_error improved from inf to 0.81926, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7202 - root_mean_squared_error: 1.5619 - val_loss: 0.6712 - val_root_mean_squared_error: 0.8193
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.1596 - root_mean_squared_error: 1.0769
Epoch 2: val_root_mean_squared_error improved from 0.81926 to 0.72857, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6203 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0727,4.080078,2.01992,1.830451,4.080078
46,2.1096,1.275128,1.129216,0.871955,1.275128
69,0.9197,0.78547,0.886267,0.681049,0.78547


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 644ms/step - loss: 6.6035 - root_mean_squared_error: 2.5697
Epoch 1: val_root_mean_squared_error improved from inf to 0.83950, saving model to cache/ensemble_camembert-base/models/mlp/9fc2278b9fb032f58cd23e27c5f396b0a6db8587018b115d0d04cc171dba8904_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8884 - root_mean_squared_error: 1.6277 - val_loss: 0.7048 - val_root_mean_squared_error: 0.8395
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4262 - root_mean_squared_error: 0.6528
Epoch 2: val_root_mean_squared_error improved from 0.83950 to 0.49433, saving model to cache/ensemble_camembert-base/models/mlp/9fc2278b9fb032f58cd23e27c5f396b0a6db8587018b115d0d04cc171dba8904_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3479 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.867,3.024223,1.739029,1.554825,3.024223
46,2.1968,0.867367,0.931326,0.699639,0.867367
69,1.161,0.599029,0.77397,0.607803,0.599029


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.3069 - root_mean_squared_error: 2.7031
Epoch 1: val_root_mean_squared_error improved from inf to 0.40557, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3659 - root_mean_squared_error: 1.4737 - val_loss: 0.1645 - val_root_mean_squared_error: 0.4056
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1788 - root_mean_squared_error: 0.4229
Epoch 2: val_root_mean_squared_error improved from 0.40557 to 0.31945, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4008 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7445,4.512669,2.124304,1.899813,4.512669
46,2.3495,1.743018,1.320234,1.056615,1.743018
69,1.0874,1.089986,1.044024,0.821553,1.089986


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 7.6803 - root_mean_squared_error: 2.7713
Epoch 1: val_root_mean_squared_error improved from inf to 0.65025, saving model to cache/ensemble_camembert-base/models/mlp/6a317fed24385a19e50a76c87e2e9bdf452604069dca5e47ce84b9d420822dc3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7579 - root_mean_squared_error: 1.5815 - val_loss: 0.4228 - val_root_mean_squared_error: 0.6502
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3473 - root_mean_squared_error: 0.5893
Epoch 2: val_root_mean_squared_error did not improve from 0.65025
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5359 - root_mean_squared_error: 0.7271 - val_loss: 0.4988 - val_root_mean_squared_error: 0.7062
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5727,3.41429,1.84778,1.659076,3.41429
46,2.4049,1.00481,1.002402,0.804743,1.00481
69,0.9767,0.659448,0.812064,0.660245,0.659448


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 8.5297 - root_mean_squared_error: 2.9206
Epoch 1: val_root_mean_squared_error improved from inf to 0.56158, saving model to cache/ensemble_camembert-base/models/mlp/f4d6ad45a2f846178e015ed274b2c46dc44bf61d323a00ba1bf31acce30441f4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2428 - root_mean_squared_error: 1.7152 - val_loss: 0.3154 - val_root_mean_squared_error: 0.5616
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3879 - root_mean_squared_error: 0.6228
Epoch 2: val_root_mean_squared_error improved from 0.56158 to 0.45441, saving model to cache/ensemble_camembert-base/models/mlp/f4d6ad45a2f846178e015ed274b2c46dc44bf61d323a00ba1bf31acce30441f4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4564 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2494,3.205378,1.790357,1.666467,3.205378
46,2.0993,0.765331,0.874832,0.711681,0.765331
69,0.7788,0.430653,0.656242,0.540659,0.430653


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 7.1475 - root_mean_squared_error: 2.6735
Epoch 1: val_root_mean_squared_error improved from inf to 0.59773, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9096 - root_mean_squared_error: 1.6348 - val_loss: 0.3573 - val_root_mean_squared_error: 0.5977
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4127 - root_mean_squared_error: 0.6424
Epoch 2: val_root_mean_squared_error improved from 0.59773 to 0.50662, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4046 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5079,2.699123,1.642901,1.432618,2.699123
46,1.7495,0.776644,0.881274,0.68432,0.776644
69,1.0845,0.655854,0.809848,0.612674,0.655854


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 8.6992 - root_mean_squared_error: 2.9494
Epoch 1: val_root_mean_squared_error improved from inf to 0.55147, saving model to cache/ensemble_camembert-base/models/mlp/2d7d88ddd778d3577f4c03e1b87367fca288452ca48d42c8edba470f93ac6d9f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5334 - root_mean_squared_error: 1.7930 - val_loss: 0.3041 - val_root_mean_squared_error: 0.5515
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5574 - root_mean_squared_error: 0.7466
Epoch 2: val_root_mean_squared_error did not improve from 0.55147
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4083 - root_mean_squared_error: 0.6366 - val_loss: 1.3915 - val_root_mean_squared_error: 1.1796
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9815,3.261782,1.80604,1.641245,3.261782
46,2.4444,0.96516,0.982425,0.729991,0.96516
69,1.0618,0.588312,0.767015,0.527724,0.588312


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 8.5057 - root_mean_squared_error: 2.9165
Epoch 1: val_root_mean_squared_error improved from inf to 0.58429, saving model to cache/ensemble_camembert-base/models/mlp/729d3fc844a465cd30367969b82299cf786732c878aadace3f4321e8ece7baab_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9668 - root_mean_squared_error: 1.6413 - val_loss: 0.3414 - val_root_mean_squared_error: 0.5843
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2526 - root_mean_squared_error: 0.5026
Epoch 2: val_root_mean_squared_error did not improve from 0.58429
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3910 - root_mean_squared_error: 0.6231 - val_loss: 0.4935 - val_root_mean_squared_error: 0.7025
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8569,4.109508,2.027192,1.871188,4.109507
46,2.4931,1.412005,1.188278,0.949035,1.412005
69,1.1707,0.758689,0.871028,0.668577,0.758689


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.9348 - root_mean_squared_error: 2.6334
Epoch 1: val_root_mean_squared_error improved from inf to 0.43712, saving model to cache/ensemble_camembert-base/models/mlp/05f918c72a8624909f3a4048fa1ed18b5b836787eb3fffc3abd94bf7a9cc60f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7714 - root_mean_squared_error: 1.5897 - val_loss: 0.1911 - val_root_mean_squared_error: 0.4371
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1828 - root_mean_squared_error: 0.4275
Epoch 2: val_root_mean_squared_error did not improve from 0.43712
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4382 - root_mean_squared_error: 0.6547 - val_loss: 0.2148 - val_root_mean_squared_error: 0.4634
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9973,3.509107,1.873261,1.708477,3.509107
46,2.3544,1.006784,1.003386,0.791286,1.006784
69,0.9425,0.609052,0.780418,0.615056,0.609052


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 4.0739 - root_mean_squared_error: 2.0184
Epoch 1: val_root_mean_squared_error improved from inf to 0.64666, saving model to cache/ensemble_camembert-base/models/mlp/8b0a576710bd4fd4b5c445d90ace9a8c336879b7d5624fc8f538747883b9bf43_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6619 - root_mean_squared_error: 1.5756 - val_loss: 0.4182 - val_root_mean_squared_error: 0.6467
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2547 - root_mean_squared_error: 0.5047
Epoch 2: val_root_mean_squared_error did not improve from 0.64666
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3346 - root_mean_squared_error: 0.5773 - val_loss: 0.7298 - val_root_mean_squared_error: 0.8543
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3915,4.232779,2.057372,1.823327,4.232779
46,2.1338,1.507639,1.22786,0.980565,1.507639
69,0.9818,0.968884,0.984319,0.820957,0.968884


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 8.9501 - root_mean_squared_error: 2.9917
Epoch 1: val_root_mean_squared_error improved from inf to 0.91452, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9223 - root_mean_squared_error: 1.6224 - val_loss: 0.8364 - val_root_mean_squared_error: 0.9145
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5792 - root_mean_squared_error: 0.7610
Epoch 2: val_root_mean_squared_error improved from 0.91452 to 0.75918, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4320 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0968,3.428321,1.851573,1.646247,3.428321
46,1.9524,1.024133,1.011995,0.733935,1.024134
69,0.868,0.720104,0.848589,0.650359,0.720104


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 6.7024 - root_mean_squared_error: 2.5889
Epoch 1: val_root_mean_squared_error improved from inf to 1.04593, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7200 - root_mean_squared_error: 1.5836 - val_loss: 1.0940 - val_root_mean_squared_error: 1.0459
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.5337 - root_mean_squared_error: 1.2384
Epoch 2: val_root_mean_squared_error improved from 1.04593 to 0.45285, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7198 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0695,3.277918,1.810502,1.61821,3.277918
46,2.1497,0.977477,0.988674,0.790284,0.977477
69,1.0188,0.664816,0.815362,0.634412,0.664816


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.2949 - root_mean_squared_error: 2.5090
Epoch 1: val_root_mean_squared_error improved from inf to 0.83918, saving model to cache/ensemble_camembert-base/models/mlp/32a7c1c4400e3bfc1d50effd75720a5b90065021af5f677f4bd56d519bf56cfb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.5856 - root_mean_squared_error: 1.5397 - val_loss: 0.7042 - val_root_mean_squared_error: 0.8392
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9538 - root_mean_squared_error: 0.9766
Epoch 2: val_root_mean_squared_error did not improve from 0.83918
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6186 - root_mean_squared_error: 0.7740 - val_loss: 0.9754 - val_root_mean_squared_error: 0.9876
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6866,5.131858,2.265361,1.990697,5.131858
46,2.251,2.040502,1.428461,1.058222,2.040502
69,0.8893,1.349771,1.161797,0.895951,1.349771


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 6.1300 - root_mean_squared_error: 2.4759
Epoch 1: val_root_mean_squared_error improved from inf to 0.84068, saving model to cache/ensemble_camembert-base/models/mlp/d329d22ae60ae3705b140afc4a0835d8e39cb1cdd5fe908c53d0975c9cf00374_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2886 - root_mean_squared_error: 1.7399 - val_loss: 0.7067 - val_root_mean_squared_error: 0.8407
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8648 - root_mean_squared_error: 0.9300
Epoch 2: val_root_mean_squared_error did not improve from 0.84068
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5295 - root_mean_squared_error: 0.7211 - val_loss: 0.9782 - val_root_mean_squared_error: 0.9890
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.657,3.838789,1.959283,1.735345,3.838789
46,2.4093,1.38129,1.175283,0.942461,1.38129
69,1.3881,0.883648,0.940025,0.765092,0.883648


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 5.9842 - root_mean_squared_error: 2.4463
Epoch 1: val_root_mean_squared_error improved from inf to 0.56957, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8625 - root_mean_squared_error: 1.6231 - val_loss: 0.3244 - val_root_mean_squared_error: 0.5696
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3571 - root_mean_squared_error: 0.5976
Epoch 2: val_root_mean_squared_error improved from 0.56957 to 0.51095, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3741 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2058,3.24996,1.802765,1.623069,3.24996
46,2.3295,0.898334,0.947805,0.774782,0.898334
69,1.0651,0.619549,0.787115,0.62221,0.619549


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 5.9864 - root_mean_squared_error: 2.4467
Epoch 1: val_root_mean_squared_error improved from inf to 1.13751, saving model to cache/ensemble_camembert-base/models/mlp/cddb51286605f923863923afd27b5cf3e2ddd418200d8aa790ac0918441fe434_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2806 - root_mean_squared_error: 1.4473 - val_loss: 1.2939 - val_root_mean_squared_error: 1.1375
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4770 - root_mean_squared_error: 0.6907
Epoch 2: val_root_mean_squared_error did not improve from 1.13751
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6366 - root_mean_squared_error: 0.7894 - val_loss: 1.3035 - val_root_mean_squared_error: 1.1417
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5862,2.474968,1.573203,1.353448,2.474969
46,2.3128,0.765886,0.875149,0.637645,0.765886
69,1.3367,0.645889,0.803672,0.668654,0.645889


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 713ms/step - loss: 7.2242 - root_mean_squared_error: 2.6878
Epoch 1: val_root_mean_squared_error improved from inf to 1.06004, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1690 - root_mean_squared_error: 1.7017 - val_loss: 1.1237 - val_root_mean_squared_error: 1.0600
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.8630 - root_mean_squared_error: 0.9290
Epoch 2: val_root_mean_squared_error improved from 1.06004 to 0.44763, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5235 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.8533,3.549107,1.883907,1.639313,3.549107
46,2.0145,1.222054,1.105465,0.810819,1.222054
69,0.8737,0.864192,0.929619,0.714564,0.864192


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 5.4723 - root_mean_squared_error: 2.3393
Epoch 1: val_root_mean_squared_error improved from inf to 0.45272, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.1613 - root_mean_squared_error: 1.4144 - val_loss: 0.2050 - val_root_mean_squared_error: 0.4527
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1895 - root_mean_squared_error: 0.4353
Epoch 2: val_root_mean_squared_error improved from 0.45272 to 0.40252, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3962 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3283,4.342715,2.083918,1.80832,4.342715
46,2.6186,1.789628,1.33777,0.930363,1.789628
69,1.2133,1.202388,1.096535,0.81034,1.202388


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 9.4338 - root_mean_squared_error: 3.0714
Epoch 1: val_root_mean_squared_error improved from inf to 0.74457, saving model to cache/ensemble_camembert-base/models/mlp/1317f9ea9be7bd9900517cbccd77e6f78d6edfe96bca75952893a566ae04cd40_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2492 - root_mean_squared_error: 1.7110 - val_loss: 0.5544 - val_root_mean_squared_error: 0.7446
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7263 - root_mean_squared_error: 0.8522
Epoch 2: val_root_mean_squared_error did not improve from 0.74457
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6599 - root_mean_squared_error: 0.8043 - val_loss: 1.6029 - val_root_mean_squared_error: 1.2660
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.446,4.132015,2.032736,1.835166,4.132015
46,2.4225,1.363672,1.167764,0.931717,1.363673
69,1.2144,0.821295,0.906254,0.697912,0.821295


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.8835 - root_mean_squared_error: 2.4256
Epoch 1: val_root_mean_squared_error improved from inf to 0.87213, saving model to cache/ensemble_camembert-base/models/mlp/701dbc40f09fd92c680961214a1a63a3c54cd1a3cb7be7f2ebaf57173e48dc94_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3949 - root_mean_squared_error: 1.4861 - val_loss: 0.7606 - val_root_mean_squared_error: 0.8721
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.0370 - root_mean_squared_error: 1.0183
Epoch 2: val_root_mean_squared_error improved from 0.87213 to 0.77511, saving model to cache/ensemble_camembert-base/models/mlp/701dbc40f09fd92c680961214a1a63a3c54cd1a3cb7be7f2ebaf57173e48dc94_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7753 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7648,3.876602,1.968909,1.788564,3.876602
46,2.7286,1.269213,1.126593,0.882821,1.269213
69,1.2905,0.744458,0.86282,0.630656,0.744458


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 9.4851 - root_mean_squared_error: 3.0798
Epoch 1: val_root_mean_squared_error improved from inf to 0.84473, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0077 - root_mean_squared_error: 1.6526 - val_loss: 0.7136 - val_root_mean_squared_error: 0.8447
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7491 - root_mean_squared_error: 0.8655
Epoch 2: val_root_mean_squared_error improved from 0.84473 to 0.71780, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4942 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.973,3.530439,1.878946,1.671541,3.53044
46,2.7023,1.161899,1.077914,0.833726,1.161899
69,1.0122,0.760876,0.872282,0.704765,0.760876


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.5308 - root_mean_squared_error: 2.7442
Epoch 1: val_root_mean_squared_error improved from inf to 1.01504, saving model to cache/ensemble_camembert-base/models/mlp/e9bc56bdc668459292acc9d2bed56e5f9d44bea0021e35b346d0ba93bcc1987d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4765 - root_mean_squared_error: 1.7840 - val_loss: 1.0303 - val_root_mean_squared_error: 1.0150
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6369 - root_mean_squared_error: 0.7981
Epoch 2: val_root_mean_squared_error did not improve from 1.01504
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5442 - root_mean_squared_error: 0.7367 - val_loss: 1.0513 - val_root_mean_squared_error: 1.0253
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0139,3.714699,1.927355,1.677497,3.714699
46,2.3553,1.277505,1.130267,0.881809,1.277504
69,1.2006,0.898156,0.947711,0.73452,0.898156


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 700ms/step - loss: 5.8764 - root_mean_squared_error: 2.4241
Epoch 1: val_root_mean_squared_error improved from inf to 0.67152, saving model to cache/ensemble_camembert-base/models/mlp/42d2e3518cfd5a1ec6525ff8a1e5fba0fea095bb2cada4e17fcf5cf314d404b2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6589 - root_mean_squared_error: 1.5764 - val_loss: 0.4509 - val_root_mean_squared_error: 0.6715
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1622 - root_mean_squared_error: 0.4028
Epoch 2: val_root_mean_squared_error did not improve from 0.67152
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5716 - root_mean_squared_error: 0.7477 - val_loss: 0.4948 - val_root_mean_squared_error: 0.7034
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7622,4.43598,2.106177,1.862492,4.43598
46,2.2067,1.551166,1.245458,0.972475,1.551166
69,0.9445,1.046792,1.023128,0.811298,1.046792


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.4114 - root_mean_squared_error: 2.7224
Epoch 1: val_root_mean_squared_error improved from inf to 0.88268, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2130 - root_mean_squared_error: 1.7237 - val_loss: 0.7791 - val_root_mean_squared_error: 0.8827
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4304 - root_mean_squared_error: 0.6560
Epoch 2: val_root_mean_squared_error improved from 0.88268 to 0.61554, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4516 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6656,5.116568,2.261983,2.094633,5.116568
46,2.5428,1.857788,1.363007,1.112646,1.857788
69,1.166,1.033319,1.016523,0.763249,1.033319


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.1595 - root_mean_squared_error: 2.6757
Epoch 1: val_root_mean_squared_error improved from inf to 0.72649, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1268 - root_mean_squared_error: 1.6912 - val_loss: 0.5278 - val_root_mean_squared_error: 0.7265
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6956 - root_mean_squared_error: 0.8340
Epoch 2: val_root_mean_squared_error improved from 0.72649 to 0.56636, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6049 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.136,4.744529,2.178194,1.949279,4.744528
46,2.4622,1.74005,1.31911,1.041031,1.74005
69,0.9915,1.096877,1.047319,0.804357,1.096877


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 7.2768 - root_mean_squared_error: 2.6976
Epoch 1: val_root_mean_squared_error improved from inf to 0.67840, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9660 - root_mean_squared_error: 1.6461 - val_loss: 0.4602 - val_root_mean_squared_error: 0.6784
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4504 - root_mean_squared_error: 0.6711
Epoch 2: val_root_mean_squared_error improved from 0.67840 to 0.44545, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5685 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7425,4.802923,2.191557,1.873523,4.802923
46,2.2509,2.034458,1.426344,1.081219,2.034458
69,1.0951,1.422897,1.192852,0.971525,1.422896


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 9.0759 - root_mean_squared_error: 3.0126
Epoch 1: val_root_mean_squared_error improved from inf to 0.68177, saving model to cache/ensemble_camembert-base/models/mlp/9b8bb1f2957cdecfc3cf8eb772fa9ad458d9f6bc0519e49025c3d25577a338fa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7574 - root_mean_squared_error: 1.5820 - val_loss: 0.4648 - val_root_mean_squared_error: 0.6818
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4264 - root_mean_squared_error: 0.6530
Epoch 2: val_root_mean_squared_error improved from 0.68177 to 0.43492, saving model to cache/ensemble_camembert-base/models/mlp/9b8bb1f2957cdecfc3cf8eb772fa9ad458d9f6bc0519e49025c3d25577a338fa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4300 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9009,4.029664,2.007402,1.7324,4.029664
46,2.2714,1.496388,1.22327,0.931765,1.496389
69,0.9761,1.05789,1.028538,0.80091,1.05789


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 4.9392 - root_mean_squared_error: 2.2224
Epoch 1: val_root_mean_squared_error improved from inf to 0.60545, saving model to cache/ensemble_camembert-base/models/mlp/5d7cdf0936f0a4e25ee11292edec8c52bc105f1482c8aab7178f37df73c5e15d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0099 - root_mean_squared_error: 1.6739 - val_loss: 0.3666 - val_root_mean_squared_error: 0.6054
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3201 - root_mean_squared_error: 0.5658
Epoch 2: val_root_mean_squared_error did not improve from 0.60545
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4373 - root_mean_squared_error: 0.6567 - val_loss: 0.4197 - val_root_mean_squared_error: 0.6478
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4972,2.501667,1.581666,1.36522,2.501667
46,2.2659,0.733002,0.856155,0.719476,0.733002
69,0.9656,0.662167,0.813736,0.69606,0.662167


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 4.6168 - root_mean_squared_error: 2.1487
Epoch 1: val_root_mean_squared_error improved from inf to 0.84671, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.4013 - root_mean_squared_error: 1.5022 - val_loss: 0.7169 - val_root_mean_squared_error: 0.8467
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1036 - root_mean_squared_error: 1.0505
Epoch 2: val_root_mean_squared_error improved from 0.84671 to 0.49062, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4686 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4929,3.410928,1.84687,1.578581,3.410928
46,2.3239,1.232031,1.109969,0.920701,1.232031
69,1.1514,0.909616,0.953738,0.836488,0.909616


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 723ms/step - loss: 5.1024 - root_mean_squared_error: 2.2589
Epoch 1: val_root_mean_squared_error improved from inf to 1.24217, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6362 - root_mean_squared_error: 1.5699 - val_loss: 1.5430 - val_root_mean_squared_error: 1.2422
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 1.0399 - root_mean_squared_error: 1.0197
Epoch 2: val_root_mean_squared_error improved from 1.24217 to 0.68361, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4701 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9036,2.761892,1.661894,1.486145,2.761892
46,2.1434,0.694918,0.833617,0.691952,0.694918
69,1.1739,0.569222,0.754468,0.636191,0.569222


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.0016 - root_mean_squared_error: 2.6461
Epoch 1: val_root_mean_squared_error improved from inf to 0.66381, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9515 - root_mean_squared_error: 1.6569 - val_loss: 0.4406 - val_root_mean_squared_error: 0.6638
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4964 - root_mean_squared_error: 0.7046
Epoch 2: val_root_mean_squared_error improved from 0.66381 to 0.40967, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5272 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5294,3.621415,1.903002,1.712399,3.621415
46,2.4847,1.196415,1.093808,0.868367,1.196415
69,1.1238,0.734992,0.857317,0.679272,0.734992


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.1333 - root_mean_squared_error: 2.4766
Epoch 1: val_root_mean_squared_error improved from inf to 0.82116, saving model to cache/ensemble_camembert-base/models/mlp/fe00b919cce02eb6972c2be680c2edf4a3a1c7a230cf3ffc08c72dc0be23c7c6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4519 - root_mean_squared_error: 1.5014 - val_loss: 0.6743 - val_root_mean_squared_error: 0.8212
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0930 - root_mean_squared_error: 1.0454
Epoch 2: val_root_mean_squared_error did not improve from 0.82116
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.0438 - root_mean_squared_error: 0.9989 - val_loss: 1.3641 - val_root_mean_squared_error: 1.1680
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1197,4.777836,2.185826,1.940838,4.777836
46,2.0929,1.811565,1.345944,1.050398,1.811565
69,0.9493,1.164647,1.079188,0.794648,1.164647


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 7.6069 - root_mean_squared_error: 2.7581
Epoch 1: val_root_mean_squared_error improved from inf to 0.87437, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8189 - root_mean_squared_error: 1.6079 - val_loss: 0.7645 - val_root_mean_squared_error: 0.8744
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1782 - root_mean_squared_error: 1.0855
Epoch 2: val_root_mean_squared_error improved from 0.87437 to 0.45935, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5395 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4539,3.591911,1.895234,1.619167,3.591911
46,2.5626,1.348399,1.161206,0.876542,1.348398
69,1.3602,0.9787,0.989293,0.791949,0.9787


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.4400 - root_mean_squared_error: 2.7276
Epoch 1: val_root_mean_squared_error improved from inf to 0.70198, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2312 - root_mean_squared_error: 1.7245 - val_loss: 0.4928 - val_root_mean_squared_error: 0.7020
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3118 - root_mean_squared_error: 0.5584
Epoch 2: val_root_mean_squared_error improved from 0.70198 to 0.51370, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3952 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8956,2.977591,1.72557,1.413139,2.977591
46,2.3609,1.13606,1.065861,0.801906,1.13606
69,1.2614,0.991227,0.995604,0.809861,0.991227


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 7.3790 - root_mean_squared_error: 2.7164
Epoch 1: val_root_mean_squared_error improved from inf to 0.44698, saving model to cache/ensemble_camembert-base/models/mlp/2a1450dcea469212b21ac24506fdbdb637f168aa1883cb2e960ac29c15a26b4b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7833 - root_mean_squared_error: 1.6023 - val_loss: 0.1998 - val_root_mean_squared_error: 0.4470
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2173 - root_mean_squared_error: 0.4661
Epoch 2: val_root_mean_squared_error did not improve from 0.44698
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4166 - root_mean_squared_error: 0.6368 - val_loss: 0.6916 - val_root_mean_squared_error: 0.8316
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7647,4.090984,2.022618,1.824144,4.090984
46,2.4676,1.346054,1.160196,0.92807,1.346054
69,1.0558,0.822449,0.90689,0.706961,0.822449


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 5.2375 - root_mean_squared_error: 2.2886
Epoch 1: val_root_mean_squared_error improved from inf to 0.82115, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.1517 - root_mean_squared_error: 1.4131 - val_loss: 0.6743 - val_root_mean_squared_error: 0.8212
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3740 - root_mean_squared_error: 0.6115
Epoch 2: val_root_mean_squared_error improved from 0.82115 to 0.40590, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6787 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.5358,3.993086,1.998271,1.787021,3.993086
46,2.465,1.444788,1.201994,0.928409,1.444788
69,1.2476,0.898096,0.947679,0.766635,0.898096


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 4.6005 - root_mean_squared_error: 2.1449
Epoch 1: val_root_mean_squared_error improved from inf to 0.59197, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6691 - root_mean_squared_error: 1.5835 - val_loss: 0.3504 - val_root_mean_squared_error: 0.5920
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5166 - root_mean_squared_error: 0.7187
Epoch 2: val_root_mean_squared_error improved from 0.59197 to 0.50741, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5544 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6994,3.897443,1.974194,1.775312,3.897443
46,2.1982,1.236447,1.111956,0.939103,1.236447
69,1.184,0.773245,0.879343,0.764615,0.773245


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 9.5475 - root_mean_squared_error: 3.0899
Epoch 1: val_root_mean_squared_error improved from inf to 0.75063, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3435 - root_mean_squared_error: 1.7499 - val_loss: 0.5634 - val_root_mean_squared_error: 0.7506
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6766 - root_mean_squared_error: 0.8226
Epoch 2: val_root_mean_squared_error improved from 0.75063 to 0.63674, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5840 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9845,2.789258,1.670107,1.419626,2.789258
46,2.0792,0.89988,0.94862,0.66088,0.89988
69,1.1445,0.786871,0.887058,0.679033,0.786871


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 5.5261 - root_mean_squared_error: 2.3508
Epoch 1: val_root_mean_squared_error improved from inf to 0.51136, saving model to cache/ensemble_camembert-base/models/mlp/facef04534cb76ceb729f0fe40b42eae02a897d75b6e54c654d350b2882cf476_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3249 - root_mean_squared_error: 1.4713 - val_loss: 0.2615 - val_root_mean_squared_error: 0.5114
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.2613 - root_mean_squared_error: 0.5112
Epoch 2: val_root_mean_squared_error did not improve from 0.51136
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5452 - root_mean_squared_error: 0.7313 - val_loss: 0.9693 - val_root_mean_squared_error: 0.9845
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4249,2.452096,1.565917,1.391337,2.452096
46,2.4638,0.646546,0.804081,0.633132,0.646546
69,1.1202,0.522189,0.722627,0.633696,0.522189


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 650ms/step - loss: 8.2900 - root_mean_squared_error: 2.8792
Epoch 1: val_root_mean_squared_error improved from inf to 0.65250, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9550 - root_mean_squared_error: 1.6389 - val_loss: 0.4258 - val_root_mean_squared_error: 0.6525
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7874 - root_mean_squared_error: 0.8873
Epoch 2: val_root_mean_squared_error improved from 0.65250 to 0.42217, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5813 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3758,4.537127,2.130053,1.892697,4.537127
46,2.4496,1.695858,1.302251,1.027787,1.695858
69,1.1688,1.070231,1.03452,0.842563,1.070231


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 11.1015 - root_mean_squared_error: 3.3319
Epoch 1: val_root_mean_squared_error improved from inf to 0.67867, saving model to cache/ensemble_camembert-base/models/mlp/ad5842e273f10b3c02181f42dd7e8ff7553572de5fb264645c719022d51d24e6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1708 - root_mean_squared_error: 1.6861 - val_loss: 0.4606 - val_root_mean_squared_error: 0.6787
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6118 - root_mean_squared_error: 0.7822
Epoch 2: val_root_mean_squared_error did not improve from 0.67867
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5136 - root_mean_squared_error: 0.7144 - val_loss: 0.6148 - val_root_mean_squared_error: 0.7841
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2827,4.297612,2.073068,1.860962,4.297612
46,2.1825,1.429385,1.195569,0.937111,1.429385
69,1.1143,0.916218,0.957193,0.745104,0.916218


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 723ms/step - loss: 8.5821 - root_mean_squared_error: 2.9295
Epoch 1: val_root_mean_squared_error improved from inf to 1.05935, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2526 - root_mean_squared_error: 1.7253 - val_loss: 1.1222 - val_root_mean_squared_error: 1.0594
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7183 - root_mean_squared_error: 0.8475
Epoch 2: val_root_mean_squared_error improved from 1.05935 to 0.55152, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4885 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.143,2.865079,1.692654,1.529724,2.865079
46,2.4796,0.755175,0.869008,0.679681,0.755175
69,1.3257,0.517912,0.719661,0.597697,0.517912


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.9188 - root_mean_squared_error: 2.8140
Epoch 1: val_root_mean_squared_error improved from inf to 0.82622, saving model to cache/ensemble_camembert-base/models/mlp/37c75ab5a5d47915158d5fbcd19515f465ec693a96e42e62adb86d30a71e561d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5782 - root_mean_squared_error: 1.5362 - val_loss: 0.6826 - val_root_mean_squared_error: 0.8262
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7811 - root_mean_squared_error: 0.8838
Epoch 2: val_root_mean_squared_error did not improve from 0.82622
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4916 - root_mean_squared_error: 0.6971 - val_loss: 0.7835 - val_root_mean_squared_error: 0.8851
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2683,4.783576,2.187139,1.911849,4.783576
46,2.5182,2.050838,1.432075,1.114023,2.050838
69,1.4072,1.361835,1.166977,0.913831,1.361835


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 5.3421 - root_mean_squared_error: 2.3113
Epoch 1: val_root_mean_squared_error improved from inf to 0.84060, saving model to cache/ensemble_camembert-base/models/mlp/39ec40fcea04f18d768c269aeac728a9c16749968ea4a27a43be972deda9c4b0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4539 - root_mean_squared_error: 1.5118 - val_loss: 0.7066 - val_root_mean_squared_error: 0.8406
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3234 - root_mean_squared_error: 0.5687
Epoch 2: val_root_mean_squared_error did not improve from 0.84060
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5793 - root_mean_squared_error: 0.7576 - val_loss: 0.8866 - val_root_mean_squared_error: 0.9416
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6036,3.857693,1.964101,1.730199,3.857693
46,2.545,1.303589,1.141748,0.897915,1.303589
69,1.0748,0.883575,0.939987,0.760476,0.883575


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 692ms/step - loss: 7.6781 - root_mean_squared_error: 2.7709
Epoch 1: val_root_mean_squared_error improved from inf to 0.58546, saving model to cache/ensemble_camembert-base/models/mlp/e626bb537cba3c7f4ad255c387a250ece611403476f9b91b77a8afcd3b3aad26_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2647 - root_mean_squared_error: 1.7291 - val_loss: 0.3428 - val_root_mean_squared_error: 0.5855
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.1779 - root_mean_squared_error: 0.4217
Epoch 2: val_root_mean_squared_error did not improve from 0.58546
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4241 - root_mean_squared_error: 0.6452 - val_loss: 0.5838 - val_root_mean_squared_error: 0.7640
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3644,3.181033,1.783545,1.592288,3.181033
46,2.1846,0.900619,0.94901,0.770834,0.900619
69,0.9665,0.646327,0.803945,0.704196,0.646327


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.2464 - root_mean_squared_error: 2.4993
Epoch 1: val_root_mean_squared_error improved from inf to 0.79702, saving model to cache/ensemble_camembert-base/models/mlp/8f248700cab9594c85e8dd6fa07bd4718c8b835acdb6cb0fcfae3134c0b2ffa4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4902 - root_mean_squared_error: 1.5180 - val_loss: 0.6352 - val_root_mean_squared_error: 0.7970
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7221 - root_mean_squared_error: 0.8498
Epoch 2: val_root_mean_squared_error improved from 0.79702 to 0.54015, saving model to cache/ensemble_camembert-base/models/mlp/8f248700cab9594c85e8dd6fa07bd4718c8b835acdb6cb0fcfae3134c0b2ffa4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4941 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3513,3.602384,1.897995,1.646738,3.602384
46,1.9269,1.244036,1.115364,0.821555,1.244036
69,1.1778,0.892345,0.94464,0.67359,0.892345


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.2801 - root_mean_squared_error: 2.6982
Epoch 1: val_root_mean_squared_error improved from inf to 0.59399, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0445 - root_mean_squared_error: 1.6705 - val_loss: 0.3528 - val_root_mean_squared_error: 0.5940
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4522 - root_mean_squared_error: 0.6725
Epoch 2: val_root_mean_squared_error improved from 0.59399 to 0.47068, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4371 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4817,2.697312,1.64235,1.492272,2.697313
46,2.6641,0.703203,0.838572,0.682061,0.703203
69,1.2,0.467508,0.683745,0.516304,0.467508


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 9.8575 - root_mean_squared_error: 3.1397
Epoch 1: val_root_mean_squared_error improved from inf to 0.63288, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2221 - root_mean_squared_error: 1.7095 - val_loss: 0.4005 - val_root_mean_squared_error: 0.6329
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2243 - root_mean_squared_error: 0.4736
Epoch 2: val_root_mean_squared_error improved from 0.63288 to 0.56890, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3568 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7329,4.458552,2.111528,1.944494,4.458553
46,2.4728,1.57705,1.255807,1.011665,1.57705
69,1.3246,0.87843,0.937246,0.74538,0.87843


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.3265 - root_mean_squared_error: 2.5153
Epoch 1: val_root_mean_squared_error improved from inf to 0.51956, saving model to cache/ensemble_camembert-base/models/mlp/77f569c07dd1eac26d720462e7d0af96c3c7e731bee32bab8b67b5319fda50ba_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4832 - root_mean_squared_error: 1.5114 - val_loss: 0.2699 - val_root_mean_squared_error: 0.5196
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1350 - root_mean_squared_error: 0.3674
Epoch 2: val_root_mean_squared_error did not improve from 0.51956
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4755 - root_mean_squared_error: 0.6788 - val_loss: 0.3260 - val_root_mean_squared_error: 0.5709
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2204,3.913311,1.978209,1.779913,3.913311
46,2.3328,1.256796,1.121069,0.922621,1.256796
69,1.1306,0.786219,0.88669,0.736112,0.786219


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.7381 - root_mean_squared_error: 2.3954
Epoch 1: val_root_mean_squared_error improved from inf to 0.56927, saving model to cache/ensemble_camembert-base/models/mlp/d26ae0a20ae743737145596f4d9c5ea917cc0e6b21952bd0cc08f0910e00ea63_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8166 - root_mean_squared_error: 1.6163 - val_loss: 0.3241 - val_root_mean_squared_error: 0.5693
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3279 - root_mean_squared_error: 0.5727
Epoch 2: val_root_mean_squared_error did not improve from 0.56927
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3515 - root_mean_squared_error: 0.5904 - val_loss: 0.3925 - val_root_mean_squared_error: 0.6265
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5794,4.344209,2.084276,1.835677,4.344208
46,2.3111,1.588088,1.260194,1.041371,1.588088
69,1.0196,1.04545,1.022473,0.884697,1.04545


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 8.6182 - root_mean_squared_error: 2.9357
Epoch 1: val_root_mean_squared_error improved from inf to 0.72153, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9353 - root_mean_squared_error: 1.6366 - val_loss: 0.5206 - val_root_mean_squared_error: 0.7215
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2275 - root_mean_squared_error: 0.4770
Epoch 2: val_root_mean_squared_error improved from 0.72153 to 0.51968, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4479 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.407,3.61589,1.901549,1.686714,3.61589
46,2.3191,1.123592,1.059996,0.787447,1.123592
69,1.1303,0.773259,0.879351,0.670637,0.773259


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.1010 - root_mean_squared_error: 2.6648
Epoch 1: val_root_mean_squared_error improved from inf to 0.73462, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 2.7332 - root_mean_squared_error: 1.5927 - val_loss: 0.5397 - val_root_mean_squared_error: 0.7346
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7241 - root_mean_squared_error: 0.8509
Epoch 2: val_root_mean_squared_error improved from 0.73462 to 0.45215, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5752 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2916,3.372289,1.836379,1.660306,3.372289
46,2.445,0.988226,0.994096,0.800137,0.988226
69,1.1396,0.630497,0.794038,0.603939,0.630497


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 5.0535 - root_mean_squared_error: 2.2480
Epoch 1: val_root_mean_squared_error improved from inf to 0.76262, saving model to cache/ensemble_camembert-base/models/mlp/436e7d794d2ae85a585f44f4dbf0544bffce9e73d769a22e5512f743b81de5ff_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5421 - root_mean_squared_error: 1.5448 - val_loss: 0.5816 - val_root_mean_squared_error: 0.7626
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8054 - root_mean_squared_error: 0.8975
Epoch 2: val_root_mean_squared_error did not improve from 0.76262
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6011 - root_mean_squared_error: 0.7710 - val_loss: 0.7423 - val_root_mean_squared_error: 0.8616
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2645,4.34676,2.084889,1.800595,4.34676
46,2.3308,1.65568,1.286732,0.966358,1.65568
69,1.1893,1.152613,1.073598,0.888142,1.152613


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.4349 - root_mean_squared_error: 2.5367
Epoch 1: val_root_mean_squared_error improved from inf to 0.59305, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1650 - root_mean_squared_error: 1.7192 - val_loss: 0.3517 - val_root_mean_squared_error: 0.5930
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 0.3067 - root_mean_squared_error: 0.5538
Epoch 2: val_root_mean_squared_error improved from 0.59305 to 0.41433, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4143 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7038,4.738416,2.17679,1.898059,4.738416
46,2.5935,1.970391,1.403706,1.089286,1.970391
69,1.4511,1.304972,1.142354,0.947118,1.304972


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 5.8586 - root_mean_squared_error: 2.4204
Epoch 1: val_root_mean_squared_error improved from inf to 1.28288, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8530 - root_mean_squared_error: 1.6233 - val_loss: 1.6458 - val_root_mean_squared_error: 1.2829
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.5123 - root_mean_squared_error: 1.2298
Epoch 2: val_root_mean_squared_error improved from 1.28288 to 0.56244, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5514 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7552,3.604383,1.898521,1.736726,3.604383
46,2.4738,1.004527,1.002261,0.823969,1.004527
69,1.1975,0.609782,0.780886,0.613245,0.609782


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 7.7730 - root_mean_squared_error: 2.7880
Epoch 1: val_root_mean_squared_error improved from inf to 1.09601, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7663 - root_mean_squared_error: 1.5878 - val_loss: 1.2012 - val_root_mean_squared_error: 1.0960
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8891 - root_mean_squared_error: 0.9429
Epoch 2: val_root_mean_squared_error improved from 1.09601 to 0.52394, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6089 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0133,2.861723,1.691663,1.44171,2.861723
46,2.3897,0.957714,0.978629,0.69881,0.957714
69,1.4797,0.774266,0.879924,0.721227,0.774266


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 9.4539 - root_mean_squared_error: 3.0747
Epoch 1: val_root_mean_squared_error improved from inf to 0.63729, saving model to cache/ensemble_camembert-base/models/mlp/7bac9d80c284abe30ebff2e0189d77fc113418a29546fd977745a7dffaf0a51b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.7401 - root_mean_squared_error: 1.8403 - val_loss: 0.4061 - val_root_mean_squared_error: 0.6373
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4173 - root_mean_squared_error: 0.6460
Epoch 2: val_root_mean_squared_error improved from 0.63729 to 0.48758, saving model to cache/ensemble_camembert-base/models/mlp/7bac9d80c284abe30ebff2e0189d77fc113418a29546fd977745a7dffaf0a51b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5590 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0464,3.122383,1.767027,1.559333,3.122383
46,2.3755,0.961007,0.98031,0.76393,0.961007
69,0.9255,0.683413,0.826688,0.660903,0.683413


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.6627 - root_mean_squared_error: 2.5812
Epoch 1: val_root_mean_squared_error improved from inf to 0.58114, saving model to cache/ensemble_camembert-base/models/mlp/055da26a423a24b552e3b5e3a2863d2acdedd9779ee728efaad7c6a29c6fa52c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5638 - root_mean_squared_error: 1.5416 - val_loss: 0.3377 - val_root_mean_squared_error: 0.5811
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.2954 - root_mean_squared_error: 0.5435
Epoch 2: val_root_mean_squared_error improved from 0.58114 to 0.46953, saving model to cache/ensemble_camembert-base/models/mlp/055da26a423a24b552e3b5e3a2863d2acdedd9779ee728efaad7c6a29c6fa52c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5112 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7738,4.505418,2.122597,1.87056,4.505418
46,2.5831,1.853069,1.361275,0.971873,1.853069
69,1.1224,1.19531,1.093302,0.772696,1.19531


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 9.6218 - root_mean_squared_error: 3.1019
Epoch 1: val_root_mean_squared_error improved from inf to 0.72749, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3072 - root_mean_squared_error: 1.7274 - val_loss: 0.5292 - val_root_mean_squared_error: 0.7275
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4785 - root_mean_squared_error: 0.6918
Epoch 2: val_root_mean_squared_error improved from 0.72749 to 0.53731, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4978 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5228,3.661673,1.91355,1.694448,3.661673
46,2.2518,1.195619,1.093444,0.834138,1.195619
69,1.2218,0.801867,0.89547,0.68655,0.801867


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 6.3021 - root_mean_squared_error: 2.5104
Epoch 1: val_root_mean_squared_error improved from inf to 0.71964, saving model to cache/ensemble_camembert-base/models/mlp/b0fde418d37f22d0d5b5c110bf7c7c5fce90b5ca7dd94b521d5b08417a4cd725_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5093 - root_mean_squared_error: 1.5179 - val_loss: 0.5179 - val_root_mean_squared_error: 0.7196
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2995 - root_mean_squared_error: 0.5473
Epoch 2: val_root_mean_squared_error did not improve from 0.71964
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3966 - root_mean_squared_error: 0.6276 - val_loss: 0.5786 - val_root_mean_squared_error: 0.7607
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6314,3.991987,1.997996,1.79567,3.991987
46,2.7159,1.364339,1.168049,0.936703,1.364339
69,1.2874,0.837088,0.914925,0.713796,0.837088


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.8043 - root_mean_squared_error: 2.7936
Epoch 1: val_root_mean_squared_error improved from inf to 1.02170, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7560 - root_mean_squared_error: 1.5878 - val_loss: 1.0439 - val_root_mean_squared_error: 1.0217
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.0888 - root_mean_squared_error: 1.0435
Epoch 2: val_root_mean_squared_error improved from 1.02170 to 0.57353, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6878 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6312,3.13402,1.770316,1.57106,3.13402
46,2.8767,0.976548,0.988205,0.739877,0.976548
69,1.094,0.67033,0.818737,0.680356,0.67033


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 7.1594 - root_mean_squared_error: 2.6757
Epoch 1: val_root_mean_squared_error improved from inf to 0.51422, saving model to cache/ensemble_camembert-base/models/mlp/3d3af924d189cfeb3f8d2ae7288a756ed8b116c4efc6a01f8122c8f3c9d40341_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2258 - root_mean_squared_error: 1.7199 - val_loss: 0.2644 - val_root_mean_squared_error: 0.5142
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1789 - root_mean_squared_error: 0.4230
Epoch 2: val_root_mean_squared_error did not improve from 0.51422
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7117 - root_mean_squared_error: 0.8299 - val_loss: 0.4338 - val_root_mean_squared_error: 0.6586
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0179,4.018919,2.004724,1.777854,4.018919
46,2.3226,1.378388,1.174048,0.954558,1.378388
69,1.2168,0.892434,0.944687,0.756316,0.892434


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 4.8289 - root_mean_squared_error: 2.1975
Epoch 1: val_root_mean_squared_error improved from inf to 0.72703, saving model to cache/ensemble_camembert-base/models/mlp/90d3c73e5c746e18a33d9d3dddcaf150f251aec4f94b7d7ac30e8364d4ec6116_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7752 - root_mean_squared_error: 1.6137 - val_loss: 0.5286 - val_root_mean_squared_error: 0.7270
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.1383 - root_mean_squared_error: 0.3719
Epoch 2: val_root_mean_squared_error did not improve from 0.72703
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5755 - root_mean_squared_error: 0.7465 - val_loss: 1.2202 - val_root_mean_squared_error: 1.1046
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6041,4.345022,2.084472,1.842905,4.345022
46,2.2698,1.46108,1.208751,0.935386,1.46108
69,1.0315,0.998213,0.999106,0.820892,0.998213


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.4151 - root_mean_squared_error: 2.7231
Epoch 1: val_root_mean_squared_error improved from inf to 1.11386, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0012 - root_mean_squared_error: 1.6631 - val_loss: 1.2407 - val_root_mean_squared_error: 1.1139
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8042 - root_mean_squared_error: 0.8968
Epoch 2: val_root_mean_squared_error improved from 1.11386 to 0.73453, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5110 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9304,4.849138,2.202076,1.959874,4.849138
46,2.6115,1.867881,1.366704,1.063648,1.867881
69,1.0787,1.210306,1.100139,0.855105,1.210306


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.6316 - root_mean_squared_error: 2.5752
Epoch 1: val_root_mean_squared_error improved from inf to 0.67654, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2730 - root_mean_squared_error: 1.7318 - val_loss: 0.4577 - val_root_mean_squared_error: 0.6765
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4151 - root_mean_squared_error: 0.6443
Epoch 2: val_root_mean_squared_error improved from 0.67654 to 0.62537, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5953 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8987,4.577977,2.139621,1.858405,4.577976
46,2.4943,1.776457,1.332838,1.046822,1.776456
69,0.9424,1.213305,1.101501,0.898521,1.213304


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 646ms/step - loss: 8.2183 - root_mean_squared_error: 2.8667
Epoch 1: val_root_mean_squared_error improved from inf to 0.57646, saving model to cache/ensemble_camembert-base/models/mlp/0ef4a28e534245723fd5781547e6471becf4f87dfb0793474aa234ec88389764_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2971 - root_mean_squared_error: 1.7351 - val_loss: 0.3323 - val_root_mean_squared_error: 0.5765
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2331 - root_mean_squared_error: 0.4828
Epoch 2: val_root_mean_squared_error did not improve from 0.57646
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4446 - root_mean_squared_error: 0.6628 - val_loss: 0.4374 - val_root_mean_squared_error: 0.6614
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6018,4.524742,2.127144,1.80445,4.524741
46,2.3398,1.895979,1.376946,1.050964,1.895979
69,0.9622,1.356836,1.164833,0.959516,1.356836


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 9.4172 - root_mean_squared_error: 3.0687
Epoch 1: val_root_mean_squared_error improved from inf to 0.65623, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8434 - root_mean_squared_error: 1.6053 - val_loss: 0.4306 - val_root_mean_squared_error: 0.6562
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2632 - root_mean_squared_error: 0.5131
Epoch 2: val_root_mean_squared_error improved from 0.65623 to 0.43977, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.4359 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0579,3.733432,1.932209,1.692309,3.733432
46,2.3475,1.28664,1.134301,0.861651,1.28664
69,0.9928,0.888042,0.94236,0.719813,0.888042


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 4.9852 - root_mean_squared_error: 2.2328
Epoch 1: val_root_mean_squared_error improved from inf to 0.59478, saving model to cache/ensemble_camembert-base/models/mlp/bb9e7b05d0b7844846c139c788f682a4587c64a7747caf3ae01f00ffaec74d0d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6611 - root_mean_squared_error: 1.5722 - val_loss: 0.3538 - val_root_mean_squared_error: 0.5948
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3503 - root_mean_squared_error: 0.5918
Epoch 2: val_root_mean_squared_error did not improve from 0.59478
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5419 - root_mean_squared_error: 0.7329 - val_loss: 1.4142 - val_root_mean_squared_error: 1.1892
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2274,3.161942,1.778185,1.576157,3.161942
46,2.1226,0.949499,0.974423,0.771139,0.949499
69,0.9947,0.672902,0.820306,0.66489,0.672902


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 5.2854 - root_mean_squared_error: 2.2990
Epoch 1: val_root_mean_squared_error improved from inf to 0.86724, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3871 - root_mean_squared_error: 1.4909 - val_loss: 0.7521 - val_root_mean_squared_error: 0.8672
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.4554 - root_mean_squared_error: 1.2064
Epoch 2: val_root_mean_squared_error improved from 0.86724 to 0.60518, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7731 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4442,3.281868,1.811593,1.611529,3.281869
46,2.0607,1.0245,1.012176,0.820546,1.0245
69,1.1528,0.681557,0.825565,0.685983,0.681557


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 5.1299 - root_mean_squared_error: 2.2649
Epoch 1: val_root_mean_squared_error improved from inf to 1.33564, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6038 - root_mean_squared_error: 1.5565 - val_loss: 1.7839 - val_root_mean_squared_error: 1.3356
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.8255 - root_mean_squared_error: 0.9086
Epoch 2: val_root_mean_squared_error improved from 1.33564 to 0.49913, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4545 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7532,2.731613,1.652759,1.484342,2.731613
46,2.0879,0.664779,0.81534,0.697316,0.664779
69,1.1881,0.544091,0.737625,0.620277,0.544091


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 8.0302 - root_mean_squared_error: 2.8338
Epoch 1: val_root_mean_squared_error improved from inf to 0.64035, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9612 - root_mean_squared_error: 1.6516 - val_loss: 0.4100 - val_root_mean_squared_error: 0.6403
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3917 - root_mean_squared_error: 0.6259
Epoch 2: val_root_mean_squared_error improved from 0.64035 to 0.44442, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4136 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5839,3.553555,1.885087,1.696177,3.553555
46,2.3222,1.143395,1.069296,0.831834,1.143395
69,1.1997,0.713628,0.844765,0.646128,0.713628


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 5.2730 - root_mean_squared_error: 2.2963
Epoch 1: val_root_mean_squared_error improved from inf to 0.55126, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3552 - root_mean_squared_error: 1.4876 - val_loss: 0.3039 - val_root_mean_squared_error: 0.5513
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3040 - root_mean_squared_error: 0.5514
Epoch 2: val_root_mean_squared_error improved from 0.55126 to 0.43806, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6990 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2082,4.460898,2.112084,1.851651,4.460898
46,2.2359,1.675721,1.294497,1.003768,1.675721
69,0.9202,1.116299,1.056551,0.786866,1.116299


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 645ms/step - loss: 8.1729 - root_mean_squared_error: 2.8588
Epoch 1: val_root_mean_squared_error improved from inf to 1.03875, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9258 - root_mean_squared_error: 1.6334 - val_loss: 1.0790 - val_root_mean_squared_error: 1.0388
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.4849 - root_mean_squared_error: 1.2186
Epoch 2: val_root_mean_squared_error improved from 1.03875 to 0.63348, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5873 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3941,4.087292,2.021705,1.734741,4.087292
46,2.448,1.62134,1.273318,1.026988,1.62134
69,1.3158,1.137933,1.066739,0.895548,1.137933


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 647ms/step - loss: 5.9127 - root_mean_squared_error: 2.4316
Epoch 1: val_root_mean_squared_error improved from inf to 0.56998, saving model to cache/ensemble_camembert-base/models/mlp/d03ef7687352d9539ad20a9489ef19acd31ec059a6862a9a512583b5ee3c9ffe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7012 - root_mean_squared_error: 1.5849 - val_loss: 0.3249 - val_root_mean_squared_error: 0.5700
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3442 - root_mean_squared_error: 0.5867
Epoch 2: val_root_mean_squared_error did not improve from 0.56998
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6712 - root_mean_squared_error: 0.8107 - val_loss: 0.6973 - val_root_mean_squared_error: 0.8351
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6319,3.023976,1.738958,1.411923,3.023976
46,2.4507,1.192,1.091787,0.816903,1.192
69,1.3133,1.040403,1.020002,0.821178,1.040403


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.7538 - root_mean_squared_error: 2.7846
Epoch 1: val_root_mean_squared_error improved from inf to 0.66845, saving model to cache/ensemble_camembert-base/models/mlp/6ee7a61e0c51edc963b5c4a3b46e67d88dfabcb9981508526cab3b47b7999ff3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7998 - root_mean_squared_error: 1.6006 - val_loss: 0.4468 - val_root_mean_squared_error: 0.6684
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3244 - root_mean_squared_error: 0.5696
Epoch 2: val_root_mean_squared_error did not improve from 0.66845
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5665 - root_mean_squared_error: 0.7458 - val_loss: 0.7415 - val_root_mean_squared_error: 0.8611
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8648,3.537909,1.880933,1.65861,3.537908
46,2.3623,1.133083,1.064464,0.839377,1.133084
69,1.0267,0.784505,0.885723,0.701237,0.784505


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 6.4084 - root_mean_squared_error: 2.5315
Epoch 1: val_root_mean_squared_error improved from inf to 0.58000, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3567 - root_mean_squared_error: 1.4765 - val_loss: 0.3364 - val_root_mean_squared_error: 0.5800
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3760 - root_mean_squared_error: 0.6132
Epoch 2: val_root_mean_squared_error improved from 0.58000 to 0.51327, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4990 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2025,3.643325,1.90875,1.682553,3.643325
46,2.3872,1.310394,1.144724,0.879652,1.310394
69,1.3254,0.860297,0.927522,0.770314,0.860297


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 4.8533 - root_mean_squared_error: 2.2030
Epoch 1: val_root_mean_squared_error improved from inf to 0.65840, saving model to cache/ensemble_camembert-base/models/mlp/3fa44c0dd855d51bd227c7e0b227afbf92085597d6c3e2505bf6dc0731839066_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8164 - root_mean_squared_error: 1.6275 - val_loss: 0.4335 - val_root_mean_squared_error: 0.6584
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5057 - root_mean_squared_error: 0.7111
Epoch 2: val_root_mean_squared_error did not improve from 0.65840
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4245 - root_mean_squared_error: 0.6497 - val_loss: 0.5536 - val_root_mean_squared_error: 0.7440
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7929,3.733377,1.932195,1.729607,3.733378
46,2.2554,1.169146,1.08127,0.893606,1.169146
69,1.1439,0.75604,0.869506,0.744859,0.75604


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 10.0339 - root_mean_squared_error: 3.1676
Epoch 1: val_root_mean_squared_error improved from inf to 0.83923, saving model to cache/ensemble_camembert-base/models/mlp/07c721c138ec732f0fcf9a79f008d8a78607776f01781a6c00fcdd8f35514301_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4366 - root_mean_squared_error: 1.7666 - val_loss: 0.7043 - val_root_mean_squared_error: 0.8392
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.8027 - root_mean_squared_error: 0.8959
Epoch 2: val_root_mean_squared_error did not improve from 0.83923
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4538 - root_mean_squared_error: 0.6679 - val_loss: 2.0252 - val_root_mean_squared_error: 1.4231
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4862,2.900068,1.702959,1.469117,2.900068
46,1.9247,0.918246,0.958252,0.69863,0.918246
69,1.1469,0.736394,0.858134,0.648541,0.736394


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 5.0665 - root_mean_squared_error: 2.2509
Epoch 1: val_root_mean_squared_error improved from inf to 0.44884, saving model to cache/ensemble_camembert-base/models/mlp/fb49d75f95a3ac4425bb74a03058702b65ca3e49722e4cae29ce0591fc770417_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3313 - root_mean_squared_error: 1.4798 - val_loss: 0.2015 - val_root_mean_squared_error: 0.4488
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.1889 - root_mean_squared_error: 0.4347
Epoch 2: val_root_mean_squared_error did not improve from 0.44884
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5946 - root_mean_squared_error: 0.7607 - val_loss: 0.7195 - val_root_mean_squared_error: 0.8483
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4651,2.649696,1.627789,1.463334,2.649696
46,2.3061,0.694609,0.833432,0.665197,0.694609
69,1.0668,0.502278,0.708716,0.599725,0.502278


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 7.4670 - root_mean_squared_error: 2.7326
Epoch 1: val_root_mean_squared_error improved from inf to 0.52863, saving model to cache/ensemble_camembert-base/models/mlp/88b26dbf5ebe0e276ce713f83c865118d1b202ee60e8cbc364ee5a252d0e8135_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0814 - root_mean_squared_error: 1.6803 - val_loss: 0.2795 - val_root_mean_squared_error: 0.5286
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4842 - root_mean_squared_error: 0.6959
Epoch 2: val_root_mean_squared_error did not improve from 0.52863
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5293 - root_mean_squared_error: 0.7259 - val_loss: 0.7072 - val_root_mean_squared_error: 0.8410
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3774,4.120065,2.029794,1.74398,4.120065
46,2.3974,1.566228,1.25149,0.953434,1.566228
69,1.1247,1.098304,1.048,0.848696,1.098304


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 9.9855 - root_mean_squared_error: 3.1600
Epoch 1: val_root_mean_squared_error improved from inf to 0.56078, saving model to cache/ensemble_camembert-base/models/mlp/4f63c5812d7c65a626aa73fcd436ef60b6d4715b95e45bf09bf0bcd9fdd30d12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8428 - root_mean_squared_error: 1.6002 - val_loss: 0.3145 - val_root_mean_squared_error: 0.5608
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3769 - root_mean_squared_error: 0.6139
Epoch 2: val_root_mean_squared_error did not improve from 0.56078
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6382 - root_mean_squared_error: 0.7897 - val_loss: 0.3444 - val_root_mean_squared_error: 0.5868
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2465,4.145837,2.036133,1.810134,4.145837
46,2.0281,1.393912,1.18064,0.937859,1.393912
69,1.1481,0.926629,0.962616,0.7427,0.926629


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.3412 - root_mean_squared_error: 2.8881
Epoch 1: val_root_mean_squared_error improved from inf to 1.20780, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3231 - root_mean_squared_error: 1.7460 - val_loss: 1.4588 - val_root_mean_squared_error: 1.2078
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.2947 - root_mean_squared_error: 1.1379
Epoch 2: val_root_mean_squared_error improved from 1.20780 to 0.60563, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5572 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0749,2.979071,1.725999,1.517051,2.979071
46,2.3907,0.882637,0.939488,0.696776,0.882637
69,1.2456,0.669562,0.818268,0.666341,0.669562


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 7.6598 - root_mean_squared_error: 2.7676
Epoch 1: val_root_mean_squared_error improved from inf to 0.42448, saving model to cache/ensemble_camembert-base/models/mlp/5170136033579d50495d65ecbad40ce8b7f100136bc373ab352211343c2b1a12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4093 - root_mean_squared_error: 1.4845 - val_loss: 0.1802 - val_root_mean_squared_error: 0.4245
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2956 - root_mean_squared_error: 0.5437
Epoch 2: val_root_mean_squared_error did not improve from 0.42448
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4706 - root_mean_squared_error: 0.6839 - val_loss: 0.7292 - val_root_mean_squared_error: 0.8539
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3384,4.6502,2.156432,1.868758,4.6502
46,2.3597,1.962709,1.400967,1.044463,1.962709
69,1.3362,1.326505,1.15174,0.871587,1.326505


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 7.7856 - root_mean_squared_error: 2.7903
Epoch 1: val_root_mean_squared_error improved from inf to 0.62294, saving model to cache/ensemble_camembert-base/models/mlp/84acc7d4a7a28c8894cc67d8b6b14b7230685d24924d875239b8a2aee6de0aae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7947 - root_mean_squared_error: 1.5984 - val_loss: 0.3880 - val_root_mean_squared_error: 0.6229
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3601 - root_mean_squared_error: 0.6001
Epoch 2: val_root_mean_squared_error did not improve from 0.62294
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5457 - root_mean_squared_error: 0.7340 - val_loss: 1.2930 - val_root_mean_squared_error: 1.1371
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7693,3.652548,1.911164,1.707843,3.652548
46,2.4236,1.135145,1.065432,0.849225,1.135145
69,1.0472,0.748602,0.865218,0.713506,0.748602


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 7.6168 - root_mean_squared_error: 2.7599
Epoch 1: val_root_mean_squared_error improved from inf to 0.59914, saving model to cache/ensemble_camembert-base/models/mlp/07cf89bb1a4d57367addb8b2148820a0161dd4ddc5f99dc33bc0fa8a0f616415_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2203 - root_mean_squared_error: 1.7198 - val_loss: 0.3590 - val_root_mean_squared_error: 0.5991
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2909 - root_mean_squared_error: 0.5393
Epoch 2: val_root_mean_squared_error did not improve from 0.59914
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4993 - root_mean_squared_error: 0.7028 - val_loss: 0.7437 - val_root_mean_squared_error: 0.8624
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2952,3.672489,1.916374,1.731008,3.672489
46,2.1392,1.099409,1.048527,0.874532,1.099408
69,0.9643,0.694625,0.833442,0.722923,0.694625


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 5.4326 - root_mean_squared_error: 2.3308
Epoch 1: val_root_mean_squared_error improved from inf to 0.85164, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3405 - root_mean_squared_error: 1.4772 - val_loss: 0.7253 - val_root_mean_squared_error: 0.8516
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5999 - root_mean_squared_error: 0.7745
Epoch 2: val_root_mean_squared_error improved from 0.85164 to 0.60652, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5447 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1555,4.327753,2.080325,1.851289,4.327753
46,1.8639,1.50775,1.227905,0.956698,1.50775
69,1.0523,0.970831,0.985307,0.757281,0.970831


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 6.7775 - root_mean_squared_error: 2.6034
Epoch 1: val_root_mean_squared_error improved from inf to 0.52569, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9552 - root_mean_squared_error: 1.6532 - val_loss: 0.2764 - val_root_mean_squared_error: 0.5257
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3021 - root_mean_squared_error: 0.5497
Epoch 2: val_root_mean_squared_error improved from 0.52569 to 0.48231, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3447 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2603,2.863521,1.692194,1.5163,2.863521
46,2.6738,0.828002,0.909946,0.767926,0.828002
69,1.1887,0.557986,0.746984,0.618896,0.557986


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 10.1586 - root_mean_squared_error: 3.1873
Epoch 1: val_root_mean_squared_error improved from inf to 0.60084, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3794 - root_mean_squared_error: 1.7515 - val_loss: 0.3610 - val_root_mean_squared_error: 0.6008
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2298 - root_mean_squared_error: 0.4794
Epoch 2: val_root_mean_squared_error improved from 0.60084 to 0.50082, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3681 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0017,3.579311,1.891907,1.699749,3.579311
46,2.4197,1.171872,1.082531,0.845587,1.171872
69,1.4828,0.724498,0.851175,0.687627,0.724498


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 5.4686 - root_mean_squared_error: 2.3385
Epoch 1: val_root_mean_squared_error improved from inf to 0.50765, saving model to cache/ensemble_camembert-base/models/mlp/90627c4cf927a73c683716766299bb210411545779725ff855a9709ed3e568aa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4215 - root_mean_squared_error: 1.5021 - val_loss: 0.2577 - val_root_mean_squared_error: 0.5076
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3454 - root_mean_squared_error: 0.5877
Epoch 2: val_root_mean_squared_error did not improve from 0.50765
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6378 - root_mean_squared_error: 0.7928 - val_loss: 0.4793 - val_root_mean_squared_error: 0.6923
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9132,4.122593,2.030417,1.814394,4.122593
46,2.2474,1.440069,1.200029,0.959296,1.440069
69,1.1103,0.913405,0.955722,0.780742,0.913405


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 4.6331 - root_mean_squared_error: 2.1525
Epoch 1: val_root_mean_squared_error improved from inf to 0.77217, saving model to cache/ensemble_camembert-base/models/mlp/484585ffd706500a7f7ace1488b27c7e2241b40b06102f345843c09c6fad1282_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6604 - root_mean_squared_error: 1.5756 - val_loss: 0.5963 - val_root_mean_squared_error: 0.7722
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2666 - root_mean_squared_error: 0.5163
Epoch 2: val_root_mean_squared_error did not improve from 0.77217
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3897 - root_mean_squared_error: 0.6195 - val_loss: 0.8188 - val_root_mean_squared_error: 0.9049
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5701,3.697866,1.922984,1.687065,3.697866
46,2.3981,1.276218,1.129698,0.88846,1.276218
69,1.0671,0.863619,0.929311,0.786823,0.863619


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 7.4876 - root_mean_squared_error: 2.7363
Epoch 1: val_root_mean_squared_error improved from inf to 0.89720, saving model to cache/ensemble_camembert-base/models/mlp/9c9b88e3d727ee7efcc7e46815c5a0506914622fdc2c6cace5dad8495e0d2070_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8472 - root_mean_squared_error: 1.6193 - val_loss: 0.8050 - val_root_mean_squared_error: 0.8972
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.8075 - root_mean_squared_error: 0.8986
Epoch 2: val_root_mean_squared_error improved from 0.89720 to 0.41037, saving model to cache/ensemble_camembert-base/models/mlp/9c9b88e3d727ee7efcc7e46815c5a0506914622fdc2c6cace5dad8495e0d2070_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6666 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5431,3.803596,1.950281,1.750992,3.803596
46,2.3037,1.171223,1.082231,0.826084,1.171223
69,1.0833,0.769027,0.876942,0.674317,0.769027


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 6.0346 - root_mean_squared_error: 2.4566
Epoch 1: val_root_mean_squared_error improved from inf to 0.48696, saving model to cache/ensemble_camembert-base/models/mlp/c92f44848a2384af63d3cfbddba60d63334221abe02da31e9e1c586b1e2533ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5163 - root_mean_squared_error: 1.5326 - val_loss: 0.2371 - val_root_mean_squared_error: 0.4870
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.2152 - root_mean_squared_error: 0.4638
Epoch 2: val_root_mean_squared_error did not improve from 0.48696
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5092 - root_mean_squared_error: 0.7095 - val_loss: 1.0287 - val_root_mean_squared_error: 1.0142
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0437,3.353325,1.831209,1.63057,3.353325
46,2.6066,1.028524,1.014162,0.816042,1.028524
69,1.0825,0.699104,0.836124,0.660663,0.699104


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 5.6184 - root_mean_squared_error: 2.3703
Epoch 1: val_root_mean_squared_error improved from inf to 0.65034, saving model to cache/ensemble_camembert-base/models/mlp/40166beeeec832cc93000864fa474708241e832c774bfc63085e6a64b143b22d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7037 - root_mean_squared_error: 1.5896 - val_loss: 0.4229 - val_root_mean_squared_error: 0.6503
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4595 - root_mean_squared_error: 0.6779
Epoch 2: val_root_mean_squared_error did not improve from 0.65034
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4732 - root_mean_squared_error: 0.6871 - val_loss: 0.4232 - val_root_mean_squared_error: 0.6505
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7409,4.363705,2.088948,1.827456,4.363705
46,2.2188,1.617529,1.271821,0.975955,1.617529
69,1.0955,1.084984,1.041626,0.86571,1.084984


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 6.0861 - root_mean_squared_error: 2.4670
Epoch 1: val_root_mean_squared_error improved from inf to 0.52013, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9003 - root_mean_squared_error: 1.6478 - val_loss: 0.2705 - val_root_mean_squared_error: 0.5201
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.3510 - root_mean_squared_error: 0.5924
Epoch 2: val_root_mean_squared_error improved from 0.52013 to 0.33598, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5159 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8249,4.708096,2.169815,1.917261,4.708096
46,2.4213,1.923198,1.386794,1.082211,1.923198
69,1.5181,1.234786,1.111209,0.908466,1.234786


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 5.8363 - root_mean_squared_error: 2.4158
Epoch 1: val_root_mean_squared_error improved from inf to 1.07881, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8342 - root_mean_squared_error: 1.6236 - val_loss: 1.1638 - val_root_mean_squared_error: 1.0788
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9469 - root_mean_squared_error: 0.9731
Epoch 2: val_root_mean_squared_error improved from 1.07881 to 0.59297, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4493 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7524,3.966276,1.991551,1.806006,3.966276
46,2.3727,1.214252,1.101931,0.878626,1.214252
69,1.1671,0.747984,0.864861,0.66758,0.747984


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 8.5185 - root_mean_squared_error: 2.9186
Epoch 1: val_root_mean_squared_error improved from inf to 0.96164, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9765 - root_mean_squared_error: 1.6420 - val_loss: 0.9247 - val_root_mean_squared_error: 0.9616
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7526 - root_mean_squared_error: 0.8675
Epoch 2: val_root_mean_squared_error improved from 0.96164 to 0.45508, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4731 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6593,2.980458,1.7264,1.493591,2.980458
46,2.2668,0.957858,0.978702,0.762621,0.957858
69,1.3954,0.732944,0.856121,0.732642,0.732944


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 9.2394 - root_mean_squared_error: 3.0396
Epoch 1: val_root_mean_squared_error improved from inf to 0.61526, saving model to cache/ensemble_camembert-base/models/mlp/b0610079c7dba3b8ee1400012003fc1bfb340015fdc7a5731526be9ee8f80bb7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.6053 - root_mean_squared_error: 1.8087 - val_loss: 0.3785 - val_root_mean_squared_error: 0.6153
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4081 - root_mean_squared_error: 0.6388
Epoch 2: val_root_mean_squared_error did not improve from 0.61526
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5872 - root_mean_squared_error: 0.7614 - val_loss: 0.5687 - val_root_mean_squared_error: 0.7541
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0777,2.74528,1.656889,1.472327,2.74528
46,2.4613,0.761379,0.87257,0.681469,0.761379
69,0.967,0.56101,0.749006,0.590085,0.56101


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 6.5992 - root_mean_squared_error: 2.5689
Epoch 1: val_root_mean_squared_error improved from inf to 0.63126, saving model to cache/ensemble_camembert-base/models/mlp/1f19d39383ed69b8645ac611b534cdb8caa64f00d334c182edaa4880027af372_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4789 - root_mean_squared_error: 1.5161 - val_loss: 0.3985 - val_root_mean_squared_error: 0.6313
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3432 - root_mean_squared_error: 0.5858
Epoch 2: val_root_mean_squared_error improved from 0.63126 to 0.38605, saving model to cache/ensemble_camembert-base/models/mlp/1f19d39383ed69b8645ac611b534cdb8caa64f00d334c182edaa4880027af372_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4774 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6502,4.612404,2.147651,1.906085,4.612404
46,2.7799,1.86687,1.366335,0.984508,1.86687
69,1.3195,1.192549,1.092039,0.77583,1.192549


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 8.3407 - root_mean_squared_error: 2.8880
Epoch 1: val_root_mean_squared_error improved from inf to 0.72146, saving model to cache/ensemble_camembert-base/models/mlp/046669bded4eb526f94b634a704da991e82ceafe1170f0bbd45079b8b8116890_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7705 - root_mean_squared_error: 1.5819 - val_loss: 0.5205 - val_root_mean_squared_error: 0.7215
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5672 - root_mean_squared_error: 0.7531
Epoch 2: val_root_mean_squared_error improved from 0.72146 to 0.61330, saving model to cache/ensemble_camembert-base/models/mlp/046669bded4eb526f94b634a704da991e82ceafe1170f0bbd45079b8b8116890_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7297 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7814,3.709048,1.925889,1.724221,3.709048
46,2.5994,1.19167,1.091636,0.841512,1.19167
69,1.2971,0.759779,0.871653,0.646549,0.759779


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 5.2347 - root_mean_squared_error: 2.2880
Epoch 1: val_root_mean_squared_error improved from inf to 0.64432, saving model to cache/ensemble_camembert-base/models/mlp/b7b0b74bd389cd4600fb1c924840f539de51173128915e150c52fc7a7d53c2fd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5413 - root_mean_squared_error: 1.5364 - val_loss: 0.4151 - val_root_mean_squared_error: 0.6443
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1973 - root_mean_squared_error: 0.4442
Epoch 2: val_root_mean_squared_error did not improve from 0.64432
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3911 - root_mean_squared_error: 0.6215 - val_loss: 0.4691 - val_root_mean_squared_error: 0.6849
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0308,3.965865,1.991448,1.786321,3.965864
46,2.9469,1.354178,1.163692,0.928151,1.354178
69,1.3745,0.83872,0.915817,0.700007,0.83872


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.3829 - root_mean_squared_error: 2.7171
Epoch 1: val_root_mean_squared_error improved from inf to 1.16094, saving model to cache/ensemble_camembert-base/models/mlp/095265363eb408ee772168200c9599c4ca2b8b3c5ca57df55a4c507092b76fca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6026 - root_mean_squared_error: 1.5421 - val_loss: 1.3478 - val_root_mean_squared_error: 1.1609
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 1.6736 - root_mean_squared_error: 1.2937
Epoch 2: val_root_mean_squared_error improved from 1.16094 to 1.02805, saving model to cache/ensemble_camembert-base/models/mlp/095265363eb408ee772168200c9599c4ca2b8b3c5ca57df55a4c507092b76fca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0435 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7462,2.829085,1.681989,1.486946,2.829085
46,3.1662,0.826669,0.909213,0.66788,0.826669
69,1.2081,0.618486,0.786439,0.655866,0.618486


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 7.0714 - root_mean_squared_error: 2.6592
Epoch 1: val_root_mean_squared_error improved from inf to 0.53754, saving model to cache/ensemble_camembert-base/models/mlp/27fa6663f4946e24e468ade7e23ac63fc914c31bdc4cb045372b9aa696326b46_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.4472 - root_mean_squared_error: 1.7790 - val_loss: 0.2890 - val_root_mean_squared_error: 0.5375
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1865 - root_mean_squared_error: 0.4319
Epoch 2: val_root_mean_squared_error did not improve from 0.53754
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6545 - root_mean_squared_error: 0.7942 - val_loss: 0.7923 - val_root_mean_squared_error: 0.8901
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.092,3.824262,1.955572,1.72129,3.824262
46,2.3712,1.300114,1.140226,0.9219,1.300114
69,1.3391,0.868299,0.931826,0.741124,0.868299


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 4.8898 - root_mean_squared_error: 2.2113
Epoch 1: val_root_mean_squared_error improved from inf to 0.65343, saving model to cache/ensemble_camembert-base/models/mlp/fc9839c805b360b0fa9ae962b9afac00a945b4cf8d9a2d068df9712894d0fba4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7872 - root_mean_squared_error: 1.6198 - val_loss: 0.4270 - val_root_mean_squared_error: 0.6534
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6107 - root_mean_squared_error: 0.7815
Epoch 2: val_root_mean_squared_error did not improve from 0.65343
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.8430 - root_mean_squared_error: 0.9088 - val_loss: 0.4795 - val_root_mean_squared_error: 0.6924
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7206,4.03714,2.009264,1.747462,4.03714
46,2.3771,1.37311,1.171798,0.895182,1.37311
69,1.0245,0.992522,0.996254,0.827065,0.992522


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 7.4318 - root_mean_squared_error: 2.7261
Epoch 1: val_root_mean_squared_error improved from inf to 0.85722, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.3123 - root_mean_squared_error: 1.7456 - val_loss: 0.7348 - val_root_mean_squared_error: 0.8572
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.5002 - root_mean_squared_error: 0.7073
Epoch 2: val_root_mean_squared_error improved from 0.85722 to 0.70110, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4423 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0639,4.662696,2.159328,1.930183,4.662696
46,2.6973,1.739879,1.319045,1.020045,1.739879
69,1.0554,1.120695,1.058629,0.77971,1.120695


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.2395 - root_mean_squared_error: 2.4979
Epoch 1: val_root_mean_squared_error improved from inf to 0.67971, saving model to cache/ensemble_camembert-base/models/mlp/49bec529ad8d68925f64afc706996070e79d54e98bd3ecd07704d2fc81a3d290_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3824 - root_mean_squared_error: 1.7655 - val_loss: 0.4620 - val_root_mean_squared_error: 0.6797
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5546 - root_mean_squared_error: 0.7447
Epoch 2: val_root_mean_squared_error improved from 0.67971 to 0.42531, saving model to cache/ensemble_camembert-base/models/mlp/49bec529ad8d68925f64afc706996070e79d54e98bd3ecd07704d2fc81a3d290_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4008 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1728,4.651412,2.156713,1.914186,4.651411
46,2.4958,1.709696,1.307554,1.063596,1.709696
69,0.9522,1.101579,1.049562,0.862781,1.101579


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 7.9778 - root_mean_squared_error: 2.8245
Epoch 1: val_root_mean_squared_error improved from inf to 0.75558, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2935 - root_mean_squared_error: 1.7386 - val_loss: 0.5709 - val_root_mean_squared_error: 0.7556
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.2371 - root_mean_squared_error: 0.4869
Epoch 2: val_root_mean_squared_error improved from 0.75558 to 0.41218, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4792 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.025,4.047108,2.011742,1.67916,4.047107
46,2.5142,1.680995,1.296532,0.930529,1.680995
69,1.1492,1.255619,1.120544,0.900392,1.255619


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 8.5039 - root_mean_squared_error: 2.9161
Epoch 1: val_root_mean_squared_error improved from inf to 0.61563, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7181 - root_mean_squared_error: 1.5708 - val_loss: 0.3790 - val_root_mean_squared_error: 0.6156
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5443 - root_mean_squared_error: 0.7378
Epoch 2: val_root_mean_squared_error improved from 0.61563 to 0.45700, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4986 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.163,3.483586,1.866437,1.602117,3.483586
46,2.6127,1.219158,1.104155,0.837746,1.219158
69,0.9888,0.916326,0.957249,0.726448,0.916326


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 6.0555 - root_mean_squared_error: 2.4608
Epoch 1: val_root_mean_squared_error improved from inf to 1.00572, saving model to cache/ensemble_camembert-base/models/mlp/c24bae4a232597417afa0883aaca76b9267952ce314546f0e463c4442e56b3f8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6159 - root_mean_squared_error: 1.5505 - val_loss: 1.0115 - val_root_mean_squared_error: 1.0057
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5138 - root_mean_squared_error: 0.7168
Epoch 2: val_root_mean_squared_error did not improve from 1.00572
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4828 - root_mean_squared_error: 0.6920 - val_loss: 1.2893 - val_root_mean_squared_error: 1.1355
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 33ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4603,3.112117,1.764119,1.569617,3.112117
46,2.1051,0.911128,0.95453,0.750586,0.911128
69,1.0494,0.641398,0.800873,0.63056,0.641398


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 5.8070 - root_mean_squared_error: 2.4098
Epoch 1: val_root_mean_squared_error improved from inf to 0.79661, saving model to cache/ensemble_camembert-base/models/mlp/9dd0bcbfe8a9d1b0e3ced789f2aed0abb5ac05077df263d2b45578c2971a8b64_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3288 - root_mean_squared_error: 1.4674 - val_loss: 0.6346 - val_root_mean_squared_error: 0.7966
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1644 - root_mean_squared_error: 1.0791
Epoch 2: val_root_mean_squared_error improved from 0.79661 to 0.53331, saving model to cache/ensemble_camembert-base/models/mlp/9dd0bcbfe8a9d1b0e3ced789f2aed0abb5ac05077df263d2b45578c2971a8b64_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6507 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2036,3.175295,1.781936,1.576911,3.175295
46,2.366,1.008348,1.004165,0.79618,1.008348
69,1.2652,0.681022,0.825241,0.660909,0.681022


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 4.8825 - root_mean_squared_error: 2.2096
Epoch 1: val_root_mean_squared_error improved from inf to 0.70138, saving model to cache/ensemble_camembert-base/models/mlp/f4de0cb00be1f6c7f3b79a2cefa1cc53db11c0fbda5c94979f4191552b2b9143_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4667 - root_mean_squared_error: 1.5198 - val_loss: 0.4919 - val_root_mean_squared_error: 0.7014
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3233 - root_mean_squared_error: 0.5686
Epoch 2: val_root_mean_squared_error improved from 0.70138 to 0.41540, saving model to cache/ensemble_camembert-base/models/mlp/f4de0cb00be1f6c7f3b79a2cefa1cc53db11c0fbda5c94979f4191552b2b9143_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4628 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7169,2.617586,1.617896,1.456417,2.617586
46,2.4667,0.634214,0.796376,0.626507,0.634214
69,1.2485,0.51234,0.715779,0.561508,0.51234


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 8.3209 - root_mean_squared_error: 2.8846
Epoch 1: val_root_mean_squared_error improved from inf to 0.65095, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9481 - root_mean_squared_error: 1.6443 - val_loss: 0.4237 - val_root_mean_squared_error: 0.6510
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3694 - root_mean_squared_error: 0.6078
Epoch 2: val_root_mean_squared_error improved from 0.65095 to 0.43079, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5306 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3093,3.800884,1.949586,1.765251,3.800885
46,2.0813,1.249063,1.117615,0.845718,1.249063
69,1.2637,0.755029,0.868924,0.653368,0.755029


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.4113 - root_mean_squared_error: 2.3262
Epoch 1: val_root_mean_squared_error improved from inf to 0.74698, saving model to cache/ensemble_camembert-base/models/mlp/997254962ddefe227569a42e24e51898dc13590b4207bff0d24ccc1b3571687d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2865 - root_mean_squared_error: 1.4601 - val_loss: 0.5580 - val_root_mean_squared_error: 0.7470
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3216 - root_mean_squared_error: 0.5671
Epoch 2: val_root_mean_squared_error improved from 0.74698 to 0.52935, saving model to cache/ensemble_camembert-base/models/mlp/997254962ddefe227569a42e24e51898dc13590b4207bff0d24ccc1b3571687d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.8189 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1767,4.639756,2.154009,1.868301,4.639756
46,2.3407,1.821259,1.34954,1.065005,1.821259
69,0.9318,1.246664,1.116541,0.892031,1.246664


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 8.3493 - root_mean_squared_error: 2.8895
Epoch 1: val_root_mean_squared_error improved from inf to 0.64548, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7962 - root_mean_squared_error: 1.5919 - val_loss: 0.4166 - val_root_mean_squared_error: 0.6455
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2240 - root_mean_squared_error: 0.4733
Epoch 2: val_root_mean_squared_error improved from 0.64548 to 0.56140, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4411 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6656,3.84266,1.96027,1.684063,3.84266
46,2.4813,1.478535,1.21595,0.963509,1.478535
69,1.304,1.039717,1.019665,0.834963,1.039717


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 5.9847 - root_mean_squared_error: 2.4464
Epoch 1: val_root_mean_squared_error improved from inf to 0.60742, saving model to cache/ensemble_camembert-base/models/mlp/f0814ebe8f8e9e305b9f5c6b641236468675b4eb3d8137449aa7dbb284e02454_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5034 - root_mean_squared_error: 1.5248 - val_loss: 0.3690 - val_root_mean_squared_error: 0.6074
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2423 - root_mean_squared_error: 0.4922
Epoch 2: val_root_mean_squared_error did not improve from 0.60742
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5103 - root_mean_squared_error: 0.7090 - val_loss: 0.3780 - val_root_mean_squared_error: 0.6148
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7297,3.411041,1.8469,1.576699,3.411041
46,2.5191,1.247822,1.117059,0.833322,1.247822
69,1.5015,0.93454,0.966716,0.76283,0.93454


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 7.2074 - root_mean_squared_error: 2.6847
Epoch 1: val_root_mean_squared_error improved from inf to 0.50646, saving model to cache/ensemble_camembert-base/models/mlp/53e3cfbc2042fca9cdd32768a3cbd77c2f5a9a784a0b77a99cf4a19f30c5c78c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7617 - root_mean_squared_error: 1.5902 - val_loss: 0.2565 - val_root_mean_squared_error: 0.5065
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2025 - root_mean_squared_error: 0.4499
Epoch 2: val_root_mean_squared_error did not improve from 0.50646
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5413 - root_mean_squared_error: 0.7257 - val_loss: 0.4754 - val_root_mean_squared_error: 0.6895
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2727,3.055055,1.747872,1.576384,3.055055
46,2.4143,0.816933,0.903844,0.717534,0.816933
69,1.1256,0.562774,0.750183,0.577507,0.562774


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 701ms/step - loss: 6.8993 - root_mean_squared_error: 2.6267
Epoch 1: val_root_mean_squared_error improved from inf to 0.56132, saving model to cache/ensemble_camembert-base/models/mlp/83b1a39db8964db9bd4d59b269f94b00f1017352da17ce37a917e023cae3122a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5606 - root_mean_squared_error: 1.5326 - val_loss: 0.3151 - val_root_mean_squared_error: 0.5613
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4873 - root_mean_squared_error: 0.6981
Epoch 2: val_root_mean_squared_error improved from 0.56132 to 0.53579, saving model to cache/ensemble_camembert-base/models/mlp/83b1a39db8964db9bd4d59b269f94b00f1017352da17ce37a917e023cae3122a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4693 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1033,4.352822,2.086342,1.870754,4.352823
46,2.2068,1.65032,1.284648,1.019839,1.650319
69,1.2951,1.015523,1.007732,0.820053,1.015523


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 4.1807 - root_mean_squared_error: 2.0447
Epoch 1: val_root_mean_squared_error improved from inf to 0.74783, saving model to cache/ensemble_camembert-base/models/mlp/8dbb243a50642c7b195106006eea75546940ee11e734d41779ff9d55f561a856_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7710 - root_mean_squared_error: 1.6170 - val_loss: 0.5593 - val_root_mean_squared_error: 0.7478
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.0270 - root_mean_squared_error: 1.0134
Epoch 2: val_root_mean_squared_error improved from 0.74783 to 0.44435, saving model to cache/ensemble_camembert-base/models/mlp/8dbb243a50642c7b195106006eea75546940ee11e734d41779ff9d55f561a856_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6154 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.91,3.226424,1.796225,1.583318,3.226424
46,2.384,0.984334,0.992136,0.787624,0.984334
69,1.1023,0.710079,0.842662,0.697096,0.710079


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 11.2935 - root_mean_squared_error: 3.3606
Epoch 1: val_root_mean_squared_error improved from inf to 0.94875, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5854 - root_mean_squared_error: 1.8001 - val_loss: 0.9001 - val_root_mean_squared_error: 0.9487
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7345 - root_mean_squared_error: 0.8571
Epoch 2: val_root_mean_squared_error improved from 0.94875 to 0.75428, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5792 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2845,3.672606,1.916405,1.669081,3.672606
46,2.019,1.24933,1.117735,0.861979,1.24933
69,1.1011,0.887503,0.942074,0.768918,0.887503


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.9269 - root_mean_squared_error: 2.4345
Epoch 1: val_root_mean_squared_error improved from inf to 0.72229, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3890 - root_mean_squared_error: 1.4896 - val_loss: 0.5217 - val_root_mean_squared_error: 0.7223
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3657 - root_mean_squared_error: 0.6048
Epoch 2: val_root_mean_squared_error improved from 0.72229 to 0.71932, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4076 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.647,2.990746,1.729377,1.529036,2.990746
46,2.3582,0.904842,0.951232,0.759448,0.904842
69,1.1376,0.641065,0.800666,0.65546,0.641065


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 7.2895 - root_mean_squared_error: 2.6999
Epoch 1: val_root_mean_squared_error improved from inf to 0.39992, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8787 - root_mean_squared_error: 1.6254 - val_loss: 0.1599 - val_root_mean_squared_error: 0.3999
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7513 - root_mean_squared_error: 0.8668
Epoch 2: val_root_mean_squared_error improved from 0.39992 to 0.35586, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5153 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7701,3.906923,1.976594,1.666565,3.906923
46,2.7403,1.514671,1.23072,0.95434,1.514671
69,1.0802,1.12092,1.058735,0.8595,1.12092


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 9.9008 - root_mean_squared_error: 3.1465
Epoch 1: val_root_mean_squared_error improved from inf to 0.52979, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7970 - root_mean_squared_error: 1.5877 - val_loss: 0.2807 - val_root_mean_squared_error: 0.5298
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5529 - root_mean_squared_error: 0.7436
Epoch 2: val_root_mean_squared_error improved from 0.52979 to 0.41390, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.8292 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6785,4.293563,2.072091,1.83682,4.293563
46,2.0907,1.464893,1.210328,0.952742,1.464893
69,1.1014,0.979773,0.989835,0.780248,0.979773


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 8.0985 - root_mean_squared_error: 2.8458
Epoch 1: val_root_mean_squared_error improved from inf to 1.04592, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0857 - root_mean_squared_error: 1.6803 - val_loss: 1.0939 - val_root_mean_squared_error: 1.0459
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8063 - root_mean_squared_error: 0.8979
Epoch 2: val_root_mean_squared_error improved from 1.04592 to 0.50112, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4842 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0268,3.103596,1.761703,1.551157,3.103596
46,2.4714,0.935905,0.967422,0.772533,0.935905
69,1.2974,0.689107,0.830125,0.690895,0.689107


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 8.0932 - root_mean_squared_error: 2.8449
Epoch 1: val_root_mean_squared_error improved from inf to 0.50846, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5696 - root_mean_squared_error: 1.5351 - val_loss: 0.2585 - val_root_mean_squared_error: 0.5085
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3551 - root_mean_squared_error: 0.5959
Epoch 2: val_root_mean_squared_error improved from 0.50846 to 0.41445, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4610 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4749,4.596589,2.143966,1.889934,4.596588
46,2.5622,1.848885,1.359737,1.033014,1.848885
69,1.3434,1.201466,1.096114,0.818283,1.201467


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.0055 - root_mean_squared_error: 2.6468
Epoch 1: val_root_mean_squared_error improved from inf to 0.57019, saving model to cache/ensemble_camembert-base/models/mlp/b87fc93c444beb317690a0cdff38ec95e97539572aea9d8fd1d1493a9a70c464_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8549 - root_mean_squared_error: 1.6209 - val_loss: 0.3251 - val_root_mean_squared_error: 0.5702
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3514 - root_mean_squared_error: 0.5928
Epoch 2: val_root_mean_squared_error did not improve from 0.57019
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4482 - root_mean_squared_error: 0.6612 - val_loss: 1.0784 - val_root_mean_squared_error: 1.0385
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0707,4.090036,2.022384,1.817365,4.090036
46,2.3618,1.335125,1.155476,0.924791,1.335125
69,1.0725,0.836661,0.914692,0.729735,0.836661


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 7.5891 - root_mean_squared_error: 2.7548
Epoch 1: val_root_mean_squared_error improved from inf to 0.67239, saving model to cache/ensemble_camembert-base/models/mlp/1be993e12967a8ba1c83c27ebea1aee09566abf310f07a6a7cab6b310970de73_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2672 - root_mean_squared_error: 1.7323 - val_loss: 0.4521 - val_root_mean_squared_error: 0.6724
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3664 - root_mean_squared_error: 0.6053
Epoch 2: val_root_mean_squared_error improved from 0.67239 to 0.54479, saving model to cache/ensemble_camembert-base/models/mlp/1be993e12967a8ba1c83c27ebea1aee09566abf310f07a6a7cab6b310970de73_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4750 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5503,3.933802,1.983381,1.798463,3.933802
46,2.1755,1.21386,1.101753,0.890995,1.21386
69,1.0758,0.743398,0.862205,0.727861,0.743398


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 5.9945 - root_mean_squared_error: 2.4484
Epoch 1: val_root_mean_squared_error improved from inf to 0.58404, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4127 - root_mean_squared_error: 1.4965 - val_loss: 0.3411 - val_root_mean_squared_error: 0.5840
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1930 - root_mean_squared_error: 0.4393
Epoch 2: val_root_mean_squared_error improved from 0.58404 to 0.48954, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4457 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1898,4.461369,2.112195,1.900541,4.461369
46,2.0143,1.553425,1.246365,0.971104,1.553425
69,1.0426,0.976762,0.988313,0.775927,0.976762


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 7.6971 - root_mean_squared_error: 2.7744
Epoch 1: val_root_mean_squared_error improved from inf to 0.59563, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0339 - root_mean_squared_error: 1.6660 - val_loss: 0.3548 - val_root_mean_squared_error: 0.5956
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3331 - root_mean_squared_error: 0.5771
Epoch 2: val_root_mean_squared_error improved from 0.59563 to 0.45708, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4317 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.273,3.014478,1.736225,1.560118,3.014479
46,2.7516,0.888157,0.942421,0.794115,0.888157
69,1.2543,0.585762,0.765351,0.62357,0.585762


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 9.4404 - root_mean_squared_error: 3.0725
Epoch 1: val_root_mean_squared_error improved from inf to 0.55952, saving model to cache/ensemble_camembert-base/models/mlp/9086824a215aa3a754fbd8efbae63ad1d6264fb63944d6b33c452d433dc8a2bd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1625 - root_mean_squared_error: 1.6972 - val_loss: 0.3131 - val_root_mean_squared_error: 0.5595
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.3643 - root_mean_squared_error: 0.6035
Epoch 2: val_root_mean_squared_error did not improve from 0.55952
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4431 - root_mean_squared_error: 0.6612 - val_loss: 1.0612 - val_root_mean_squared_error: 1.0302
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6962,3.060973,1.749564,1.600394,3.060973
46,2.5577,0.850617,0.922289,0.751247,0.850617
69,1.5973,0.503534,0.709601,0.596392,0.503534


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.4936 - root_mean_squared_error: 2.5483
Epoch 1: val_root_mean_squared_error improved from inf to 0.47716, saving model to cache/ensemble_camembert-base/models/mlp/1bc5b333d0ae5230beb38025c6ecd92a1cce88dcef8bc965126ea107ea365691_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6779 - root_mean_squared_error: 1.5737 - val_loss: 0.2277 - val_root_mean_squared_error: 0.4772
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2125 - root_mean_squared_error: 0.4610
Epoch 2: val_root_mean_squared_error improved from 0.47716 to 0.35108, saving model to cache/ensemble_camembert-base/models/mlp/1bc5b333d0ae5230beb38025c6ecd92a1cce88dcef8bc965126ea107ea365691_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5081 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1312,4.426154,2.103843,1.873035,4.426154
46,2.152,1.575585,1.255223,1.022787,1.575585
69,1.0086,1.005647,1.00282,0.837349,1.005647


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 4.9565 - root_mean_squared_error: 2.2263
Epoch 1: val_root_mean_squared_error improved from inf to 0.88652, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4201 - root_mean_squared_error: 1.4969 - val_loss: 0.7859 - val_root_mean_squared_error: 0.8865
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2441 - root_mean_squared_error: 0.4940
Epoch 2: val_root_mean_squared_error improved from 0.88652 to 0.68108, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4612 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8631,3.36572,1.83459,1.570721,3.365721
46,2.5269,1.173608,1.083332,0.800043,1.173608
69,1.0367,0.894477,0.945768,0.776788,0.894477


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 688ms/step - loss: 7.1861 - root_mean_squared_error: 2.6807
Epoch 1: val_root_mean_squared_error improved from inf to 0.66974, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9466 - root_mean_squared_error: 1.6532 - val_loss: 0.4485 - val_root_mean_squared_error: 0.6697
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1896 - root_mean_squared_error: 0.4354
Epoch 2: val_root_mean_squared_error improved from 0.66974 to 0.56718, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7281 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.813,3.339229,1.827356,1.657601,3.339229
46,2.6231,0.905357,0.951502,0.764127,0.905357
69,1.0838,0.588749,0.7673,0.615929,0.588749


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 6.4442 - root_mean_squared_error: 2.5385
Epoch 1: val_root_mean_squared_error improved from inf to 0.47243, saving model to cache/ensemble_camembert-base/models/mlp/af0b1614d42172262aa063f83af5042bafb9d87d5762dfe48ef50b307aaafe7f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7255 - root_mean_squared_error: 1.5891 - val_loss: 0.2232 - val_root_mean_squared_error: 0.4724
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1462 - root_mean_squared_error: 0.3823
Epoch 2: val_root_mean_squared_error did not improve from 0.47243
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.4473 - root_mean_squared_error: 0.6588 - val_loss: 0.3321 - val_root_mean_squared_error: 0.5762
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9669,3.896114,1.973858,1.748042,3.896114
46,2.5907,1.322574,1.150032,0.900828,1.322574
69,1.0181,0.873729,0.934735,0.713933,0.873729


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.6914 - root_mean_squared_error: 2.3857
Epoch 1: val_root_mean_squared_error improved from inf to 0.57709, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5818 - root_mean_squared_error: 1.5487 - val_loss: 0.3330 - val_root_mean_squared_error: 0.5771
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.3660 - root_mean_squared_error: 0.6050
Epoch 2: val_root_mean_squared_error improved from 0.57709 to 0.47505, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4135 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8018,4.029327,2.007318,1.784995,4.029327
46,2.2238,1.354047,1.163635,0.906991,1.354047
69,1.2103,0.879355,0.937739,0.77786,0.879355


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 6.4628 - root_mean_squared_error: 2.5422
Epoch 1: val_root_mean_squared_error improved from inf to 0.57211, saving model to cache/ensemble_camembert-base/models/mlp/a25f26fe7618185237b5994dac4a484254d3e981ed2b9bc9c4cd88ae67097ae3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0638 - root_mean_squared_error: 1.6876 - val_loss: 0.3273 - val_root_mean_squared_error: 0.5721
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4956 - root_mean_squared_error: 0.7040
Epoch 2: val_root_mean_squared_error did not improve from 0.57211
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5959 - root_mean_squared_error: 0.7694 - val_loss: 0.6564 - val_root_mean_squared_error: 0.8102
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7471,4.937341,2.222013,1.950991,4.937341
46,2.4677,2.08702,1.444652,1.087945,2.08702
69,1.4695,1.368407,1.169789,0.93227,1.368407


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 692ms/step - loss: 5.2397 - root_mean_squared_error: 2.2890
Epoch 1: val_root_mean_squared_error improved from inf to 1.04088, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4058 - root_mean_squared_error: 1.5007 - val_loss: 1.0834 - val_root_mean_squared_error: 1.0409
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5969 - root_mean_squared_error: 0.7726
Epoch 2: val_root_mean_squared_error improved from 1.04088 to 0.43672, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3302 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1554,3.746655,1.935628,1.774804,3.746655
46,2.395,1.06893,1.033891,0.80722,1.06893
69,1.2264,0.625647,0.790979,0.605087,0.625647


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 9.1975 - root_mean_squared_error: 3.0327
Epoch 1: val_root_mean_squared_error improved from inf to 0.97947, saving model to cache/ensemble_camembert-base/models/mlp/af52127018501ffc43aa59e690e964b2abcaa234084168b6a40cd735844600f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2085 - root_mean_squared_error: 1.7073 - val_loss: 0.9594 - val_root_mean_squared_error: 0.9795
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1163 - root_mean_squared_error: 1.0566
Epoch 2: val_root_mean_squared_error improved from 0.97947 to 0.91710, saving model to cache/ensemble_camembert-base/models/mlp/af52127018501ffc43aa59e690e964b2abcaa234084168b6a40cd735844600f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4991 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9061,3.249499,1.802637,1.566339,3.249499
46,2.3264,1.058617,1.028891,0.808161,1.058617
69,1.2166,0.78348,0.885144,0.750971,0.78348


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 9.0063 - root_mean_squared_error: 3.0011
Epoch 1: val_root_mean_squared_error improved from inf to 0.57064, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3599 - root_mean_squared_error: 1.7454 - val_loss: 0.3256 - val_root_mean_squared_error: 0.5706
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3857 - root_mean_squared_error: 0.6210
Epoch 2: val_root_mean_squared_error improved from 0.57064 to 0.52320, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4886 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2822,2.759021,1.66103,1.489176,2.759021
46,2.3278,0.736322,0.858092,0.674009,0.736322
69,1.0532,0.527118,0.726029,0.557088,0.527118


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 6.2462 - root_mean_squared_error: 2.4992
Epoch 1: val_root_mean_squared_error improved from inf to 0.55393, saving model to cache/ensemble_camembert-base/models/mlp/183f903ea3ace4c6c84b94fc7c5326c54cf755f66e7808187f741cb6ab8c9ad4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3530 - root_mean_squared_error: 1.4707 - val_loss: 0.3068 - val_root_mean_squared_error: 0.5539
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3305 - root_mean_squared_error: 0.5749
Epoch 2: val_root_mean_squared_error did not improve from 0.55393
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4788 - root_mean_squared_error: 0.6900 - val_loss: 0.4884 - val_root_mean_squared_error: 0.6988
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4283,5.164905,2.272643,2.034311,5.164905
46,2.6309,2.163681,1.470946,1.128283,2.163681
69,1.2967,1.375985,1.173024,0.867125,1.375985


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 687ms/step - loss: 8.5989 - root_mean_squared_error: 2.9324
Epoch 1: val_root_mean_squared_error improved from inf to 0.80294, saving model to cache/ensemble_camembert-base/models/mlp/f11d0165c931292c15a0e4fae4dcbd0290b167f5587ec7e68c07053b7a23e905_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8490 - root_mean_squared_error: 1.6056 - val_loss: 0.6447 - val_root_mean_squared_error: 0.8029
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4182 - root_mean_squared_error: 0.6466
Epoch 2: val_root_mean_squared_error did not improve from 0.80294
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5982 - root_mean_squared_error: 0.7684 - val_loss: 0.9624 - val_root_mean_squared_error: 0.9810
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9579,2.803984,1.67451,1.486953,2.803984
46,2.7041,0.770108,0.877558,0.666655,0.770108
69,1.223,0.602469,0.776189,0.583103,0.602469


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.2858 - root_mean_squared_error: 2.2991
Epoch 1: val_root_mean_squared_error improved from inf to 1.02940, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5114 - root_mean_squared_error: 1.5253 - val_loss: 1.0597 - val_root_mean_squared_error: 1.0294
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6065 - root_mean_squared_error: 0.7788
Epoch 2: val_root_mean_squared_error improved from 1.02940 to 0.61113, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5238 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6041,4.086731,2.021566,1.767281,4.086731
46,2.6046,1.513262,1.230147,0.942346,1.513262
69,1.2953,1.02682,1.013321,0.749893,1.02682


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 5.7712 - root_mean_squared_error: 2.4023
Epoch 1: val_root_mean_squared_error improved from inf to 1.13590, saving model to cache/ensemble_camembert-base/models/mlp/71fdcc3b8122b6ee1919855932e05d927b9bc8c6ff8b5c392de6af933844a3ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4837 - root_mean_squared_error: 1.5182 - val_loss: 1.2903 - val_root_mean_squared_error: 1.1359
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.2439 - root_mean_squared_error: 1.1153
Epoch 2: val_root_mean_squared_error improved from 1.13590 to 1.04743, saving model to cache/ensemble_camembert-base/models/mlp/71fdcc3b8122b6ee1919855932e05d927b9bc8c6ff8b5c392de6af933844a3ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.8608 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5485,2.697576,1.64243,1.439861,2.697576
46,3.1224,0.796131,0.892262,0.635673,0.796131
69,1.2233,0.633888,0.796171,0.655318,0.633888


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.2819 - root_mean_squared_error: 2.2982
Epoch 1: val_root_mean_squared_error improved from inf to 0.52870, saving model to cache/ensemble_camembert-base/models/mlp/a1fb1df38bb1083567f4a912152ec6e66ae90613a9eb44b35adf1b59163a5294_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1912 - root_mean_squared_error: 1.7193 - val_loss: 0.2795 - val_root_mean_squared_error: 0.5287
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1865 - root_mean_squared_error: 0.4318
Epoch 2: val_root_mean_squared_error did not improve from 0.52870
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4898 - root_mean_squared_error: 0.6932 - val_loss: 0.7333 - val_root_mean_squared_error: 0.8563
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9371,4.072225,2.017976,1.784658,4.072225
46,2.383,1.405784,1.185658,0.961321,1.405784
69,1.2131,0.92529,0.96192,0.800082,0.92529


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 647ms/step - loss: 4.8408 - root_mean_squared_error: 2.2002
Epoch 1: val_root_mean_squared_error improved from inf to 0.90085, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4254 - root_mean_squared_error: 1.5092 - val_loss: 0.8115 - val_root_mean_squared_error: 0.9009
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1127 - root_mean_squared_error: 0.3357
Epoch 2: val_root_mean_squared_error improved from 0.90085 to 0.87098, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6895 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5732,3.805108,1.950669,1.689666,3.805108
46,2.3155,1.270122,1.126997,0.868244,1.270122
69,0.9588,0.946937,0.973107,0.818019,0.946937


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.5780 - root_mean_squared_error: 2.5648
Epoch 1: val_root_mean_squared_error improved from inf to 0.78820, saving model to cache/ensemble_camembert-base/models/mlp/f6a0475238b2f72e8e97fa597d3d5e5fe89dac0adf4d5c4e7f07494fa5007fbe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9628 - root_mean_squared_error: 1.6524 - val_loss: 0.6213 - val_root_mean_squared_error: 0.7882
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3908 - root_mean_squared_error: 0.6252
Epoch 2: val_root_mean_squared_error did not improve from 0.78820
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6191 - root_mean_squared_error: 0.7774 - val_loss: 0.9748 - val_root_mean_squared_error: 0.9873
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8611,4.553985,2.134007,1.880993,4.553985
46,2.6615,1.735639,1.317437,1.011096,1.735639
69,1.1713,1.148748,1.071797,0.819185,1.148748


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.6142 - root_mean_squared_error: 2.3694
Epoch 1: val_root_mean_squared_error improved from inf to 0.50013, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1496 - root_mean_squared_error: 1.7170 - val_loss: 0.2501 - val_root_mean_squared_error: 0.5001
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1960 - root_mean_squared_error: 0.4427
Epoch 2: val_root_mean_squared_error improved from 0.50013 to 0.45839, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3949 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9683,4.680776,2.16351,1.953036,4.680775
46,2.4566,1.675129,1.294268,1.048165,1.675129
69,0.9406,1.017863,1.008892,0.813502,1.017862


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 8.2772 - root_mean_squared_error: 2.8770
Epoch 1: val_root_mean_squared_error improved from inf to 0.72440, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2702 - root_mean_squared_error: 1.7267 - val_loss: 0.5248 - val_root_mean_squared_error: 0.7244
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5510 - root_mean_squared_error: 0.7423
Epoch 2: val_root_mean_squared_error improved from 0.72440 to 0.41899, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3965 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0313,3.692905,1.921693,1.63877,3.692905
46,2.3141,1.401611,1.183897,0.837716,1.401611
69,1.0459,1.023863,1.011861,0.796839,1.023863


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 8.7648 - root_mean_squared_error: 2.9605
Epoch 1: val_root_mean_squared_error improved from inf to 0.83444, saving model to cache/ensemble_camembert-base/models/mlp/5eed6be817ccc55cb4f9507e59170cfe8070fcb1aee8bc32fa5e487cad137e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9220 - root_mean_squared_error: 1.6330 - val_loss: 0.6963 - val_root_mean_squared_error: 0.8344
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0428 - root_mean_squared_error: 1.0212
Epoch 2: val_root_mean_squared_error improved from 0.83444 to 0.57818, saving model to cache/ensemble_camembert-base/models/mlp/5eed6be817ccc55cb4f9507e59170cfe8070fcb1aee8bc32fa5e487cad137e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5345 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7657,3.7458,1.935407,1.673398,3.745799
46,2.4388,1.345455,1.159938,0.873796,1.345455
69,0.9928,0.956904,0.978215,0.773491,0.956904


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 5.1354 - root_mean_squared_error: 2.2661
Epoch 1: val_root_mean_squared_error improved from inf to 0.86821, saving model to cache/ensemble_camembert-base/models/mlp/0f9da934e850fc9ddd6a84688029dc634de43ddcea5499b59d4fabc29432cdd2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4567 - root_mean_squared_error: 1.5098 - val_loss: 0.7538 - val_root_mean_squared_error: 0.8682
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3982 - root_mean_squared_error: 0.6311
Epoch 2: val_root_mean_squared_error did not improve from 0.86821
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4814 - root_mean_squared_error: 0.6927 - val_loss: 0.9504 - val_root_mean_squared_error: 0.9749
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.0482,3.064559,1.750588,1.572865,3.064559
46,2.101,0.861658,0.928255,0.65094,0.861658
69,0.9744,0.587457,0.766458,0.564399,0.587457


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 7.5177 - root_mean_squared_error: 2.7418
Epoch 1: val_root_mean_squared_error improved from inf to 0.62807, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6575 - root_mean_squared_error: 1.5575 - val_loss: 0.3945 - val_root_mean_squared_error: 0.6281
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4220 - root_mean_squared_error: 0.6496
Epoch 2: val_root_mean_squared_error improved from 0.62807 to 0.43500, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3269 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4893,2.992668,1.729933,1.532718,2.992668
46,2.2664,0.912841,0.955427,0.734196,0.912841
69,1.375,0.63362,0.796003,0.611466,0.63362


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 4.7449 - root_mean_squared_error: 2.1783
Epoch 1: val_root_mean_squared_error improved from inf to 0.67642, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4672 - root_mean_squared_error: 1.5194 - val_loss: 0.4575 - val_root_mean_squared_error: 0.6764
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.3295 - root_mean_squared_error: 0.5740
Epoch 2: val_root_mean_squared_error improved from 0.67642 to 0.36868, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5186 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2985,3.009106,1.734678,1.540275,3.009106
46,2.2605,0.824461,0.907999,0.680073,0.824461
69,0.9981,0.639519,0.7997,0.636206,0.639519


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 651ms/step - loss: 8.7459 - root_mean_squared_error: 2.9573
Epoch 1: val_root_mean_squared_error improved from inf to 0.47474, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9947 - root_mean_squared_error: 1.6568 - val_loss: 0.2254 - val_root_mean_squared_error: 0.4747
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4075 - root_mean_squared_error: 0.6384
Epoch 2: val_root_mean_squared_error improved from 0.47474 to 0.34608, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5956 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1479,3.363371,1.833949,1.677695,3.36337
46,1.9613,0.983166,0.991547,0.759093,0.983166
69,1.2324,0.578192,0.760389,0.598983,0.578192


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 648ms/step - loss: 5.0549 - root_mean_squared_error: 2.2483
Epoch 1: val_root_mean_squared_error improved from inf to 1.14209, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.1386 - root_mean_squared_error: 1.4153 - val_loss: 1.3044 - val_root_mean_squared_error: 1.1421
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.7886 - root_mean_squared_error: 1.3374
Epoch 2: val_root_mean_squared_error improved from 1.14209 to 0.42148, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.9013 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2736,4.447734,2.108965,1.858497,4.447734
46,2.3142,1.640997,1.281014,1.021029,1.640997
69,0.8736,1.086777,1.042486,0.810575,1.086777


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 9.0950 - root_mean_squared_error: 3.0158
Epoch 1: val_root_mean_squared_error improved from inf to 0.61013, saving model to cache/ensemble_camembert-base/models/mlp/fe709aa942371b132a95782966e8e7efe2e93dca80ac9ef37d14fcdca605aabe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8035 - root_mean_squared_error: 1.5911 - val_loss: 0.3723 - val_root_mean_squared_error: 0.6101
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2954 - root_mean_squared_error: 0.5435
Epoch 2: val_root_mean_squared_error did not improve from 0.61013
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6794 - root_mean_squared_error: 0.8135 - val_loss: 0.8165 - val_root_mean_squared_error: 0.9036
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5996,2.930994,1.712014,1.478862,2.930993
46,2.6069,0.976659,0.98826,0.781023,0.976659
69,1.3578,0.746297,0.863885,0.727616,0.746297


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.6556 - root_mean_squared_error: 2.3782
Epoch 1: val_root_mean_squared_error improved from inf to 0.85030, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5034 - root_mean_squared_error: 1.5239 - val_loss: 0.7230 - val_root_mean_squared_error: 0.8503
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3497 - root_mean_squared_error: 0.5913
Epoch 2: val_root_mean_squared_error improved from 0.85030 to 0.44462, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5668 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.54,3.137876,1.771405,1.529979,3.137876
46,2.4486,1.064642,1.031815,0.780889,1.064642
69,1.3618,0.799702,0.89426,0.740255,0.799702


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.4852 - root_mean_squared_error: 2.7359
Epoch 1: val_root_mean_squared_error improved from inf to 0.55276, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7721 - root_mean_squared_error: 1.5935 - val_loss: 0.3055 - val_root_mean_squared_error: 0.5528
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2053 - root_mean_squared_error: 0.4531
Epoch 2: val_root_mean_squared_error improved from 0.55276 to 0.47043, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5245 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0385,3.31149,1.81975,1.662618,3.31149
46,2.243,0.902899,0.95021,0.742979,0.902899
69,0.9826,0.552807,0.74351,0.529316,0.552807


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 6.2951 - root_mean_squared_error: 2.5090
Epoch 1: val_root_mean_squared_error improved from inf to 0.40266, saving model to cache/ensemble_camembert-base/models/mlp/43eed69246bd9282f4a4e2b07e3e0e39980b6ea0c81608dd9085d6cd9597d69b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6833 - root_mean_squared_error: 1.5767 - val_loss: 0.1621 - val_root_mean_squared_error: 0.4027
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2103 - root_mean_squared_error: 0.4586
Epoch 2: val_root_mean_squared_error improved from 0.40266 to 0.36752, saving model to cache/ensemble_camembert-base/models/mlp/43eed69246bd9282f4a4e2b07e3e0e39980b6ea0c81608dd9085d6cd9597d69b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3380 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7548,4.4621,2.112368,1.879135,4.462099
46,2.1435,1.750049,1.322894,1.030099,1.750049
69,1.3474,1.105254,1.051311,0.818184,1.105254


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 4.7122 - root_mean_squared_error: 2.1708
Epoch 1: val_root_mean_squared_error improved from inf to 0.74926, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6139 - root_mean_squared_error: 1.5708 - val_loss: 0.5614 - val_root_mean_squared_error: 0.7493
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2914 - root_mean_squared_error: 0.5398
Epoch 2: val_root_mean_squared_error improved from 0.74926 to 0.57105, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5641 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5823,3.138814,1.77167,1.545934,3.138815
46,2.0462,0.969157,0.984457,0.736334,0.969157
69,1.1251,0.744577,0.862889,0.691536,0.744577


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 9.7080 - root_mean_squared_error: 3.1158
Epoch 1: val_root_mean_squared_error improved from inf to 0.89228, saving model to cache/ensemble_camembert-base/models/mlp/183458d6ef231572363d79c807b3a5168d5b76c59da1f13459a03e2ee3816770_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3436 - root_mean_squared_error: 1.7427 - val_loss: 0.7962 - val_root_mean_squared_error: 0.8923
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6225 - root_mean_squared_error: 0.7890
Epoch 2: val_root_mean_squared_error improved from 0.89228 to 0.46038, saving model to cache/ensemble_camembert-base/models/mlp/183458d6ef231572363d79c807b3a5168d5b76c59da1f13459a03e2ee3816770_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6840 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.395,3.807214,1.951208,1.704565,3.807214
46,1.9631,1.311484,1.1452,0.913523,1.311484
69,1.0891,0.911256,0.954597,0.823692,0.911256


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 5.7898 - root_mean_squared_error: 2.4062
Epoch 1: val_root_mean_squared_error improved from inf to 0.63128, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3266 - root_mean_squared_error: 1.4663 - val_loss: 0.3985 - val_root_mean_squared_error: 0.6313
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5315 - root_mean_squared_error: 0.7290
Epoch 2: val_root_mean_squared_error improved from 0.63128 to 0.45777, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6070 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4015,2.930906,1.711989,1.508082,2.930906
46,2.25,0.88612,0.94134,0.756839,0.88612
69,1.0262,0.640954,0.800596,0.660341,0.640954


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 6.8025 - root_mean_squared_error: 2.6082
Epoch 1: val_root_mean_squared_error improved from inf to 0.43461, saving model to cache/ensemble_camembert-base/models/mlp/d8dc3a3bcd119757161db5929b64dac089221b5ac587519629bfc3de8ed7c4cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 3.0119 - root_mean_squared_error: 1.6665 - val_loss: 0.1889 - val_root_mean_squared_error: 0.4346
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5533 - root_mean_squared_error: 0.7438
Epoch 2: val_root_mean_squared_error improved from 0.43461 to 0.34650, saving model to cache/ensemble_camembert-base/models/mlp/d8dc3a3bcd119757161db5929b64dac089221b5ac587519629bfc3de8ed7c4cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5192 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9947,2.559696,1.599905,1.371571,2.559696
46,2.8229,0.77984,0.883085,0.710008,0.77984
69,1.0098,0.708614,0.841792,0.726189,0.708614


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 10.6675 - root_mean_squared_error: 3.2661
Epoch 1: val_root_mean_squared_error improved from inf to 0.39170, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9772 - root_mean_squared_error: 1.6352 - val_loss: 0.1534 - val_root_mean_squared_error: 0.3917
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.4959 - root_mean_squared_error: 0.7042
Epoch 2: val_root_mean_squared_error improved from 0.39170 to 0.36566, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5933 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7602,3.029783,1.740627,1.512485,3.029783
46,1.9727,0.915609,0.956875,0.749489,0.915609
69,1.1789,0.745055,0.863166,0.686407,0.745055


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 8.4826 - root_mean_squared_error: 2.9125
Epoch 1: val_root_mean_squared_error improved from inf to 0.67750, saving model to cache/ensemble_camembert-base/models/mlp/1f0680205112bab0fa887c0a2a33810a05bec5e449ce8aaa3aecb1affeb4c30c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4014 - root_mean_squared_error: 1.7628 - val_loss: 0.4590 - val_root_mean_squared_error: 0.6775
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7154 - root_mean_squared_error: 0.8458
Epoch 2: val_root_mean_squared_error improved from 0.67750 to 0.45453, saving model to cache/ensemble_camembert-base/models/mlp/1f0680205112bab0fa887c0a2a33810a05bec5e449ce8aaa3aecb1affeb4c30c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4547 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6316,3.216952,1.793586,1.599693,3.216952
46,2.4755,0.94321,0.97119,0.772281,0.94321
69,1.2981,0.653141,0.808171,0.665819,0.653141


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 7.5094 - root_mean_squared_error: 2.7403
Epoch 1: val_root_mean_squared_error improved from inf to 0.48708, saving model to cache/ensemble_camembert-base/models/mlp/23d74afe30d4b952316e803e78a46796dc8cb76e6f7a3a8e71ebbc1cbafaef9e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4302 - root_mean_squared_error: 1.4922 - val_loss: 0.2373 - val_root_mean_squared_error: 0.4871
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5963 - root_mean_squared_error: 0.7722
Epoch 2: val_root_mean_squared_error improved from 0.48708 to 0.33039, saving model to cache/ensemble_camembert-base/models/mlp/23d74afe30d4b952316e803e78a46796dc8cb76e6f7a3a8e71ebbc1cbafaef9e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4721 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2354,3.755107,1.93781,1.708115,3.755107
46,2.713,1.359464,1.16596,0.857215,1.359464
69,1.1935,0.895807,0.946471,0.68689,0.895807


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 8.0865 - root_mean_squared_error: 2.8437
Epoch 1: val_root_mean_squared_error improved from inf to 0.56485, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0222 - root_mean_squared_error: 1.6635 - val_loss: 0.3191 - val_root_mean_squared_error: 0.5649
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2879 - root_mean_squared_error: 0.5365
Epoch 2: val_root_mean_squared_error improved from 0.56485 to 0.54657, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4888 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9945,4.032675,2.008152,1.81525,4.032675
46,2.1529,1.295519,1.138209,0.922964,1.295519
69,1.0894,0.795299,0.891795,0.706523,0.795299


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.7684 - root_mean_squared_error: 2.6016
Epoch 1: val_root_mean_squared_error improved from inf to 0.55988, saving model to cache/ensemble_camembert-base/models/mlp/483b33af5f90d2d7949f7aa3288319156264cd8b86a042df746190742285050b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9029 - root_mean_squared_error: 1.6373 - val_loss: 0.3135 - val_root_mean_squared_error: 0.5599
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2662 - root_mean_squared_error: 0.5159
Epoch 2: val_root_mean_squared_error did not improve from 0.55988
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4877 - root_mean_squared_error: 0.6900 - val_loss: 0.6727 - val_root_mean_squared_error: 0.8202
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6208,3.582821,1.892834,1.682324,3.582821
46,2.2387,1.112496,1.05475,0.835458,1.112497
69,1.1355,0.761719,0.872765,0.738184,0.761719


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.7881 - root_mean_squared_error: 2.6054
Epoch 1: val_root_mean_squared_error improved from inf to 0.63040, saving model to cache/ensemble_camembert-base/models/mlp/a98874d3af321769dacd345fef988fba1d8e5abf878391234b1670475bec4d29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6024 - root_mean_squared_error: 1.5511 - val_loss: 0.3974 - val_root_mean_squared_error: 0.6304
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2112 - root_mean_squared_error: 0.4596
Epoch 2: val_root_mean_squared_error improved from 0.63040 to 0.59723, saving model to cache/ensemble_camembert-base/models/mlp/a98874d3af321769dacd345fef988fba1d8e5abf878391234b1670475bec4d29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4910 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,4.9639,4.582104,2.140585,1.928902,4.582103
46,1.8892,1.59726,1.263828,0.970307,1.597261
69,1.0555,1.004456,1.002226,0.746271,1.004456


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 7.1720 - root_mean_squared_error: 2.6780
Epoch 1: val_root_mean_squared_error improved from inf to 0.70523, saving model to cache/ensemble_camembert-base/models/mlp/4e0c0e453d7907eb14cdeff662e022ed234097c96d59df1b7f89bf57b8b09c82_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9277 - root_mean_squared_error: 1.6428 - val_loss: 0.4973 - val_root_mean_squared_error: 0.7052
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2939 - root_mean_squared_error: 0.5421
Epoch 2: val_root_mean_squared_error improved from 0.70523 to 0.41858, saving model to cache/ensemble_camembert-base/models/mlp/4e0c0e453d7907eb14cdeff662e022ed234097c96d59df1b7f89bf57b8b09c82_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4788 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5968,2.993875,1.730282,1.517733,2.993875
46,2.6097,0.957906,0.978727,0.827998,0.957906
69,1.1285,0.687304,0.829038,0.692043,0.687304


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 9.0006 - root_mean_squared_error: 3.0001
Epoch 1: val_root_mean_squared_error improved from inf to 0.62108, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1543 - root_mean_squared_error: 1.6946 - val_loss: 0.3857 - val_root_mean_squared_error: 0.6211
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4326 - root_mean_squared_error: 0.6577
Epoch 2: val_root_mean_squared_error improved from 0.62108 to 0.37803, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4164 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6902,3.308425,1.818908,1.647594,3.308425
46,2.5468,1.015595,1.007767,0.797248,1.015595
69,1.4806,0.619552,0.787116,0.629309,0.619552


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.8467 - root_mean_squared_error: 2.6166
Epoch 1: val_root_mean_squared_error improved from inf to 0.51475, saving model to cache/ensemble_camembert-base/models/mlp/3faa95308897e7acf1cd78779ed6236072a67a0a737c4f0dda5f4c0d29bb6a37_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5418 - root_mean_squared_error: 1.5288 - val_loss: 0.2650 - val_root_mean_squared_error: 0.5147
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3280 - root_mean_squared_error: 0.5727
Epoch 2: val_root_mean_squared_error improved from 0.51475 to 0.46104, saving model to cache/ensemble_camembert-base/models/mlp/3faa95308897e7acf1cd78779ed6236072a67a0a737c4f0dda5f4c0d29bb6a37_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5193 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8556,4.189713,2.046879,1.812042,4.189714
46,2.1953,1.472016,1.213267,0.946528,1.472016
69,1.0254,0.971475,0.985634,0.770144,0.971475


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 5.0122 - root_mean_squared_error: 2.2388
Epoch 1: val_root_mean_squared_error improved from inf to 0.58017, saving model to cache/ensemble_camembert-base/models/mlp/824f87d6c7c60f9781842265a1024888cb76f930850c07e83abbaf20e904038a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.1555 - root_mean_squared_error: 1.4174 - val_loss: 0.3366 - val_root_mean_squared_error: 0.5802
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3955 - root_mean_squared_error: 0.6289
Epoch 2: val_root_mean_squared_error did not improve from 0.58017
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5138 - root_mean_squared_error: 0.7065 - val_loss: 0.6780 - val_root_mean_squared_error: 0.8234
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2803,3.381557,1.838901,1.554436,3.381557
46,2.2124,1.228499,1.108377,0.802022,1.228499
69,1.1291,0.958461,0.97901,0.779361,0.958461


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.5447 - root_mean_squared_error: 2.3547
Epoch 1: val_root_mean_squared_error improved from inf to 0.74339, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8064 - root_mean_squared_error: 1.6191 - val_loss: 0.5526 - val_root_mean_squared_error: 0.7434
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7137 - root_mean_squared_error: 0.8448
Epoch 2: val_root_mean_squared_error improved from 0.74339 to 0.36075, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.8121 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4884,3.356256,1.832009,1.63247,3.356256
46,2.6398,1.015765,1.007852,0.799324,1.015765
69,1.0339,0.683325,0.826635,0.672532,0.683325


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 649ms/step - loss: 6.3547 - root_mean_squared_error: 2.5209
Epoch 1: val_root_mean_squared_error improved from inf to 0.51358, saving model to cache/ensemble_camembert-base/models/mlp/7f65c1db0bf5816778ccfac18dbf377cb824894ecf1f586c7be43ac5a083472a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9187 - root_mean_squared_error: 1.6418 - val_loss: 0.2638 - val_root_mean_squared_error: 0.5136
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1463 - root_mean_squared_error: 0.3824
Epoch 2: val_root_mean_squared_error improved from 0.51358 to 0.40755, saving model to cache/ensemble_camembert-base/models/mlp/7f65c1db0bf5816778ccfac18dbf377cb824894ecf1f586c7be43ac5a083472a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4819 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6316,3.183794,1.784319,1.551127,3.183794
46,2.8055,1.021346,1.010617,0.783869,1.021346
69,0.8888,0.767327,0.875972,0.71133,0.767327


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.9563 - root_mean_squared_error: 2.6375
Epoch 1: val_root_mean_squared_error improved from inf to 0.50345, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 2.5994 - root_mean_squared_error: 1.5475 - val_loss: 0.2535 - val_root_mean_squared_error: 0.5035
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.1069 - root_mean_squared_error: 0.3270
Epoch 2: val_root_mean_squared_error improved from 0.50345 to 0.41271, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3411 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7801,3.548751,1.883813,1.670988,3.548751
46,1.9938,1.13495,1.06534,0.792144,1.13495
69,1.3401,0.766679,0.875602,0.711158,0.766679


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 654ms/step - loss: 7.0324 - root_mean_squared_error: 2.6519
Epoch 1: val_root_mean_squared_error improved from inf to 0.57646, saving model to cache/ensemble_camembert-base/models/mlp/474df8b6e3907935850fd01c78e7384e00b4dd9a1d64a3fdd8ebdc32fcadfdf8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2259 - root_mean_squared_error: 1.7241 - val_loss: 0.3323 - val_root_mean_squared_error: 0.5765
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5558 - root_mean_squared_error: 0.7456
Epoch 2: val_root_mean_squared_error did not improve from 0.57646
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6019 - root_mean_squared_error: 0.7705 - val_loss: 1.3468 - val_root_mean_squared_error: 1.1605
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6143,4.722695,2.173176,1.907129,4.722696
46,2.4501,1.946471,1.39516,1.040616,1.946471
69,1.2886,1.26981,1.126859,0.890494,1.26981


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 688ms/step - loss: 4.7973 - root_mean_squared_error: 2.1903
Epoch 1: val_root_mean_squared_error improved from inf to 1.09422, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.5135 - root_mean_squared_error: 1.5340 - val_loss: 1.1973 - val_root_mean_squared_error: 1.0942
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6184 - root_mean_squared_error: 0.7864
Epoch 2: val_root_mean_squared_error improved from 1.09422 to 0.46828, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3554 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0654,3.375475,1.837247,1.664432,3.375475
46,2.15,0.935725,0.967329,0.723659,0.935726
69,1.169,0.604867,0.777732,0.586128,0.604867


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 687ms/step - loss: 10.1452 - root_mean_squared_error: 3.1851
Epoch 1: val_root_mean_squared_error improved from inf to 0.74577, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2636 - root_mean_squared_error: 1.7185 - val_loss: 0.5562 - val_root_mean_squared_error: 0.7458
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7299 - root_mean_squared_error: 0.8543
Epoch 2: val_root_mean_squared_error improved from 0.74577 to 0.38648, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4216 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6876,2.706158,1.64504,1.426401,2.706158
46,2.2232,0.835071,0.913822,0.707505,0.835071
69,1.2661,0.670153,0.818629,0.665106,0.670153


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 652ms/step - loss: 9.9335 - root_mean_squared_error: 3.1517
Epoch 1: val_root_mean_squared_error improved from inf to 0.48125, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.6472 - root_mean_squared_error: 1.8160 - val_loss: 0.2316 - val_root_mean_squared_error: 0.4812
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4166 - root_mean_squared_error: 0.6455
Epoch 2: val_root_mean_squared_error improved from 0.48125 to 0.38589, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5047 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3471,2.80457,1.674685,1.487371,2.80457
46,2.1425,0.804679,0.897039,0.690386,0.804679
69,1.0871,0.590086,0.76817,0.597138,0.590086


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 4.9298 - root_mean_squared_error: 2.2203
Epoch 1: val_root_mean_squared_error improved from inf to 0.63216, saving model to cache/ensemble_camembert-base/models/mlp/a26faa62580a71129dba42331db1036b03fb5a20d296815b9af286c97f35a242_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.0995 - root_mean_squared_error: 1.4004 - val_loss: 0.3996 - val_root_mean_squared_error: 0.6322
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1559 - root_mean_squared_error: 0.3948
Epoch 2: val_root_mean_squared_error did not improve from 0.63216
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4441 - root_mean_squared_error: 0.6615 - val_loss: 0.4139 - val_root_mean_squared_error: 0.6434
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

  df_macro_ensemble_scores = pd.concat([df_macro_ensemble_scores, new_row], ignore_index=True)


In [8]:
%rm -rf cache

In [6]:
from google.colab import files
!zip -r /content/logs.zip /content/training/cache/ensemble_camembert-base/logs

files.download('/content/logs.zip')

  adding: content/training/cache/ensemble_camembert-base/logs/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725239508.f00b55962c61.1259.41 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725240572.f00b55962c61.1259.81 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725238454.f00b55962c61.1259.1 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725242712.f00b55962c61.1259.161 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725241644.f00b55962c61.1259.121 (deflated 62%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_26/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/mem

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!cp -r /content/training/cache/ensemble_camembert-base/models /content/drive/MyDrive/Models0109

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
