## A. Installation

### A.1. Structure

Pour réinitialiser la structure (from scratch) :

In [1]:
!cd /content
!rm -rf /content/Merval
!git clone https://github.com/mervealgan/Merval

Cloning into 'Merval'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 35 (delta 12), reused 29 (delta 11), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 366.01 KiB | 1.58 MiB/s, done.
Resolving deltas: 100% (12/12), done.


In [2]:
!rm -rf /content/training/data
!mkdir -p /content/training/data/features

!cp -r /content/Merval/data/features/* /content/training/data/features
!cp /content/Merval/data/test_set.csv /content/training/data/test_set.csv
!cp /content/Merval/data/training_set.csv /content/training/data/training_set.csv
!cp /content/Merval/data/valid_set.csv /content/training/data/valid_set.csv

In [3]:
%cd /content/training/

/content/training


### A.2. imports

In [4]:
!pip install tbparse
!pip install syntok
!pip install stanza
!pip install textcomplexity
!pip install transformers[torch]
!pip install accelerate -U

Collecting tbparse
  Downloading tbparse-0.0.9-py3-none-any.whl.metadata (8.7 kB)
Downloading tbparse-0.0.9-py3-none-any.whl (19 kB)
Installing collected packages: tbparse
Successfully installed tbparse-0.0.9
Collecting syntok
  Downloading syntok-1.4.4-py3-none-any.whl.metadata (10 kB)
Downloading syntok-1.4.4-py3-none-any.whl (24 kB)
Installing collected packages: syntok
Successfully installed syntok-1.4.4
Collecting stanza
  Downloading stanza-1.8.2-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.12.1-py3-none-any.whl.metadata (5.4 kB)
Downloading stanza-1.8.2-py3-none-any.whl (990 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.1/990.1 kB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading emoji-2.12.1-py3-none-any.whl (431 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.4/431.4 kB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji, stanza
Successfully in

## B. Entrainement

In [5]:
import hashlib
import os
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import (Sequential, load_model)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from transformers import (AutoTokenizer, TrainingArguments, AutoModelForSequenceClassification, set_seed, Trainer,
                          EarlyStoppingCallback, )


class TCCDataset(torch.utils.data.Dataset):
    def __init__(self, tokens, labels):
        self.tokens = tokens
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # return tensor
        item = {key: val[idx].clone().detach() for key, val in self.tokens.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item


class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.MSELoss()
        loss = loss_fct(
            logits.view(-1, self.model.config.num_labels),
            labels.float().view(-1, self.model.config.num_labels),
        )
        return (loss, outputs) if return_outputs else loss


class OptimizedESCallback(EarlyStoppingCallback):
    def __init__(self, patience, initial_steps_wo_save):
        super().__init__(early_stopping_patience=patience)
        self.initial_steps_wo_save = initial_steps_wo_save

    def check_metric_value(self, args, state, control, metric_value):
        super().check_metric_value(args, state, control, metric_value)
        if self.early_stopping_patience_counter == 0:
            control.should_save = True

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        if state.global_step < self.initial_steps_wo_save:
            return
        super().on_evaluate(args, state, control, metrics, **kwargs)


def compute_metrics(y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def compute_metrics_for_regression(eval_pred):
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)

    rmse = mean_squared_error(labels, logits, squared=False)
    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)

    return {
        "root_mean_squared_error": rmse,
        "mean_absolute_error": mae,
        "mean_squared_error": mse,
    }


def get_hugging_face_name(name):
    if name == "camembert-base":
        return "almanach/camembert-base"
    if name == "camembert-large":
        return "almanach/camembert-large"
    return ""


def load_dataset(path, encoding="utf-8", shuffle=True):
    df = pd.read_csv(path, encoding=encoding)
    df.drop_duplicates(inplace=True)
    if shuffle:
        df = df.sample(frac=1, random_state=9).reset_index(drop=True)
    return df


def load_dataset_with_features_fr(dataset, data_root_path='data_fr'):
    df = load_dataset(os.path.join(data_root_path, f'{dataset}_set.csv'))
    df_features = pd.read_csv(os.path.join(data_root_path, 'features', f'features_{dataset}_readability_fr.csv'))
    df_merged = df.merge(df_features, on='ID', suffixes=('', '_df2'))

    # drop or ignore some columns
    ignore_columns = ['sentence_df2', 'paragraphs', 'sentences_per_paragraph']
    df_merged.drop(columns=ignore_columns, inplace=True)

    # add some of our own features
    df_merged['max_word_length'] = df_merged['sentence'].apply(lambda x: max([len(w) for w in x.split()]))

    for i in range(5, 10):
        df_merged['num_word_longer_than_' + str(i)] = df_merged['sentence'].apply(
            lambda x: sum([len(w) > i for w in x.split()]))

    feature_columns = df_merged.columns.to_list()[df_merged.columns.to_list().index('sentence') + 1:]

    return df_merged, feature_columns


os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

BOOTSTRAP_SIZE = 1000  # 1000
MAX_ENSEMBLE_SIZE = 35  # 60
ENSEMBLE_POOL_SIZE = 40  # 100
N_FOLDS = 5
MODEL_NAME = 'camembert-base'  # ['gbert', 'gelectra', 'gottbert', 'gerpt']
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
N_EVAL_STEPS = 23

EXPERIMENT_NAME = f'ensemble_{MODEL_NAME}'
EXPERIMENT_DIR = f'cache/{EXPERIMENT_NAME}'

from tensorflow.keras.callbacks import TensorBoard
log_dir = f'{EXPERIMENT_DIR}/logs/mlp/'
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

df_train, feature_columns = load_dataset_with_features_fr('training', data_root_path='data')


def get_predictions(
        df_train_folds,
        df_val_fold,
        n_epochs=10, # Increased from 5 to 10
        n_log_steps=10,
):
    tf.debugging.disable_traceback_filtering()
    # storing predictions in dataframe
    # columns: Sentence, Prediction of Model 1, Prediction of Model 2, ...
    df_predictions_val_fold = df_val_fold[['ID', 'sentence']].copy()

    # get tokenizer
    tokenizer = AutoTokenizer.from_pretrained(get_hugging_face_name(MODEL_NAME))

    X_val_fold = df_val_fold['sentence'].values
    X_val_fold_features = df_val_fold[feature_columns].values

    # tokenize
    tokens_val_fold = tokenizer(X_val_fold.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                max_length=128)

    for k in range(ENSEMBLE_POOL_SIZE):
        df_early_stopping = df_train_folds.sample(frac=0.1, random_state=k)
        df_train_no_es = df_train_folds.drop(
            df_train_folds[
                df_train_folds['ID'].isin(df_early_stopping['ID'])
            ].index
        )

        ## or use this simplified code to drop rows whose 'ID' is in df_early_stopping['ID']
        # df_train_no_es = df_train_folds[~df_train_folds['ID'].isin(df_early_stopping['ID'])]

        X_early_stopping = df_early_stopping['sentence'].values
        X_early_stopping_features = df_early_stopping[feature_columns].values
        y_early_stopping = df_early_stopping['MOS'].values

        X_training = df_train_no_es['sentence'].values
        X_training_features = df_train_no_es[feature_columns].values
        y_training = df_train_no_es['MOS'].values

        # tokenize
        tokens_early_stopping = tokenizer(X_early_stopping.tolist(), padding='max_length', return_tensors='pt',
                                          truncation=True, max_length=128)

        tokens_training = tokenizer(X_training.tolist(), padding='max_length', return_tensors='pt', truncation=True,
                                    max_length=128)

        hash = (
                hashlib.sha256(
                    pd.util.hash_pandas_object(df_train_no_es['ID'], index=True).values
                ).hexdigest()
                + '_'
                + get_hugging_face_name(MODEL_NAME)[
                  get_hugging_face_name(MODEL_NAME).find('/') + 1:
                  ]
        )

        # load model and, if necessary, train it
        try:
            print(f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}')
            model = AutoModelForSequenceClassification.from_pretrained(
                f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}', local_files_only=True, num_labels=1
            )
        except EnvironmentError:
            # create training dataset
            early_stopping_dataset = TCCDataset(tokens_early_stopping, y_early_stopping)
            training_dataset = TCCDataset(tokens_training, y_training)

            training_args = TrainingArguments(
                output_dir=f'{EXPERIMENT_DIR}/{MODEL_NAME}_trainer/',
                num_train_epochs=10, # Updated (n_epochs)
                per_device_train_batch_size=TRAIN_BATCH_SIZE,
                per_device_eval_batch_size=VALID_BATCH_SIZE,
                warmup_ratio=0.3,
                learning_rate=5e-5,
                no_cuda=False,
                metric_for_best_model='root_mean_squared_error',
                greater_is_better=False,
                load_best_model_at_end=True,
                save_steps=N_EVAL_STEPS * 100_000,
                # we never want to save a model through this function, but the parameter must be set, because of load_best_model_at_end=True
                save_total_limit=1,  # can be 1, because we only save, when we find a better model
                eval_steps=N_EVAL_STEPS,
                # `evaluation_strategy` is deprecated, Use `eval_strategy` instead
                eval_strategy='steps',
                seed=k,
                logging_steps=n_log_steps,
                logging_dir=f'{EXPERIMENT_DIR}/logs/member_{k}',
                logging_strategy='steps',
            )

            set_seed(training_args.seed)
            model = AutoModelForSequenceClassification.from_pretrained(
                get_hugging_face_name(MODEL_NAME), num_labels=1
            )

            trainer = RegressionTrainer(
                model=model,
                args=training_args,
                train_dataset=training_dataset,
                eval_dataset=early_stopping_dataset,
                compute_metrics=compute_metrics_for_regression,
                callbacks=[OptimizedESCallback(patience=5, initial_steps_wo_save=300)],
            )
            # training
            trainer.train()

            # save model
            model.save_pretrained(f'{EXPERIMENT_DIR}/models/{MODEL_NAME}/{hash}')

        # load hidden states of model for validation and test data
        hidden_state_val_fold = extract_hidden_state(model, tokens_val_fold)

        # normalize data with StandardScaler
        scaler = StandardScaler()
        scaler.fit(df_train_folds[feature_columns].values)
        X_val_fold_features_scaled = scaler.transform(X_val_fold_features)
        X_val_fold_with_features = np.concatenate((hidden_state_val_fold.detach().numpy(), X_val_fold_features_scaled),
                                                  axis=1)

        # load MLP model and, if necessary, train it
        try:
            mlp = load_model(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')
        except Exception:
            hidden_state_train = extract_hidden_state(model, tokens_training)
            hidden_state_early_stopping = extract_hidden_state(model, tokens_early_stopping)

            np.random.seed(k)
            mlp = Sequential(
                [
                    Input(shape=(model.config.hidden_size + len(feature_columns),), name='input'),
                    Dense(model.config.hidden_size, activation='relu', name='layer1'),
                    Dense(1, activation='linear', name='layer2'),
                ]
            )

            mlp.compile(
                optimizer='rmsprop',
                loss=tf.keras.losses.MeanSquaredError(),
                metrics=[tf.keras.metrics.RootMeanSquaredError()],
            )
            es = EarlyStopping(monitor='val_root_mean_squared_error', mode='min', verbose=1, patience=100)
            mc = ModelCheckpoint(
                f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras',
                 monitor='val_root_mean_squared_error',
                 mode='min',
                 verbose=1,
                 save_best_only=True
            )

            # normalize data with StandardScaler
            scaler = StandardScaler()
            scaler.fit(X_training_features)
            X_train_features_scaled = scaler.transform(X_training_features)
            X_es_features_scaled = scaler.transform(X_early_stopping_features)

            X_train_with_features = np.concatenate((hidden_state_train.detach().numpy(), X_train_features_scaled),
                                                   axis=1)
            X_es_with_features = np.concatenate((hidden_state_early_stopping.detach().numpy(), X_es_features_scaled),
                                                axis=1)

            mlp.fit(X_train_with_features, y_training,
                    validation_data=(X_es_with_features, y_early_stopping),
                    batch_size=TRAIN_BATCH_SIZE,
                    #epochs=5000, callbacks=[es, mc])
                    epochs=10, callbacks=[tensorboard_callback, es, mc])

        # Manually save the model after training
        mlp.save(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')

        mlp = tf.keras.models.load_model(f'{EXPERIMENT_DIR}/models/mlp/{hash}_mlp.keras')

        # predict MLP on validation and test sets
        prediction_val_fold = mlp.predict(X_val_fold_with_features, batch_size=VALID_BATCH_SIZE)

        df_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'] = prediction_val_fold

    return df_predictions_val_fold


def extract_hidden_state(model, tokens, batch_size=16):
    last_last_hidden_state = torch.zeros((len(tokens.input_ids), model.config.hidden_size))
    model = model.cuda().eval()
    with torch.no_grad():
        for i in range(0, len(tokens.input_ids), batch_size):
            if i + batch_size > len(tokens.input_ids):
                input_i = tokens.input_ids[i:]
            else:
                input_i = tokens.input_ids[i:i + batch_size]
            output = model(input_i.cuda(), output_hidden_states=True)
            last_hidden_state = output.hidden_states[-1].cpu()
            idx_last_token = torch.zeros(len(input_i)).long()
            last_last_hidden_state[i:i + len(idx_last_token)] = last_hidden_state[
                torch.arange(len(idx_last_token)), idx_last_token]
    return last_last_hidden_state


# dataframe for each metric for each model for each ensemble size
# 3d array: [ensemble_size, model_index, metric_index]
df_macro_ensemble_scores = pd.DataFrame(
    columns=[
        'ensemble_size',
        'model_name',
        'mean_absolute_error_mean',
        'mean_absolute_error_std',
        'mean_squared_error_mean',
        'mean_squared_error_std',
        'root_mean_squared_error_mean',
        'root_mean_squared_error_std',
    ]
)

for fold, (train_idx, val_idx) in enumerate(KFold(n_splits=N_FOLDS).split(df_train)):
    df_train_folds = df_train.loc[train_idx]
    df_val_fold = df_train.loc[val_idx]
    # fill na with mean of columns of train data
    df_train_folds = df_train_folds.fillna(df_train_folds.mean(numeric_only=True))
    df_val_fold = df_val_fold.fillna(df_train_folds.mean(numeric_only=True))

    y_val_fold = df_val_fold['MOS'].values

    pool_predictions_val_fold = get_predictions(df_train_folds, df_val_fold)

    for current_ensemble_size in range(1, MAX_ENSEMBLE_SIZE + 1):
        np.random.seed(current_ensemble_size)
        idx = np.random.choice(
            ENSEMBLE_POOL_SIZE,
            size=(BOOTSTRAP_SIZE, current_ensemble_size),
        )

        idx_mapped = np.array(
            [
                np.array(
                    [pool_predictions_val_fold[f'{MODEL_NAME}_prediction_{k}'] for k in j]
                )
                for j in idx
            ]
        )

        ensemble_predictions = np.array(
            [np.sum(j, axis=0) / len(j) for j in idx_mapped]
        )

        ensemble_scores = [
            compute_metrics(y_val_fold, pred) for pred in ensemble_predictions
        ]

        df_ensemble_scores = pd.DataFrame(ensemble_scores).sort_index(axis=1)

        # add to dataframe
        new_row = pd.DataFrame(
            {
                'ensemble_size': [current_ensemble_size],
                'model_name': [MODEL_NAME],
                'mean_absolute_error_mean': [df_ensemble_scores['mean_absolute_error'].mean()],
                'mean_absolute_error_std': [df_ensemble_scores['mean_absolute_error'].std()],
                'mean_squared_error_mean': [df_ensemble_scores['mean_squared_error'].mean()],
                'mean_squared_error_std': [df_ensemble_scores['mean_squared_error'].std()],
                'root_mean_squared_error_mean': [df_ensemble_scores['root_mean_squared_error'].mean()],
                'root_mean_squared_error_std': [df_ensemble_scores['root_mean_squared_error'].std()],
            })

df_macro_ensemble_scores = pd.concat([df_macro_ensemble_scores, new_row], ignore_index=True)

df_macro_ensemble_scores[
    df_macro_ensemble_scores['model_name'] == MODEL_NAME
    ].to_csv(
    f'ensemble_scores_{MODEL_NAME}.csv', index=False, sep=';', encoding='utf-8'
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/508 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/811k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

cache/ensemble_camembert-base/models/camembert-base/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base


model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.5813,3.857039,1.963935,1.782031,3.857039
46,2.9737,1.020321,1.010109,0.727173,1.020321
69,0.7608,0.583865,0.76411,0.578074,0.583865
92,0.5365,0.466971,0.683353,0.500862,0.466971
115,0.5501,0.479781,0.692662,0.489816,0.479781
138,0.4903,0.529117,0.727404,0.518183,0.529117
161,0.2507,0.488925,0.699232,0.528725,0.488925


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 1s/step - loss: 8.3199 - root_mean_squared_error: 2.8844
Epoch 1: val_root_mean_squared_error improved from inf to 0.65610, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - loss: 2.7229 - root_mean_squared_error: 1.5679 - val_loss: 0.4305 - val_root_mean_squared_error: 0.6561
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1673 - root_mean_squared_error: 0.4090
Epoch 2: val_root_mean_squared_error improved from 0.65610 to 0.54216, saving model to cache/ensemble_camembert-base/models/mlp/b612d002ea296529659079b311947613391abc659399d56332a19be34b39ce7d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.2909 - root_mean_squared_error: 0.5363

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3471,4.551661,2.133462,1.972431,4.551661
46,2.4469,1.096197,1.046994,0.841982,1.096197
69,0.7991,0.492094,0.701494,0.54946,0.492094
92,0.5574,0.414118,0.64352,0.51638,0.414118
115,0.4932,0.320134,0.565804,0.419553,0.320134
138,0.2884,0.343094,0.585742,0.434916,0.343094
161,0.2704,0.456411,0.675582,0.522591,0.456411


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 6.7574 - root_mean_squared_error: 2.5995
Epoch 1: val_root_mean_squared_error improved from inf to 0.61092, saving model to cache/ensemble_camembert-base/models/mlp/c91b46895596b42df4e1385f135441ffc449535d6e19f5b434fe664fa6596880_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5535 - root_mean_squared_error: 1.5200 - val_loss: 0.3732 - val_root_mean_squared_error: 0.6109
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1242 - root_mean_squared_error: 0.3525
Epoch 2: val_root_mean_squared_error did not improve from 0.61092
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.2231 - root_mean_squared_error: 0.4680 - val_loss: 0.3768 - val_root_mean_squared_error: 0.6139
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8331,3.696201,1.922551,1.755135,3.696201
46,2.5836,0.871559,0.933573,0.736427,0.871559
69,0.8228,0.581764,0.762734,0.582318,0.581764
92,0.7088,0.382408,0.618392,0.494285,0.382408
115,0.4456,0.526086,0.725318,0.556068,0.526086
138,0.2829,0.69971,0.836487,0.667743,0.69971
161,0.2321,0.691669,0.831666,0.668254,0.691669


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 9.2994 - root_mean_squared_error: 3.0495
Epoch 1: val_root_mean_squared_error improved from inf to 0.92580, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6423 - root_mean_squared_error: 1.5354 - val_loss: 0.8571 - val_root_mean_squared_error: 0.9258
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.9391 - root_mean_squared_error: 0.9691
Epoch 2: val_root_mean_squared_error improved from 0.92580 to 0.53355, saving model to cache/ensemble_camembert-base/models/mlp/bb22c2bb2b0d9700bf8d7df910af0b4b56226bc98a51ea26c92306f122bc13de_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5259 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7987,3.656667,1.912241,1.742362,3.656667
46,2.4858,0.848052,0.920898,0.714825,0.848052
69,0.6723,0.532885,0.72999,0.653805,0.532885
92,0.5947,0.610511,0.781352,0.691857,0.610512
115,0.5473,0.466857,0.683269,0.585775,0.466857
138,0.5079,0.500506,0.707464,0.58058,0.500506
161,0.4228,0.447467,0.66893,0.537583,0.447467


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.5520 - root_mean_squared_error: 2.7481
Epoch 1: val_root_mean_squared_error improved from inf to 0.93147, saving model to cache/ensemble_camembert-base/models/mlp/8aef8ffb6ad3e8ad051fa6ca45595b1683b1810a3df6a0d40d82dadfc03b83cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5580 - root_mean_squared_error: 1.5186 - val_loss: 0.8676 - val_root_mean_squared_error: 0.9315
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 1.0509 - root_mean_squared_error: 1.0251
Epoch 2: val_root_mean_squared_error did not improve from 0.93147
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6012 - root_mean_squared_error: 0.7624 - val_loss: 0.9058 - val_root_mean_squared_error: 0.9517
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9231,3.175922,1.782112,1.615359,3.175922
46,2.386,0.648749,0.80545,0.658789,0.648749
69,0.8151,0.503099,0.709294,0.543295,0.503099
92,0.6539,0.655627,0.809708,0.692179,0.655627
115,0.8949,0.84873,0.921265,0.767235,0.84873
138,0.6271,0.460348,0.67849,0.534275,0.460348
161,0.5133,0.458083,0.676819,0.577097,0.458083


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 6.6170 - root_mean_squared_error: 2.5724
Epoch 1: val_root_mean_squared_error improved from inf to 0.45926, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7330 - root_mean_squared_error: 1.5844 - val_loss: 0.2109 - val_root_mean_squared_error: 0.4593
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2677 - root_mean_squared_error: 0.5174
Epoch 2: val_root_mean_squared_error improved from 0.45926 to 0.35737, saving model to cache/ensemble_camembert-base/models/mlp/e2a0523e902b4fa55bc61bb26cb278bb453811177034abbde9401cfb09771f10_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6532 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7432,4.325266,2.079727,1.902881,4.325266
46,2.2501,0.933694,0.966278,0.779315,0.933694
69,0.6603,0.646499,0.804052,0.66145,0.646499
92,0.6885,0.602144,0.775979,0.653257,0.602144
115,0.5105,0.48195,0.694226,0.562626,0.48195
138,0.453,0.560134,0.748421,0.603547,0.560134
161,0.3772,0.513585,0.716648,0.589249,0.513585


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.6622 - root_mean_squared_error: 2.7681
Epoch 1: val_root_mean_squared_error improved from inf to 0.80651, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8576 - root_mean_squared_error: 1.6111 - val_loss: 0.6505 - val_root_mean_squared_error: 0.8065
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3954 - root_mean_squared_error: 0.6288
Epoch 2: val_root_mean_squared_error improved from 0.80651 to 0.51801, saving model to cache/ensemble_camembert-base/models/mlp/29c90b483b02e6177a1a12c987aec1a1e8281ffc7d477fc08cfe781a9d709a36_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4077 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6215,4.653997,2.157312,1.99132,4.653997
46,2.4434,1.221715,1.105312,0.868642,1.221715
69,0.7588,0.670596,0.818899,0.618308,0.670596
92,0.6693,0.618127,0.786211,0.604855,0.618127
115,0.534,0.567811,0.753532,0.578753,0.567811
138,0.4097,0.789578,0.888582,0.670033,0.789578
161,0.3666,0.709187,0.842132,0.637828,0.709187


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.5406 - root_mean_squared_error: 2.5575
Epoch 1: val_root_mean_squared_error improved from inf to 0.59990, saving model to cache/ensemble_camembert-base/models/mlp/5e4f0c82c76c1de4ecc7bac97603b913aac8745333016a7294efc52160b5c5d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7016 - root_mean_squared_error: 1.5693 - val_loss: 0.3599 - val_root_mean_squared_error: 0.5999
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4398 - root_mean_squared_error: 0.6632
Epoch 2: val_root_mean_squared_error improved from 0.59990 to 0.56811, saving model to cache/ensemble_camembert-base/models/mlp/5e4f0c82c76c1de4ecc7bac97603b913aac8745333016a7294efc52160b5c5d9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5418 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.232,4.36649,2.089615,1.91998,4.36649
46,2.4272,1.047579,1.023513,0.82279,1.047579
69,0.6023,0.626019,0.791214,0.617532,0.626019
92,0.5847,0.416539,0.645398,0.53166,0.416539
115,0.4658,0.626326,0.791408,0.648651,0.626326
138,0.3515,0.434375,0.659071,0.545406,0.434375
161,0.2656,0.34668,0.588796,0.472971,0.34668


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 655ms/step - loss: 5.8734 - root_mean_squared_error: 2.4235
Epoch 1: val_root_mean_squared_error improved from inf to 0.64767, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5514 - root_mean_squared_error: 1.5232 - val_loss: 0.4195 - val_root_mean_squared_error: 0.6477
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5189 - root_mean_squared_error: 0.7203
Epoch 2: val_root_mean_squared_error improved from 0.64767 to 0.64453, saving model to cache/ensemble_camembert-base/models/mlp/2417b66244af6950d394b792a0879e5baa52df8057357916af4a1fe9cd9f3884_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.6183 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6001,4.48076,2.11678,1.850406,4.480759
46,2.4729,1.412273,1.188391,0.896744,1.412273
69,0.7235,0.821464,0.906347,0.689504,0.821464
92,0.7927,0.650638,0.806622,0.645306,0.650638
115,0.5033,0.662593,0.813998,0.58129,0.662593
138,0.4008,0.535363,0.731685,0.578824,0.535363
161,0.2812,0.573642,0.757391,0.545834,0.573642


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 9.2782 - root_mean_squared_error: 3.0460
Epoch 1: val_root_mean_squared_error improved from inf to 0.55193, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7758 - root_mean_squared_error: 1.5784 - val_loss: 0.3046 - val_root_mean_squared_error: 0.5519
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2447 - root_mean_squared_error: 0.4946
Epoch 2: val_root_mean_squared_error improved from 0.55193 to 0.53741, saving model to cache/ensemble_camembert-base/models/mlp/8685d5ee8bd3e79fd0dbf4432f172dbbe03105c5dfba450cc744a927bd73c7d3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4273 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0254,3.296051,1.815503,1.654227,3.296051
46,2.5309,0.693458,0.832742,0.655248,0.693458
69,0.6989,0.580597,0.761969,0.610365,0.580597
92,0.6199,0.424067,0.651204,0.523944,0.424067
115,0.4779,0.400432,0.632797,0.537324,0.400432
138,0.4126,0.421429,0.649176,0.550113,0.421429
161,0.2236,0.45536,0.674804,0.606768,0.45536


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.1296 - root_mean_squared_error: 2.2649
Epoch 1: val_root_mean_squared_error improved from inf to 0.52306, saving model to cache/ensemble_camembert-base/models/mlp/8a4814e4aec03a6479b9ec24494843818b497ad7e712337f8943760b856168f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6271 - root_mean_squared_error: 1.5542 - val_loss: 0.2736 - val_root_mean_squared_error: 0.5231
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.0874 - root_mean_squared_error: 0.2956
Epoch 2: val_root_mean_squared_error did not improve from 0.52306
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3587 - root_mean_squared_error: 0.5866 - val_loss: 0.4380 - val_root_mean_squared_error: 0.6618
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9741,2.798398,1.672841,1.448379,2.798398
46,2.1956,0.715705,0.845993,0.713374,0.715705
69,0.6403,0.88183,0.939058,0.834189,0.88183
92,0.6506,0.56697,0.752974,0.568948,0.56697
115,0.5298,0.504541,0.71031,0.532948,0.504541
138,0.3097,0.495462,0.70389,0.526978,0.495462
161,0.2354,0.427378,0.653741,0.478788,0.427378


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 6.0739 - root_mean_squared_error: 2.4645
Epoch 1: val_root_mean_squared_error improved from inf to 0.77954, saving model to cache/ensemble_camembert-base/models/mlp/091c75152f6299a2a53f8c4354dbe816ca45e6e866b8e88328b36ab63710f859_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4399 - root_mean_squared_error: 1.4930 - val_loss: 0.6077 - val_root_mean_squared_error: 0.7795
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2830 - root_mean_squared_error: 0.5320
Epoch 2: val_root_mean_squared_error did not improve from 0.77954
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.2972 - root_mean_squared_error: 0.5372 - val_loss: 0.6350 - val_root_mean_squared_error: 0.7969
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5869,3.994271,1.998567,1.76419,3.994271
46,2.1821,1.124341,1.06035,0.870805,1.124342
69,0.7156,0.857359,0.925937,0.797601,0.857359
92,0.5977,0.638324,0.798952,0.65887,0.638324
115,0.3777,0.812004,0.901113,0.737904,0.812004
138,0.3937,0.667032,0.81672,0.689228,0.667032
161,0.2285,0.780139,0.883255,0.732814,0.780139


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 5.0255 - root_mean_squared_error: 2.2418
Epoch 1: val_root_mean_squared_error improved from inf to 0.63123, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6793 - root_mean_squared_error: 1.5684 - val_loss: 0.3984 - val_root_mean_squared_error: 0.6312
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1300 - root_mean_squared_error: 0.3606
Epoch 2: val_root_mean_squared_error improved from 0.63123 to 0.54171, saving model to cache/ensemble_camembert-base/models/mlp/783025a51c2d51d131a8dd0b535c8c8be204f5884b4e643d90ebbe147d95b624_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2929 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.959,3.305104,1.817995,1.647478,3.305104
46,2.1208,0.665162,0.815575,0.628712,0.665162
69,0.7856,0.562771,0.750181,0.618804,0.562771
92,0.6129,0.454636,0.674267,0.548202,0.454636
115,0.55,0.464784,0.681751,0.539098,0.464784
138,0.335,0.432767,0.65785,0.536473,0.432767
161,0.2271,0.480454,0.693148,0.539189,0.480454


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 5.7693 - root_mean_squared_error: 2.4019
Epoch 1: val_root_mean_squared_error improved from inf to 1.08149, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6200 - root_mean_squared_error: 1.5551 - val_loss: 1.1696 - val_root_mean_squared_error: 1.0815
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.1116 - root_mean_squared_error: 1.0543
Epoch 2: val_root_mean_squared_error improved from 1.08149 to 0.92224, saving model to cache/ensemble_camembert-base/models/mlp/20dc5a7c30b972d5f46a904ff2f2a465814973d60c99937698383c116f779ce3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5448 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7345,4.02487,2.006208,1.849325,4.02487
46,2.4485,0.962573,0.981108,0.748155,0.962573
69,0.8415,0.593082,0.770118,0.60941,0.593082
92,0.6425,0.815801,0.903217,0.679853,0.815801
115,0.4918,0.608645,0.780157,0.65352,0.608645
138,0.2702,0.657555,0.810897,0.703048,0.657555
161,0.2084,0.589476,0.767774,0.653232,0.589476


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 6.4347 - root_mean_squared_error: 2.5367
Epoch 1: val_root_mean_squared_error improved from inf to 0.58103, saving model to cache/ensemble_camembert-base/models/mlp/82dea812ed409f71bd10886113d2c12dcbf6cf29487bc1a7021b481570f51114_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7351 - root_mean_squared_error: 1.5715 - val_loss: 0.3376 - val_root_mean_squared_error: 0.5810
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1202 - root_mean_squared_error: 0.3467
Epoch 2: val_root_mean_squared_error did not improve from 0.58103
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4647 - root_mean_squared_error: 0.6727 - val_loss: 0.5173 - val_root_mean_squared_error: 0.7192
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2846,3.520166,1.876211,1.720384,3.520167
46,2.2635,0.75354,0.868067,0.656646,0.75354
69,0.7431,0.553955,0.744282,0.589048,0.553955
92,0.7452,0.557733,0.746815,0.562377,0.557733
115,0.5521,0.540143,0.734944,0.562858,0.540143
138,0.4808,0.595806,0.771885,0.631946,0.595806
161,0.3324,0.588917,0.767409,0.633909,0.588917


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 8.6636 - root_mean_squared_error: 2.9434
Epoch 1: val_root_mean_squared_error improved from inf to 1.09050, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6090 - root_mean_squared_error: 1.5304 - val_loss: 1.1892 - val_root_mean_squared_error: 1.0905
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0341 - root_mean_squared_error: 1.0169
Epoch 2: val_root_mean_squared_error improved from 1.09050 to 0.56879, saving model to cache/ensemble_camembert-base/models/mlp/8a8daf6234242730580b965d0e73408c163a9bf53472ff432856524258bdebae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5329 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4422,4.065984,2.016429,1.821027,4.065984
46,2.5781,1.098526,1.048106,0.788103,1.098526
69,0.9944,0.740679,0.860627,0.68762,0.740679
92,0.6326,0.582688,0.76334,0.602644,0.582688
115,0.462,0.464547,0.681577,0.548485,0.464547
138,0.3369,0.463313,0.680671,0.571119,0.463313
161,0.2472,0.416866,0.645652,0.522224,0.416866


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 7.1340 - root_mean_squared_error: 2.6709
Epoch 1: val_root_mean_squared_error improved from inf to 0.62908, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9334 - root_mean_squared_error: 1.6308 - val_loss: 0.3957 - val_root_mean_squared_error: 0.6291
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3495 - root_mean_squared_error: 0.5911
Epoch 2: val_root_mean_squared_error improved from 0.62908 to 0.60079, saving model to cache/ensemble_camembert-base/models/mlp/b9ff93eab94429be29c2d0f602b9728456f15e2cfc8b7a863fccda512aff9267_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4439 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1522,3.23596,1.798878,1.570534,3.235961
46,2.4209,0.877244,0.936613,0.698892,0.877244
69,0.7687,0.909447,0.953649,0.772234,0.909447
92,0.6004,0.596695,0.77246,0.600752,0.596695
115,0.5444,0.776123,0.880979,0.741043,0.776123
138,0.4317,0.457832,0.676633,0.512043,0.457832
161,0.2624,0.474568,0.688889,0.494279,0.474568


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 7.1912 - root_mean_squared_error: 2.6816
Epoch 1: val_root_mean_squared_error improved from inf to 0.50412, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.8771 - root_mean_squared_error: 1.6187 - val_loss: 0.2541 - val_root_mean_squared_error: 0.5041
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1487 - root_mean_squared_error: 0.3856
Epoch 2: val_root_mean_squared_error improved from 0.50412 to 0.39476, saving model to cache/ensemble_camembert-base/models/mlp/b57476542ec5fa69960f61c669bcff9d4cbead69e96f6b365fe6e619f49f7986_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2581 - root_mean_squared_error: 0.4

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8151,4.527211,2.127724,1.938058,4.527211
46,2.6441,1.18697,1.089482,0.896808,1.18697
69,0.7192,0.718075,0.847393,0.710259,0.718075
92,0.525,0.967325,0.983527,0.770368,0.967325
115,0.4075,0.638075,0.798796,0.664303,0.638075
138,0.3292,0.629548,0.793441,0.638042,0.629548
161,0.2206,0.600621,0.774998,0.64386,0.600621


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.0784 - root_mean_squared_error: 2.4654
Epoch 1: val_root_mean_squared_error improved from inf to 0.79158, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4194 - root_mean_squared_error: 1.4853 - val_loss: 0.6266 - val_root_mean_squared_error: 0.7916
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2851 - root_mean_squared_error: 0.5339
Epoch 2: val_root_mean_squared_error improved from 0.79158 to 0.62700, saving model to cache/ensemble_camembert-base/models/mlp/b1231c7726e942267345aed23b6c4e106ac628982b647fc0eb1020381072b8dd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3697 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3412,5.148656,2.269065,2.092278,5.148656
46,2.4214,1.550117,1.245037,1.023668,1.550117
69,0.7006,0.769865,0.877419,0.742283,0.769865
92,0.6318,0.712199,0.843919,0.662027,0.712199
115,0.4192,0.625549,0.790916,0.632204,0.625549
138,0.2426,0.6031,0.776595,0.608177,0.6031
161,0.2175,0.497131,0.705075,0.568717,0.497131


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 5.0960 - root_mean_squared_error: 2.2574
Epoch 1: val_root_mean_squared_error improved from inf to 0.45645, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5252 - root_mean_squared_error: 1.5285 - val_loss: 0.2083 - val_root_mean_squared_error: 0.4564
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2490 - root_mean_squared_error: 0.4990
Epoch 2: val_root_mean_squared_error improved from 0.45645 to 0.45311, saving model to cache/ensemble_camembert-base/models/mlp/1c438edd9402e6c277a20b48bd7bdba653bbc56e86af9574031067c220a9e75c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3586 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5097,4.729456,2.174731,1.995816,4.729456
46,2.1448,1.186289,1.089169,0.882153,1.186289
69,0.6967,0.729208,0.853937,0.706398,0.729208
92,0.6123,0.833157,0.912774,0.750663,0.833157
115,0.5976,0.569237,0.754478,0.629467,0.569237
138,0.4067,0.567485,0.753316,0.626955,0.567485
161,0.3276,0.528983,0.727312,0.58735,0.528983


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 9.9478 - root_mean_squared_error: 3.1540
Epoch 1: val_root_mean_squared_error improved from inf to 0.56491, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0819 - root_mean_squared_error: 1.6678 - val_loss: 0.3191 - val_root_mean_squared_error: 0.5649
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5159 - root_mean_squared_error: 0.7183
Epoch 2: val_root_mean_squared_error improved from 0.56491 to 0.46719, saving model to cache/ensemble_camembert-base/models/mlp/01d7f990a55e81391ea51d5c409c3dfb6918dda7e81097cb1c3dfd2e449fc07d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5010 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.039,2.838003,1.684637,1.479252,2.838003
46,2.1459,0.678529,0.823729,0.573795,0.678529
69,0.7579,0.665435,0.815742,0.647658,0.665435
92,0.602,0.609438,0.780665,0.627731,0.609438
115,0.5306,0.50649,0.711681,0.525931,0.50649
138,0.3975,0.627368,0.792066,0.655318,0.627368
161,0.2295,0.609548,0.780736,0.635911,0.609548


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.9543 - root_mean_squared_error: 2.6371
Epoch 1: val_root_mean_squared_error improved from inf to 0.55313, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3149 - root_mean_squared_error: 1.4460 - val_loss: 0.3060 - val_root_mean_squared_error: 0.5531
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1808 - root_mean_squared_error: 0.4253
Epoch 2: val_root_mean_squared_error improved from 0.55313 to 0.48673, saving model to cache/ensemble_camembert-base/models/mlp/adcded9736c4e074320477ff3acef76e782c7d2d394e8cb9e6f8d85873de9223_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.2870 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3752,3.142316,1.772658,1.59711,3.142316
46,2.5221,0.715278,0.845741,0.677966,0.715278
69,0.7209,0.558652,0.74743,0.647986,0.558652
92,0.6346,0.56616,0.752436,0.653721,0.56616
115,0.5393,0.4251,0.651997,0.505747,0.4251
138,0.3888,0.415559,0.644639,0.535311,0.415559
161,0.2011,0.521122,0.721888,0.576365,0.521122


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 6.8175 - root_mean_squared_error: 2.6110
Epoch 1: val_root_mean_squared_error improved from inf to 0.64327, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4221 - root_mean_squared_error: 1.4804 - val_loss: 0.4138 - val_root_mean_squared_error: 0.6433
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.7577 - root_mean_squared_error: 0.8704
Epoch 2: val_root_mean_squared_error improved from 0.64327 to 0.35714, saving model to cache/ensemble_camembert-base/models/mlp/c932e92e6feee931ae283d77f2f56af3425f1890a389b37f17ad2133a19f05ea_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5574 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4763,5.878636,2.42459,2.264738,5.878636
46,2.3395,1.691212,1.300466,1.048206,1.691212
69,0.8172,1.034853,1.017277,0.801435,1.034853
92,0.7709,0.80108,0.895031,0.711459,0.80108
115,0.654,0.83949,0.916237,0.731616,0.83949
138,0.4688,0.601172,0.775353,0.578983,0.601172
161,0.335,0.575062,0.758329,0.595792,0.575062


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 9.6768 - root_mean_squared_error: 3.1107
Epoch 1: val_root_mean_squared_error improved from inf to 0.77389, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6404 - root_mean_squared_error: 1.5374 - val_loss: 0.5989 - val_root_mean_squared_error: 0.7739
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9933 - root_mean_squared_error: 0.9967
Epoch 2: val_root_mean_squared_error improved from 0.77389 to 0.76785, saving model to cache/ensemble_camembert-base/models/mlp/e51732a324a961fc74df5bc4e3432232d137f193ffaa79459e6941ab34f67eda_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5873 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2605,4.516588,2.125227,1.946397,4.516588
46,2.3532,1.099113,1.048386,0.802071,1.099113
69,0.7199,0.733001,0.856155,0.689901,0.733001
92,0.6435,0.703524,0.838763,0.688625,0.703524
115,0.5646,0.63461,0.796624,0.615888,0.63461
138,0.3782,0.471737,0.686831,0.591219,0.471737
161,0.2624,0.528419,0.726924,0.599264,0.528419


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.4393 - root_mean_squared_error: 2.5376
Epoch 1: val_root_mean_squared_error improved from inf to 0.55170, saving model to cache/ensemble_camembert-base/models/mlp/9fc2278b9fb032f58cd23e27c5f396b0a6db8587018b115d0d04cc171dba8904_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7480 - root_mean_squared_error: 1.5767 - val_loss: 0.3044 - val_root_mean_squared_error: 0.5517
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1274 - root_mean_squared_error: 0.3569
Epoch 2: val_root_mean_squared_error did not improve from 0.55170
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4731 - root_mean_squared_error: 0.6754 - val_loss: 0.7207 - val_root_mean_squared_error: 0.8489
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0833,3.395951,1.842811,1.669634,3.395951
46,2.4371,0.740988,0.860807,0.632562,0.740988
69,0.8516,0.584644,0.76462,0.613887,0.584644
92,0.7204,0.508455,0.71306,0.591524,0.508455
115,0.4944,0.494996,0.70356,0.572899,0.494996
138,0.3815,0.4619,0.679632,0.547458,0.4619
161,0.2358,0.478717,0.691894,0.595748,0.478717


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 689ms/step - loss: 7.1011 - root_mean_squared_error: 2.6648
Epoch 1: val_root_mean_squared_error improved from inf to 0.64670, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3266 - root_mean_squared_error: 1.4566 - val_loss: 0.4182 - val_root_mean_squared_error: 0.6467
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4447 - root_mean_squared_error: 0.6669
Epoch 2: val_root_mean_squared_error improved from 0.64670 to 0.47848, saving model to cache/ensemble_camembert-base/models/mlp/4b14a9b31868759a36dad1ae32f1121755df9f3cf2646e1e78ec148acbe1baf4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4312 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9493,4.961608,2.227467,2.014114,4.961608
46,2.5639,1.541502,1.241572,0.986504,1.541502
69,0.7791,0.939619,0.96934,0.762604,0.939619
92,0.6878,0.838772,0.915845,0.71461,0.838772
115,0.557,0.763357,0.873703,0.674888,0.763357
138,0.4219,0.784412,0.885671,0.718784,0.784412
161,0.2556,0.762111,0.87299,0.696496,0.762111


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 698ms/step - loss: 7.5953 - root_mean_squared_error: 2.7560
Epoch 1: val_root_mean_squared_error improved from inf to 0.87377, saving model to cache/ensemble_camembert-base/models/mlp/6a317fed24385a19e50a76c87e2e9bdf452604069dca5e47ce84b9d420822dc3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6871 - root_mean_squared_error: 1.5541 - val_loss: 0.7635 - val_root_mean_squared_error: 0.8738
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3473 - root_mean_squared_error: 0.5893
Epoch 2: val_root_mean_squared_error improved from 0.87377 to 0.60696, saving model to cache/ensemble_camembert-base/models/mlp/6a317fed24385a19e50a76c87e2e9bdf452604069dca5e47ce84b9d420822dc3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3706 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7803,3.837433,1.958937,1.781517,3.837433
46,2.6862,0.87897,0.937534,0.759877,0.87897
69,0.7386,0.644202,0.802622,0.660314,0.644202
92,0.6543,0.615554,0.784573,0.568386,0.615554
115,0.4873,0.499819,0.706979,0.529191,0.499819
138,0.5156,0.474767,0.689034,0.540966,0.474767
161,0.2613,0.495208,0.70371,0.547525,0.495208


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 8.2815 - root_mean_squared_error: 2.8778
Epoch 1: val_root_mean_squared_error improved from inf to 0.69352, saving model to cache/ensemble_camembert-base/models/mlp/f4d6ad45a2f846178e015ed274b2c46dc44bf61d323a00ba1bf31acce30441f4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6189 - root_mean_squared_error: 1.5329 - val_loss: 0.4810 - val_root_mean_squared_error: 0.6935
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5180 - root_mean_squared_error: 0.7197
Epoch 2: val_root_mean_squared_error did not improve from 0.69352
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5305 - root_mean_squared_error: 0.7198 - val_loss: 0.5040 - val_root_mean_squared_error: 0.7099
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.438,3.587848,1.894162,1.777823,3.587848
46,2.3407,0.631905,0.794925,0.648594,0.631905
69,0.6243,0.430437,0.656077,0.562297,0.430437
92,0.7162,0.359375,0.599479,0.507953,0.359375
115,0.5535,0.383964,0.619648,0.525458,0.383964
138,0.4569,0.386041,0.621322,0.494205,0.386041
161,0.3148,0.427993,0.654212,0.528115,0.427993


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 7.1359 - root_mean_squared_error: 2.6713
Epoch 1: val_root_mean_squared_error improved from inf to 0.63608, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8471 - root_mean_squared_error: 1.6097 - val_loss: 0.4046 - val_root_mean_squared_error: 0.6361
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3702 - root_mean_squared_error: 0.6085
Epoch 2: val_root_mean_squared_error improved from 0.63608 to 0.48450, saving model to cache/ensemble_camembert-base/models/mlp/5b836f84ffb4de512943c48d4d8d6886b250d16c0aa73b80ead9497c03af9a03_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.2964 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7288,3.086492,1.756842,1.56103,3.086492
46,1.9813,0.693999,0.833066,0.624386,0.693999
69,0.853,0.634391,0.796487,0.599459,0.634391
92,0.5046,0.512544,0.715922,0.528897,0.512544
115,0.4661,0.62907,0.79314,0.661472,0.62907
138,0.4195,0.590693,0.768566,0.621164,0.590693
161,0.3037,0.595601,0.771752,0.662636,0.595601


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 7.8111 - root_mean_squared_error: 2.7948
Epoch 1: val_root_mean_squared_error improved from inf to 0.57176, saving model to cache/ensemble_camembert-base/models/mlp/2d7d88ddd778d3577f4c03e1b87367fca288452ca48d42c8edba470f93ac6d9f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0033 - root_mean_squared_error: 1.6532 - val_loss: 0.3269 - val_root_mean_squared_error: 0.5718
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4514 - root_mean_squared_error: 0.6719
Epoch 2: val_root_mean_squared_error improved from 0.57176 to 0.47350, saving model to cache/ensemble_camembert-base/models/mlp/2d7d88ddd778d3577f4c03e1b87367fca288452ca48d42c8edba470f93ac6d9f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3556 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.19,3.632479,1.905906,1.750138,3.632479
46,2.6958,0.824022,0.907756,0.660584,0.824022
69,0.7743,0.620343,0.787619,0.617797,0.620343
92,0.6663,0.526702,0.725742,0.576937,0.526701
115,0.537,0.572983,0.756956,0.634965,0.572983
138,0.4262,0.456895,0.67594,0.564763,0.456895
161,0.3945,0.439919,0.663264,0.553442,0.439919


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 707ms/step - loss: 8.3902 - root_mean_squared_error: 2.8966
Epoch 1: val_root_mean_squared_error improved from inf to 0.62295, saving model to cache/ensemble_camembert-base/models/mlp/729d3fc844a465cd30367969b82299cf786732c878aadace3f4321e8ece7baab_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2389 - root_mean_squared_error: 1.7128 - val_loss: 0.3881 - val_root_mean_squared_error: 0.6230
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1574 - root_mean_squared_error: 0.3967
Epoch 2: val_root_mean_squared_error improved from 0.62295 to 0.44516, saving model to cache/ensemble_camembert-base/models/mlp/729d3fc844a465cd30367969b82299cf786732c878aadace3f4321e8ece7baab_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3152 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0679,4.54639,2.132226,1.984602,4.546389
46,2.7208,1.172257,1.082708,0.844714,1.172257
69,0.8053,0.49349,0.702488,0.581635,0.49349
92,0.6353,0.387377,0.622396,0.485857,0.387377
115,0.3563,0.364346,0.603611,0.48286,0.364346
138,0.4539,0.342789,0.585482,0.467093,0.342789
161,0.2699,0.432737,0.657827,0.479064,0.432737


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 6.5996 - root_mean_squared_error: 2.5690
Epoch 1: val_root_mean_squared_error improved from inf to 0.53069, saving model to cache/ensemble_camembert-base/models/mlp/05f918c72a8624909f3a4048fa1ed18b5b836787eb3fffc3abd94bf7a9cc60f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4450 - root_mean_squared_error: 1.4873 - val_loss: 0.2816 - val_root_mean_squared_error: 0.5307
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2139 - root_mean_squared_error: 0.4625
Epoch 2: val_root_mean_squared_error did not improve from 0.53069
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4042 - root_mean_squared_error: 0.6262 - val_loss: 0.2951 - val_root_mean_squared_error: 0.5433
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2179,3.950909,1.987689,1.832654,3.950909
46,2.5972,0.859668,0.927183,0.720307,0.859668
69,0.6693,0.632994,0.795609,0.65032,0.632994
92,0.6487,0.53055,0.728388,0.606475,0.53055
115,0.4185,0.955261,0.977374,0.782357,0.955261
138,0.3496,0.536944,0.732765,0.593735,0.536944
161,0.1994,0.468037,0.684132,0.594681,0.468037


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 4.3071 - root_mean_squared_error: 2.0754
Epoch 1: val_root_mean_squared_error improved from inf to 0.56644, saving model to cache/ensemble_camembert-base/models/mlp/8b0a576710bd4fd4b5c445d90ace9a8c336879b7d5624fc8f538747883b9bf43_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6664 - root_mean_squared_error: 1.5731 - val_loss: 0.3209 - val_root_mean_squared_error: 0.5664
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1497 - root_mean_squared_error: 0.3869
Epoch 2: val_root_mean_squared_error did not improve from 0.56644
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3605 - root_mean_squared_error: 0.5970 - val_loss: 0.6142 - val_root_mean_squared_error: 0.7837
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5861,4.645146,2.15526,1.93273,4.645146
46,2.3676,1.322095,1.149824,0.931533,1.322095
69,0.7153,0.904707,0.951161,0.803834,0.904707
92,0.546,0.780253,0.88332,0.701314,0.780253
115,0.4422,0.69375,0.832916,0.65679,0.69375
138,0.4608,0.685489,0.827943,0.643169,0.685489
161,0.3513,0.676626,0.822573,0.666793,0.676626


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 8.8061 - root_mean_squared_error: 2.9675
Epoch 1: val_root_mean_squared_error improved from inf to 0.94933, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8782 - root_mean_squared_error: 1.6084 - val_loss: 0.9012 - val_root_mean_squared_error: 0.9493
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4761 - root_mean_squared_error: 0.6900
Epoch 2: val_root_mean_squared_error improved from 0.94933 to 0.70274, saving model to cache/ensemble_camembert-base/models/mlp/2e4074fc3daac0cf8624b261fd1b51eede9b79ed28cb22d3f62157c62f002976_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2912 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3004,3.822262,1.95506,1.762062,3.822262
46,2.1808,0.893884,0.945454,0.686302,0.893884
69,0.6437,0.731964,0.855549,0.641102,0.731964
92,0.6205,0.699179,0.836169,0.663402,0.699179
115,0.5487,0.596234,0.772162,0.605533,0.596234
138,0.5022,0.623173,0.789413,0.63128,0.623173
161,0.2642,0.517507,0.71938,0.5916,0.517507


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 6.1226 - root_mean_squared_error: 2.4744
Epoch 1: val_root_mean_squared_error improved from inf to 0.97760, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5104 - root_mean_squared_error: 1.5232 - val_loss: 0.9557 - val_root_mean_squared_error: 0.9776
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1835 - root_mean_squared_error: 1.0879
Epoch 2: val_root_mean_squared_error improved from 0.97760 to 0.52309, saving model to cache/ensemble_camembert-base/models/mlp/d650433b311afb0f5a7ee9d54643396114b91dc70209a0dadce419cbfd82b09c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6338 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2686,3.64984,1.910455,1.729775,3.64984
46,2.3749,0.850992,0.922493,0.742361,0.850992
69,0.773,0.642769,0.801729,0.645787,0.642769
92,0.5998,0.510496,0.71449,0.582227,0.510496
115,0.4655,0.594487,0.771029,0.599921,0.594486
138,0.3148,0.534799,0.731299,0.630451,0.534799
161,0.2536,0.523596,0.723599,0.609135,0.523596


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 6.7603 - root_mean_squared_error: 2.6001
Epoch 1: val_root_mean_squared_error improved from inf to 0.63353, saving model to cache/ensemble_camembert-base/models/mlp/32a7c1c4400e3bfc1d50effd75720a5b90065021af5f677f4bd56d519bf56cfb_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5350 - root_mean_squared_error: 1.5159 - val_loss: 0.4014 - val_root_mean_squared_error: 0.6335
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3063 - root_mean_squared_error: 0.5535
Epoch 2: val_root_mean_squared_error did not improve from 0.63353
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.4595 - root_mean_squared_error: 0.6750 - val_loss: 0.6464 - val_root_mean_squared_error: 0.8040
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8898,5.607996,2.368121,2.107355,5.607995
46,2.523,1.815036,1.347233,1.001334,1.815036
69,0.6657,1.246245,1.116353,0.892571,1.246245
92,0.6613,1.361009,1.166623,0.894445,1.361009
115,0.5673,1.17199,1.082585,0.926602,1.17199
138,0.6523,1.16981,1.081578,0.89188,1.16981
161,0.5929,1.011886,1.005926,0.819409,1.011886


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 5.9019 - root_mean_squared_error: 2.4294
Epoch 1: val_root_mean_squared_error improved from inf to 0.70067, saving model to cache/ensemble_camembert-base/models/mlp/d329d22ae60ae3705b140afc4a0835d8e39cb1cdd5fe908c53d0975c9cf00374_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2415 - root_mean_squared_error: 1.7277 - val_loss: 0.4909 - val_root_mean_squared_error: 0.7007
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5904 - root_mean_squared_error: 0.7684
Epoch 2: val_root_mean_squared_error did not improve from 0.70067
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5254 - root_mean_squared_error: 0.7203 - val_loss: 0.4938 - val_root_mean_squared_error: 0.7027
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8605,4.160375,2.0397,1.826097,4.160375
46,2.5898,1.199763,1.095337,0.880426,1.199763
69,0.9734,0.84989,0.921895,0.762433,0.84989
92,0.6962,0.682454,0.826108,0.669336,0.682454
115,0.5367,0.590268,0.768289,0.618371,0.590268
138,0.4466,0.559535,0.748021,0.600212,0.559535
161,0.3276,0.538028,0.733504,0.584603,0.538028


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 6.7103 - root_mean_squared_error: 2.5904
Epoch 1: val_root_mean_squared_error improved from inf to 0.62014, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0615 - root_mean_squared_error: 1.6728 - val_loss: 0.3846 - val_root_mean_squared_error: 0.6201
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3339 - root_mean_squared_error: 0.5778
Epoch 2: val_root_mean_squared_error improved from 0.62014 to 0.51989, saving model to cache/ensemble_camembert-base/models/mlp/c54a4b2764d57144aa35a81b18fd59f49ca3a2774a3a5a34ac0fb1c7a1bc4a05_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3175 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4046,3.644058,1.908942,1.740519,3.644058
46,2.5891,0.793901,0.891011,0.734097,0.793901
69,0.8661,0.607681,0.779539,0.632266,0.607681
92,0.6458,0.628914,0.793041,0.625751,0.628914
115,0.5734,0.656357,0.810158,0.633416,0.656357
138,0.5559,0.687244,0.829002,0.660668,0.687244
161,0.5092,0.691178,0.831371,0.663697,0.691178


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 6.2866 - root_mean_squared_error: 2.5073
Epoch 1: val_root_mean_squared_error improved from inf to 0.84083, saving model to cache/ensemble_camembert-base/models/mlp/cddb51286605f923863923afd27b5cf3e2ddd418200d8aa790ac0918441fe434_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2951 - root_mean_squared_error: 1.4519 - val_loss: 0.7070 - val_root_mean_squared_error: 0.8408
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3341 - root_mean_squared_error: 0.5780
Epoch 2: val_root_mean_squared_error did not improve from 0.84083
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6270 - root_mean_squared_error: 0.7807 - val_loss: 1.1387 - val_root_mean_squared_error: 1.0671
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7994,2.813447,1.677333,1.472161,2.813446
46,2.4683,0.670289,0.818712,0.606625,0.670289
69,0.8846,0.759576,0.871537,0.748581,0.759576
92,0.6647,0.556232,0.74581,0.618617,0.556232
115,0.5012,0.566921,0.752942,0.551284,0.566921
138,0.3559,0.382077,0.618123,0.492785,0.382076
161,0.2389,0.360988,0.600823,0.500773,0.360988


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 7.3871 - root_mean_squared_error: 2.7179
Epoch 1: val_root_mean_squared_error improved from inf to 0.74504, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2569 - root_mean_squared_error: 1.7205 - val_loss: 0.5551 - val_root_mean_squared_error: 0.7450
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.5503 - root_mean_squared_error: 0.7418
Epoch 2: val_root_mean_squared_error improved from 0.74504 to 0.51866, saving model to cache/ensemble_camembert-base/models/mlp/7a6ff45552d5717363278bc86c6a4621e4b99319cebd31724ce412890d8afd88_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4975 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.058,3.90156,1.975237,1.744175,3.90156
46,2.254,1.091259,1.044634,0.773981,1.091259
69,0.7057,0.696908,0.83481,0.668581,0.696908
92,0.5181,0.606274,0.778636,0.677165,0.606274
115,0.4377,0.729267,0.853971,0.692086,0.729267
138,0.3963,0.705243,0.839787,0.65485,0.705243
161,0.2542,0.631857,0.794894,0.63349,0.631857


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 5.6654 - root_mean_squared_error: 2.3802
Epoch 1: val_root_mean_squared_error improved from inf to 0.49935, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.1852 - root_mean_squared_error: 1.4162 - val_loss: 0.2494 - val_root_mean_squared_error: 0.4994
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1229 - root_mean_squared_error: 0.3506
Epoch 2: val_root_mean_squared_error improved from 0.49935 to 0.44315, saving model to cache/ensemble_camembert-base/models/mlp/2cf120b390bcc9aab0703bbf42c3fe38f3f194bbf3110a83c6dd749fe91ce56f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3526 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.5399,4.757725,2.181221,1.920522,4.757725
46,2.8749,1.601814,1.265628,0.874937,1.601814
69,0.899,0.965282,0.982488,0.762488,0.965282
92,0.6737,0.864862,0.92998,0.722693,0.864862
115,0.6416,0.722503,0.850002,0.606852,0.722503
138,0.5838,0.627872,0.792383,0.582078,0.627872
161,0.3528,0.60293,0.776486,0.57676,0.60293


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 9.5434 - root_mean_squared_error: 3.0892
Epoch 1: val_root_mean_squared_error improved from inf to 0.65714, saving model to cache/ensemble_camembert-base/models/mlp/1317f9ea9be7bd9900517cbccd77e6f78d6edfe96bca75952893a566ae04cd40_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1290 - root_mean_squared_error: 1.6753 - val_loss: 0.4318 - val_root_mean_squared_error: 0.6571
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1536 - root_mean_squared_error: 0.3919
Epoch 2: val_root_mean_squared_error improved from 0.65714 to 0.46429, saving model to cache/ensemble_camembert-base/models/mlp/1317f9ea9be7bd9900517cbccd77e6f78d6edfe96bca75952893a566ae04cd40_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5289 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6547,4.572532,2.138348,1.952109,4.572532
46,2.6739,1.174337,1.083668,0.856922,1.174337
69,0.9024,0.737222,0.858616,0.681562,0.737222
92,0.7545,0.695073,0.833711,0.650879,0.695073
115,0.6011,0.690612,0.831031,0.608395,0.690612
138,0.4146,0.727437,0.852899,0.639499,0.727437
161,0.3494,0.727296,0.852816,0.640853,0.727296


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 6.5073 - root_mean_squared_error: 2.5509
Epoch 1: val_root_mean_squared_error improved from inf to 0.81656, saving model to cache/ensemble_camembert-base/models/mlp/701dbc40f09fd92c680961214a1a63a3c54cd1a3cb7be7f2ebaf57173e48dc94_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4061 - root_mean_squared_error: 1.4825 - val_loss: 0.6668 - val_root_mean_squared_error: 0.8166
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8511 - root_mean_squared_error: 0.9225
Epoch 2: val_root_mean_squared_error did not improve from 0.81656
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6552 - root_mean_squared_error: 0.7928 - val_loss: 1.2098 - val_root_mean_squared_error: 1.0999
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9682,4.283275,2.069607,1.898322,4.283275
46,3.0003,1.097647,1.047687,0.806801,1.097647
69,0.9665,0.63312,0.795689,0.595858,0.63312
92,0.9386,0.738526,0.859375,0.719313,0.738526
115,0.6168,0.575915,0.758891,0.579139,0.575915
138,0.4847,0.527152,0.726053,0.58551,0.527152
161,0.3582,0.531835,0.72927,0.584309,0.531835


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 9.4734 - root_mean_squared_error: 3.0779
Epoch 1: val_root_mean_squared_error improved from inf to 1.06998, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0534 - root_mean_squared_error: 1.6517 - val_loss: 1.1448 - val_root_mean_squared_error: 1.0700
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9232 - root_mean_squared_error: 0.9608
Epoch 2: val_root_mean_squared_error improved from 1.06998 to 0.89369, saving model to cache/ensemble_camembert-base/models/mlp/92035b5d01c00a7e8fe793e43cafc57db5bb34a479141d42b29969334e7bf716_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5955 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1789,3.921118,1.980181,1.784276,3.921118
46,2.9416,1.014434,1.007191,0.78708,1.014434
69,0.7994,0.782986,0.884865,0.785592,0.782986
92,0.7723,0.601205,0.775374,0.62306,0.601205
115,0.5979,0.912714,0.955361,0.684525,0.912714
138,0.4317,0.668783,0.817792,0.629116,0.668783
161,0.3082,0.846663,0.920143,0.68727,0.846663


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 692ms/step - loss: 7.6077 - root_mean_squared_error: 2.7582
Epoch 1: val_root_mean_squared_error improved from inf to 1.12676, saving model to cache/ensemble_camembert-base/models/mlp/e9bc56bdc668459292acc9d2bed56e5f9d44bea0021e35b346d0ba93bcc1987d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0700 - root_mean_squared_error: 1.6651 - val_loss: 1.2696 - val_root_mean_squared_error: 1.1268
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7585 - root_mean_squared_error: 0.8709
Epoch 2: val_root_mean_squared_error improved from 1.12676 to 0.67702, saving model to cache/ensemble_camembert-base/models/mlp/e9bc56bdc668459292acc9d2bed56e5f9d44bea0021e35b346d0ba93bcc1987d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4587 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.237,4.14309,2.035458,1.800914,4.14309
46,2.6075,1.136197,1.065925,0.842276,1.136197
69,0.8894,0.754242,0.868471,0.672031,0.754242
92,0.6565,0.599531,0.774294,0.603804,0.599531
115,0.5291,0.64863,0.805376,0.679644,0.64863
138,0.3729,0.582619,0.763295,0.623259,0.582619
161,0.3196,0.633189,0.795732,0.662543,0.633189


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 6.1322 - root_mean_squared_error: 2.4763
Epoch 1: val_root_mean_squared_error improved from inf to 0.60204, saving model to cache/ensemble_camembert-base/models/mlp/42d2e3518cfd5a1ec6525ff8a1e5fba0fea095bb2cada4e17fcf5cf314d404b2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4684 - root_mean_squared_error: 1.5144 - val_loss: 0.3625 - val_root_mean_squared_error: 0.6020
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1988 - root_mean_squared_error: 0.4459
Epoch 2: val_root_mean_squared_error improved from 0.60204 to 0.54960, saving model to cache/ensemble_camembert-base/models/mlp/42d2e3518cfd5a1ec6525ff8a1e5fba0fea095bb2cada4e17fcf5cf314d404b2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5527 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0147,5.013215,2.239021,2.010913,5.013215
46,2.4117,1.33946,1.15735,0.901967,1.33946
69,0.7822,0.971992,0.985896,0.769352,0.971992
92,0.7538,0.889824,0.943305,0.73888,0.889824
115,0.5619,0.871007,0.933278,0.742994,0.871007
138,0.4852,0.735156,0.857412,0.686276,0.735156
161,0.3026,1.018766,1.009339,0.785969,1.018766


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 7.6018 - root_mean_squared_error: 2.7571
Epoch 1: val_root_mean_squared_error improved from inf to 0.73406, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2782 - root_mean_squared_error: 1.7349 - val_loss: 0.5388 - val_root_mean_squared_error: 0.7341
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3203 - root_mean_squared_error: 0.5659
Epoch 2: val_root_mean_squared_error improved from 0.73406 to 0.68157, saving model to cache/ensemble_camembert-base/models/mlp/d76fa36962a3a7b8dbaababfc4bb626211eccd9f7b4e179a0034a461d0a907f2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4678 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8644,5.564167,2.358849,2.19899,5.564167
46,2.7592,1.581253,1.257479,1.000045,1.581253
69,0.9353,0.734128,0.856813,0.696741,0.734128
92,0.7043,1.022707,1.01129,0.789414,1.022707
115,0.5258,0.866545,0.930884,0.730447,0.866545
138,0.3999,1.098915,1.048291,0.824323,1.098915
161,0.3515,0.899173,0.948247,0.760931,0.899173


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 7.2955 - root_mean_squared_error: 2.7010
Epoch 1: val_root_mean_squared_error improved from inf to 0.69763, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9299 - root_mean_squared_error: 1.6283 - val_loss: 0.4867 - val_root_mean_squared_error: 0.6976
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4862 - root_mean_squared_error: 0.6973
Epoch 2: val_root_mean_squared_error improved from 0.69763 to 0.63604, saving model to cache/ensemble_camembert-base/models/mlp/bf22f9fee59e97230271e6f6bb46c42e8d0d6f582757583ed547c11eaa6567e7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4877 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3597,5.274744,2.296681,2.080341,5.274744
46,2.7403,1.532274,1.237851,0.970531,1.532274
69,0.74,0.943523,0.971351,0.758997,0.943523
92,0.6418,0.776318,0.881089,0.731878,0.776318
115,0.5534,0.72618,0.852162,0.673187,0.72618
138,0.4569,0.786064,0.886603,0.710065,0.786064
161,0.2918,0.772079,0.87868,0.715257,0.772079


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 7.3977 - root_mean_squared_error: 2.7199
Epoch 1: val_root_mean_squared_error improved from inf to 0.61812, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8708 - root_mean_squared_error: 1.6154 - val_loss: 0.3821 - val_root_mean_squared_error: 0.6181
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1668 - root_mean_squared_error: 0.4084
Epoch 2: val_root_mean_squared_error improved from 0.61812 to 0.49400, saving model to cache/ensemble_camembert-base/models/mlp/d63f653d22091a00061a83204896cece2b5f80614b4cf1d648815be5f7c16796_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4517 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9623,5.248505,2.290962,1.989212,5.248505
46,2.475,1.840263,1.356563,1.035875,1.840263
69,0.7816,1.25329,1.119504,0.935496,1.25329
92,0.7009,0.893308,0.94515,0.740615,0.893308
115,0.6659,1.078174,1.038351,0.732452,1.078174
138,0.4643,0.907352,0.95255,0.718505,0.907352
161,0.3577,1.023091,1.01148,0.677552,1.023091


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 9.0662 - root_mean_squared_error: 3.0110
Epoch 1: val_root_mean_squared_error improved from inf to 0.58107, saving model to cache/ensemble_camembert-base/models/mlp/9b8bb1f2957cdecfc3cf8eb772fa9ad458d9f6bc0519e49025c3d25577a338fa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8112 - root_mean_squared_error: 1.5946 - val_loss: 0.3376 - val_root_mean_squared_error: 0.5811
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2846 - root_mean_squared_error: 0.5335
Epoch 2: val_root_mean_squared_error did not improve from 0.58107
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4205 - root_mean_squared_error: 0.6461 - val_loss: 0.3722 - val_root_mean_squared_error: 0.6101
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1091,4.44729,2.10886,1.849114,4.44729
46,2.5083,1.331027,1.153702,0.881727,1.331027
69,0.7725,0.996947,0.998473,0.817043,0.996947
92,0.7383,0.938675,0.968852,0.764437,0.938675
115,0.5961,0.98279,0.991358,0.80782,0.98279
138,0.616,0.853604,0.923907,0.740235,0.853604
161,0.3317,0.875876,0.935882,0.706656,0.875876


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 5.1174 - root_mean_squared_error: 2.2622
Epoch 1: val_root_mean_squared_error improved from inf to 0.68275, saving model to cache/ensemble_camembert-base/models/mlp/5d7cdf0936f0a4e25ee11292edec8c52bc105f1482c8aab7178f37df73c5e15d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7362 - root_mean_squared_error: 1.5875 - val_loss: 0.4662 - val_root_mean_squared_error: 0.6828
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1722 - root_mean_squared_error: 0.4150
Epoch 2: val_root_mean_squared_error did not improve from 0.68275
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4907 - root_mean_squared_error: 0.6920 - val_loss: 0.6780 - val_root_mean_squared_error: 0.8234
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7054,2.824571,1.680646,1.477912,2.824571
46,2.488,0.666622,0.816469,0.704445,0.666622
69,0.7627,0.735689,0.857723,0.734863,0.735689
92,0.7182,0.560957,0.74897,0.671953,0.560957
115,0.6503,0.589773,0.767967,0.636666,0.589773
138,0.4853,0.515872,0.718242,0.622039,0.515872
161,0.3339,0.572463,0.756613,0.59851,0.572463


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 4.6273 - root_mean_squared_error: 2.1511
Epoch 1: val_root_mean_squared_error improved from inf to 0.94960, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3616 - root_mean_squared_error: 1.4857 - val_loss: 0.9017 - val_root_mean_squared_error: 0.9496
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 1.1210 - root_mean_squared_error: 1.0588
Epoch 2: val_root_mean_squared_error improved from 0.94960 to 0.71395, saving model to cache/ensemble_camembert-base/models/mlp/36cae4e60396940186b8a8af7df47a187e45d14a466c01cf6f5762720bea7086_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4589 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7016,3.789817,1.946745,1.693302,3.789817
46,2.5606,1.106523,1.051914,0.874165,1.106523
69,0.9099,0.884247,0.940344,0.825549,0.884247
92,0.756,0.848494,0.921137,0.803046,0.848494
115,0.6669,0.712052,0.843832,0.708061,0.712052
138,0.663,0.666074,0.816134,0.646128,0.666074
161,0.3967,0.727508,0.852941,0.670646,0.727508


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 5.0768 - root_mean_squared_error: 2.2532
Epoch 1: val_root_mean_squared_error improved from inf to 1.09171, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7326 - root_mean_squared_error: 1.5937 - val_loss: 1.1918 - val_root_mean_squared_error: 1.0917
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8330 - root_mean_squared_error: 0.9127
Epoch 2: val_root_mean_squared_error improved from 1.09171 to 0.63625, saving model to cache/ensemble_camembert-base/models/mlp/52e40882792762da3af77a9f62e4a66946752ca458244acab20968447e296845_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3982 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1238,3.129518,1.769044,1.605195,3.129518
46,2.3712,0.607656,0.779523,0.651343,0.607656
69,0.9312,0.600542,0.774946,0.643455,0.600542
92,0.8843,0.492786,0.701987,0.585741,0.492786
115,0.8019,0.447397,0.668877,0.582143,0.447397
138,0.7385,0.37189,0.609828,0.517655,0.37189
161,0.4353,0.479698,0.692602,0.539209,0.479698


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 7.8471 - root_mean_squared_error: 2.8013
Epoch 1: val_root_mean_squared_error improved from inf to 0.76401, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7486 - root_mean_squared_error: 1.5862 - val_loss: 0.5837 - val_root_mean_squared_error: 0.7640
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5655 - root_mean_squared_error: 0.7520
Epoch 2: val_root_mean_squared_error improved from 0.76401 to 0.64366, saving model to cache/ensemble_camembert-base/models/mlp/d8a78bcc755f00beb30b0d66cdd1d080c47a38dc7dcb2bcf473dfd05ac6243a1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5408 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7292,4.07096,2.017662,1.839427,4.07096
46,2.6703,1.008225,1.004104,0.79573,1.008225
69,0.8315,0.723351,0.8505,0.675391,0.723351
92,0.8906,0.663251,0.814402,0.641469,0.663251
115,0.7152,0.692131,0.831944,0.66564,0.692131
138,0.4261,0.745035,0.863154,0.733412,0.745035
161,0.2554,0.68232,0.826026,0.698004,0.68232


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 5.6158 - root_mean_squared_error: 2.3698
Epoch 1: val_root_mean_squared_error improved from inf to 0.98242, saving model to cache/ensemble_camembert-base/models/mlp/fe00b919cce02eb6972c2be680c2edf4a3a1c7a230cf3ffc08c72dc0be23c7c6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3595 - root_mean_squared_error: 1.4676 - val_loss: 0.9652 - val_root_mean_squared_error: 0.9824
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9623 - root_mean_squared_error: 0.9810
Epoch 2: val_root_mean_squared_error improved from 0.98242 to 0.79315, saving model to cache/ensemble_camembert-base/models/mlp/fe00b919cce02eb6972c2be680c2edf4a3a1c7a230cf3ffc08c72dc0be23c7c6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7892 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3093,5.222558,2.285292,2.052039,5.222558
46,2.3492,1.612381,1.269796,0.976049,1.612381
69,0.7326,1.063927,1.031469,0.777696,1.063927
92,0.9367,1.056756,1.027987,0.769933,1.056756
115,0.6337,1.811316,1.345851,0.996589,1.811316
138,0.6913,1.507715,1.227891,0.88629,1.507715
161,0.5725,1.131815,1.063868,0.758292,1.131815


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.7000 - root_mean_squared_error: 2.7749
Epoch 1: val_root_mean_squared_error improved from inf to 0.81152, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8784 - root_mean_squared_error: 1.6224 - val_loss: 0.6586 - val_root_mean_squared_error: 0.8115
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.9924 - root_mean_squared_error: 0.9962
Epoch 2: val_root_mean_squared_error improved from 0.81152 to 0.43508, saving model to cache/ensemble_camembert-base/models/mlp/ab559aa5862b70a006233f7c6a0c8bf491458648d5a824224e0998757f9397cd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5119 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6498,3.974303,1.993566,1.733691,3.974304
46,2.8117,1.210409,1.100186,0.850335,1.210409
69,1.0333,1.011468,1.005718,0.839849,1.011468
92,0.7032,0.862459,0.928687,0.748152,0.862459
115,0.6275,0.677484,0.823094,0.69642,0.677484
138,0.4257,0.591481,0.769078,0.655208,0.591481
161,0.2986,0.488422,0.698872,0.588681,0.488422


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 689ms/step - loss: 7.1819 - root_mean_squared_error: 2.6799
Epoch 1: val_root_mean_squared_error improved from inf to 0.70513, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1680 - root_mean_squared_error: 1.7013 - val_loss: 0.4972 - val_root_mean_squared_error: 0.7051
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2498 - root_mean_squared_error: 0.4998
Epoch 2: val_root_mean_squared_error improved from 0.70513 to 0.54764, saving model to cache/ensemble_camembert-base/models/mlp/f568d769bb4c9d71a8e806e0215d40555f905eba45fdb0137b769bdf42302fad_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2989 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0995,3.307212,1.818574,1.525475,3.307212
46,2.6284,1.056119,1.027677,0.799942,1.056119
69,0.9131,1.411184,1.187933,1.030519,1.411184
92,0.8313,1.270619,1.127218,0.973415,1.270619
115,0.6189,0.7912,0.889494,0.716085,0.7912
138,0.5471,1.008292,1.004137,0.860186,1.008292
161,0.3635,0.788575,0.888018,0.723756,0.788575


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 7.0351 - root_mean_squared_error: 2.6524
Epoch 1: val_root_mean_squared_error improved from inf to 0.47816, saving model to cache/ensemble_camembert-base/models/mlp/2a1450dcea469212b21ac24506fdbdb637f168aa1883cb2e960ac29c15a26b4b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0093 - root_mean_squared_error: 1.6661 - val_loss: 0.2286 - val_root_mean_squared_error: 0.4782
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1847 - root_mean_squared_error: 0.4298
Epoch 2: val_root_mean_squared_error improved from 0.47816 to 0.44016, saving model to cache/ensemble_camembert-base/models/mlp/2a1450dcea469212b21ac24506fdbdb637f168aa1883cb2e960ac29c15a26b4b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3654 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9853,4.52626,2.127501,1.940264,4.52626
46,2.7214,1.183847,1.088047,0.856829,1.183847
69,0.8272,0.733429,0.856405,0.688624,0.733429
92,0.6673,0.658939,0.811751,0.609066,0.658939
115,0.5769,0.814272,0.90237,0.707476,0.814272
138,0.4573,0.669735,0.818373,0.614964,0.669735
161,0.3604,0.704253,0.839198,0.633934,0.704253


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 689ms/step - loss: 5.7423 - root_mean_squared_error: 2.3963
Epoch 1: val_root_mean_squared_error improved from inf to 0.81316, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2606 - root_mean_squared_error: 1.4402 - val_loss: 0.6612 - val_root_mean_squared_error: 0.8132
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3484 - root_mean_squared_error: 0.5902
Epoch 2: val_root_mean_squared_error improved from 0.81316 to 0.41194, saving model to cache/ensemble_camembert-base/models/mlp/a8c50e0ccfbdf8289669f3831aca27e988fa7a974369f1f360490b0c10204ec2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4918 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.7412,4.415265,2.101253,1.901992,4.415265
46,2.6945,1.262623,1.123665,0.873436,1.262623
69,0.9292,0.836362,0.914528,0.802433,0.836362
92,0.739,0.620765,0.787886,0.626138,0.620765
115,0.4836,0.604801,0.77769,0.630327,0.604801
138,0.3919,0.641404,0.800877,0.657786,0.641404
161,0.2602,0.680771,0.825089,0.668742,0.680771


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 4.1782 - root_mean_squared_error: 2.0441
Epoch 1: val_root_mean_squared_error improved from inf to 0.58677, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5374 - root_mean_squared_error: 1.5375 - val_loss: 0.3443 - val_root_mean_squared_error: 0.5868
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3878 - root_mean_squared_error: 0.6227
Epoch 2: val_root_mean_squared_error improved from 0.58677 to 0.57311, saving model to cache/ensemble_camembert-base/models/mlp/029cb19e3a95824d844fbc08009f74d622678a2a405e362220d0376b1e3a1455_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6163 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9049,4.349976,2.08566,1.897744,4.349976
46,2.4256,1.064448,1.031721,0.863656,1.064448
69,0.8971,0.646794,0.804235,0.662427,0.646794
92,0.7419,0.650343,0.806438,0.661778,0.650343
115,0.7695,0.647779,0.804847,0.671635,0.647779
138,0.585,0.634293,0.796425,0.669473,0.634293
161,0.5853,0.597823,0.77319,0.65325,0.597823


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 9.5771 - root_mean_squared_error: 3.0947
Epoch 1: val_root_mean_squared_error improved from inf to 0.75009, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3326 - root_mean_squared_error: 1.7442 - val_loss: 0.5626 - val_root_mean_squared_error: 0.7501
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.6010 - root_mean_squared_error: 0.7752
Epoch 2: val_root_mean_squared_error improved from 0.75009 to 0.64180, saving model to cache/ensemble_camembert-base/models/mlp/cc10f007e808c8dc21f55f1c2a0c21c56a16da9adedad783023f786659c2d540_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5666 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1981,3.110371,1.763624,1.52831,3.110371
46,2.3161,0.824888,0.908233,0.631447,0.824888
69,0.9371,0.867083,0.931173,0.783528,0.867083
92,0.6947,0.726903,0.852586,0.708792,0.726903
115,0.4961,0.540769,0.73537,0.562376,0.540769
138,0.5214,0.82466,0.908108,0.755168,0.82466
161,0.3013,0.61668,0.78529,0.608016,0.61668


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 5.6726 - root_mean_squared_error: 2.3817
Epoch 1: val_root_mean_squared_error improved from inf to 0.71447, saving model to cache/ensemble_camembert-base/models/mlp/facef04534cb76ceb729f0fe40b42eae02a897d75b6e54c654d350b2882cf476_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3075 - root_mean_squared_error: 1.4588 - val_loss: 0.5105 - val_root_mean_squared_error: 0.7145
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.3314 - root_mean_squared_error: 0.5757
Epoch 2: val_root_mean_squared_error improved from 0.71447 to 0.63722, saving model to cache/ensemble_camembert-base/models/mlp/facef04534cb76ceb729f0fe40b42eae02a897d75b6e54c654d350b2882cf476_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5955 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6308,2.762345,1.662031,1.498469,2.762345
46,2.7194,0.569175,0.754437,0.614127,0.569175
69,0.8871,0.544661,0.738012,0.651908,0.544661
92,0.7338,0.607411,0.779366,0.677505,0.607411
115,0.5935,0.298132,0.546014,0.446821,0.298132
138,0.4626,0.287796,0.536466,0.421013,0.287796
161,0.3027,0.392576,0.626559,0.484404,0.392576


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 8.5627 - root_mean_squared_error: 2.9262
Epoch 1: val_root_mean_squared_error improved from inf to 0.77828, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0294 - root_mean_squared_error: 1.6551 - val_loss: 0.6057 - val_root_mean_squared_error: 0.7783
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.9149 - root_mean_squared_error: 0.9565
Epoch 2: val_root_mean_squared_error improved from 0.77828 to 0.50646, saving model to cache/ensemble_camembert-base/models/mlp/34f11206e998742daa38eab2cc516982d5615cb3084e509514f0b25124fe293a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5453 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5755,4.995797,2.235128,2.010234,4.995797
46,2.7345,1.503701,1.226255,0.960608,1.503701
69,0.8846,0.925439,0.961997,0.787992,0.925439
92,0.8509,0.711452,0.843476,0.661004,0.711452
115,0.6469,0.657791,0.811043,0.623951,0.65779
138,0.5907,0.762767,0.873366,0.718838,0.762767
161,0.4306,0.660718,0.812846,0.65402,0.660718


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 11.0479 - root_mean_squared_error: 3.3238
Epoch 1: val_root_mean_squared_error improved from inf to 0.65936, saving model to cache/ensemble_camembert-base/models/mlp/ad5842e273f10b3c02181f42dd7e8ff7553572de5fb264645c719022d51d24e6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0715 - root_mean_squared_error: 1.6551 - val_loss: 0.4348 - val_root_mean_squared_error: 0.6594
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6455 - root_mean_squared_error: 0.8034
Epoch 2: val_root_mean_squared_error did not improve from 0.65936
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4943 - root_mean_squared_error: 0.6986 - val_loss: 0.8211 - val_root_mean_squared_error: 0.9061
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4673,4.732597,2.175453,1.974744,4.732597
46,2.4211,1.246933,1.116661,0.867662,1.246933
69,0.9047,0.835466,0.914038,0.754902,0.835466
92,0.7698,0.805011,0.897224,0.741224,0.805011
115,0.7177,0.813571,0.901982,0.729069,0.813571
138,0.4897,0.664086,0.814914,0.685129,0.664086
161,0.3118,0.655306,0.80951,0.659627,0.655306


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 8.0300 - root_mean_squared_error: 2.8337
Epoch 1: val_root_mean_squared_error improved from inf to 0.85014, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0147 - root_mean_squared_error: 1.6594 - val_loss: 0.7227 - val_root_mean_squared_error: 0.8501
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3174 - root_mean_squared_error: 0.5634
Epoch 2: val_root_mean_squared_error improved from 0.85014 to 0.51083, saving model to cache/ensemble_camembert-base/models/mlp/df45abc26f9516b0985c41ef1fbd296225b6abce812212a05f7de3a2b5ec57a9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3964 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.364,3.241004,1.800279,1.647847,3.241004
46,2.7356,0.645529,0.803448,0.62435,0.645529
69,0.9882,0.657444,0.810829,0.688534,0.657444
92,0.7348,0.463837,0.681056,0.585376,0.463837
115,0.5308,0.433124,0.658121,0.568264,0.433124
138,0.4957,0.449414,0.670383,0.547059,0.449414
161,0.2809,0.455586,0.674971,0.57658,0.455586


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 7.6477 - root_mean_squared_error: 2.7655
Epoch 1: val_root_mean_squared_error improved from inf to 1.05806, saving model to cache/ensemble_camembert-base/models/mlp/37c75ab5a5d47915158d5fbcd19515f465ec693a96e42e62adb86d30a71e561d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5713 - root_mean_squared_error: 1.5301 - val_loss: 1.1195 - val_root_mean_squared_error: 1.0581
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.1943 - root_mean_squared_error: 1.0928
Epoch 2: val_root_mean_squared_error improved from 1.05806 to 0.46758, saving model to cache/ensemble_camembert-base/models/mlp/37c75ab5a5d47915158d5fbcd19515f465ec693a96e42e62adb86d30a71e561d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5672 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.48,5.251461,2.291607,2.031037,5.251461
46,2.7596,1.79895,1.341249,1.04626,1.79895
69,0.9887,1.145869,1.070453,0.871551,1.145869
92,0.7285,1.078358,1.03844,0.840505,1.078358
115,0.7587,0.954313,0.97689,0.769206,0.954313
138,0.5547,1.032976,1.016355,0.794699,1.032977
161,0.2442,0.996112,0.998054,0.802875,0.996112


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 5.6572 - root_mean_squared_error: 2.3785
Epoch 1: val_root_mean_squared_error improved from inf to 0.92359, saving model to cache/ensemble_camembert-base/models/mlp/39ec40fcea04f18d768c269aeac728a9c16749968ea4a27a43be972deda9c4b0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3797 - root_mean_squared_error: 1.4811 - val_loss: 0.8530 - val_root_mean_squared_error: 0.9236
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2670 - root_mean_squared_error: 0.5167
Epoch 2: val_root_mean_squared_error improved from 0.92359 to 0.83249, saving model to cache/ensemble_camembert-base/models/mlp/39ec40fcea04f18d768c269aeac728a9c16749968ea4a27a43be972deda9c4b0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4866 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8058,4.302574,2.074265,1.853103,4.302574
46,2.8135,1.157563,1.075901,0.860289,1.157563
69,0.8003,0.86262,0.928773,0.765814,0.86262
92,1.0131,0.914435,0.956261,0.788541,0.914435
115,0.6504,0.909057,0.953445,0.700244,0.909057
138,0.556,0.990098,0.995037,0.722791,0.990098
161,0.294,0.955352,0.977421,0.709497,0.955352


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 7.4628 - root_mean_squared_error: 2.7318
Epoch 1: val_root_mean_squared_error improved from inf to 0.76360, saving model to cache/ensemble_camembert-base/models/mlp/e626bb537cba3c7f4ad255c387a250ece611403476f9b91b77a8afcd3b3aad26_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7993 - root_mean_squared_error: 1.5922 - val_loss: 0.5831 - val_root_mean_squared_error: 0.7636
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5153 - root_mean_squared_error: 0.7179
Epoch 2: val_root_mean_squared_error did not improve from 0.76360
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4427 - root_mean_squared_error: 0.6591 - val_loss: 1.8867 - val_root_mean_squared_error: 1.3736
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.552,3.544122,1.882584,1.702998,3.544122
46,2.428,0.784157,0.885526,0.738653,0.784157
69,0.7612,0.69649,0.83456,0.727032,0.69649
92,0.7153,0.496213,0.704424,0.561615,0.496213
115,0.5417,0.652605,0.80784,0.609766,0.652606
138,0.3435,0.629644,0.793501,0.600321,0.629644
161,0.2526,0.643377,0.802108,0.615944,0.643377


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 6.4231 - root_mean_squared_error: 2.5344
Epoch 1: val_root_mean_squared_error improved from inf to 0.70593, saving model to cache/ensemble_camembert-base/models/mlp/8f248700cab9594c85e8dd6fa07bd4718c8b835acdb6cb0fcfae3134c0b2ffa4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4184 - root_mean_squared_error: 1.4892 - val_loss: 0.4983 - val_root_mean_squared_error: 0.7059
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.4149 - root_mean_squared_error: 0.6441
Epoch 2: val_root_mean_squared_error did not improve from 0.70593
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3833 - root_mean_squared_error: 0.6163 - val_loss: 0.5187 - val_root_mean_squared_error: 0.7202
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5676,4.044935,2.011202,1.775331,4.044935
46,2.1549,1.125347,1.060824,0.763094,1.125347
69,0.8934,0.867175,0.931222,0.665315,0.867175
92,0.7314,0.796711,0.892587,0.619596,0.796711
115,0.6059,0.709569,0.842359,0.617986,0.709569
138,0.5064,0.755169,0.869005,0.669681,0.755169
161,0.3832,0.801883,0.895479,0.725664,0.801883


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 7.3804 - root_mean_squared_error: 2.7167
Epoch 1: val_root_mean_squared_error improved from inf to 0.71205, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2818 - root_mean_squared_error: 1.7290 - val_loss: 0.5070 - val_root_mean_squared_error: 0.7121
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3108 - root_mean_squared_error: 0.5575
Epoch 2: val_root_mean_squared_error improved from 0.71205 to 0.55750, saving model to cache/ensemble_camembert-base/models/mlp/d17df1898d7112ea92d531b496966fd4bfd36d8b85efc1d82405f5c7c13f3e8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4007 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.6888,3.013263,1.735875,1.593786,3.013263
46,2.9133,0.600412,0.774863,0.62882,0.600412
69,0.8783,0.549649,0.741383,0.586311,0.549649
92,0.6302,0.624977,0.790555,0.641916,0.624977
115,0.5305,0.936611,0.967786,0.820472,0.936611
138,0.4184,0.718372,0.847568,0.712277,0.718372
161,0.3897,0.772993,0.8792,0.728652,0.772993


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 9.4310 - root_mean_squared_error: 3.0710
Epoch 1: val_root_mean_squared_error improved from inf to 0.73857, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1792 - root_mean_squared_error: 1.6916 - val_loss: 0.5455 - val_root_mean_squared_error: 0.7386
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1916 - root_mean_squared_error: 0.4377
Epoch 2: val_root_mean_squared_error improved from 0.73857 to 0.59401, saving model to cache/ensemble_camembert-base/models/mlp/a4030d1aeb8a5ed6cc179bddc801168ef5f67e6e7785868e19462620ea9b102d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3620 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9448,4.93152,2.220703,2.063189,4.93152
46,2.7116,1.357952,1.165312,0.929975,1.357952
69,1.0105,0.6654,0.81572,0.683064,0.6654
92,0.7287,0.546978,0.73958,0.581143,0.546978
115,0.5898,0.710661,0.843007,0.620642,0.710661
138,0.4342,0.580828,0.762121,0.602517,0.580828
161,0.2652,0.692554,0.832198,0.603528,0.692554


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 688ms/step - loss: 6.0998 - root_mean_squared_error: 2.4698
Epoch 1: val_root_mean_squared_error improved from inf to 0.60527, saving model to cache/ensemble_camembert-base/models/mlp/77f569c07dd1eac26d720462e7d0af96c3c7e731bee32bab8b67b5319fda50ba_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4316 - root_mean_squared_error: 1.4903 - val_loss: 0.3663 - val_root_mean_squared_error: 0.6053
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - loss: 0.1424 - root_mean_squared_error: 0.3774
Epoch 2: val_root_mean_squared_error did not improve from 0.60527
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3804 - root_mean_squared_error: 0.6099 - val_loss: 0.8692 - val_root_mean_squared_error: 0.9323
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4355,4.353471,2.086497,1.89908,4.353471
46,2.578,1.113791,1.055363,0.868798,1.113791
69,0.8308,0.780268,0.883328,0.702227,0.780268
92,0.7414,0.638642,0.799151,0.663732,0.638642
115,0.5808,0.818581,0.904755,0.734182,0.818581
138,0.445,0.673123,0.820441,0.675518,0.673123
161,0.2659,0.700149,0.836749,0.671592,0.700149


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 5.2138 - root_mean_squared_error: 2.2834
Epoch 1: val_root_mean_squared_error improved from inf to 0.61667, saving model to cache/ensemble_camembert-base/models/mlp/d26ae0a20ae743737145596f4d9c5ea917cc0e6b21952bd0cc08f0910e00ea63_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8473 - root_mean_squared_error: 1.6138 - val_loss: 0.3803 - val_root_mean_squared_error: 0.6167
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2736 - root_mean_squared_error: 0.5230
Epoch 2: val_root_mean_squared_error did not improve from 0.61667
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.3093 - root_mean_squared_error: 0.5538 - val_loss: 0.9389 - val_root_mean_squared_error: 0.9690
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7776,4.782668,2.186931,1.950998,4.782668
46,2.5523,1.402107,1.184106,0.993845,1.402107
69,0.806,0.950647,0.975011,0.841912,0.950647
92,0.6514,0.763213,0.873621,0.723377,0.763213
115,0.607,0.735914,0.857854,0.649478,0.735914
138,0.5421,1.060734,1.029919,0.819769,1.060734
161,0.3683,0.861703,0.92828,0.749268,0.861703


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 7.9752 - root_mean_squared_error: 2.8240
Epoch 1: val_root_mean_squared_error improved from inf to 0.77624, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8170 - root_mean_squared_error: 1.5984 - val_loss: 0.6025 - val_root_mean_squared_error: 0.7762
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2450 - root_mean_squared_error: 0.4949
Epoch 2: val_root_mean_squared_error improved from 0.77624 to 0.69752, saving model to cache/ensemble_camembert-base/models/mlp/4c56ac4a9ea9ed8677b4d1f4451e9e25aec3420fa32ee50aa6bbc7dad24a93b5_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3364 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6192,4.026591,2.006637,1.805106,4.026591
46,2.5676,0.9794,0.989647,0.731967,0.9794
69,0.8657,0.715724,0.846005,0.665791,0.715724
92,0.7668,0.552072,0.743015,0.587893,0.552072
115,0.4432,0.577977,0.760248,0.623251,0.577977
138,0.4317,0.545838,0.738808,0.614998,0.545838
161,0.1907,0.46861,0.684551,0.530596,0.46861


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 6.8003 - root_mean_squared_error: 2.6077
Epoch 1: val_root_mean_squared_error improved from inf to 0.57354, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6716 - root_mean_squared_error: 1.5687 - val_loss: 0.3289 - val_root_mean_squared_error: 0.5735
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2583 - root_mean_squared_error: 0.5082
Epoch 2: val_root_mean_squared_error improved from 0.57354 to 0.46969, saving model to cache/ensemble_camembert-base/models/mlp/c0cbf6959310af5e3a02129fce938634c200197d3ba21af4fd824c63129c64dc_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3948 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4935,3.764295,1.940179,1.774407,3.764295
46,2.6853,0.855435,0.924897,0.738652,0.855435
69,0.8583,0.591993,0.769411,0.579187,0.591993
92,0.7093,0.556832,0.746211,0.557124,0.556831
115,0.6547,0.500614,0.707541,0.563387,0.500614
138,0.5626,0.408764,0.639347,0.531357,0.408764
161,0.293,0.433218,0.658193,0.537899,0.433218


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 5.4357 - root_mean_squared_error: 2.3315
Epoch 1: val_root_mean_squared_error improved from inf to 0.78291, saving model to cache/ensemble_camembert-base/models/mlp/436e7d794d2ae85a585f44f4dbf0544bffce9e73d769a22e5512f743b81de5ff_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3396 - root_mean_squared_error: 1.4735 - val_loss: 0.6130 - val_root_mean_squared_error: 0.7829
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9400 - root_mean_squared_error: 0.9695
Epoch 2: val_root_mean_squared_error improved from 0.78291 to 0.74066, saving model to cache/ensemble_camembert-base/models/mlp/436e7d794d2ae85a585f44f4dbf0544bffce9e73d769a22e5512f743b81de5ff_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.7512 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4862,4.786478,2.187802,1.919069,4.786478
46,2.5736,1.478787,1.216054,0.941235,1.478787
69,0.8812,1.100771,1.049176,0.864982,1.100771
92,0.7266,0.797028,0.892764,0.768311,0.797028
115,0.5581,0.731612,0.855343,0.698644,0.731612
138,0.4204,0.649731,0.806059,0.68666,0.649731
161,0.4252,0.716397,0.846402,0.67556,0.716397


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.5294 - root_mean_squared_error: 2.5553
Epoch 1: val_root_mean_squared_error improved from inf to 0.70283, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8137 - root_mean_squared_error: 1.6167 - val_loss: 0.4940 - val_root_mean_squared_error: 0.7028
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5875 - root_mean_squared_error: 0.7665
Epoch 2: val_root_mean_squared_error improved from 0.70283 to 0.64881, saving model to cache/ensemble_camembert-base/models/mlp/b45044d996c11d2c79da9b6e32f056dbd8905c6dbd90f85b289eaa1883f4bdca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5263 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9097,5.144811,2.268218,2.003262,5.144811
46,2.8501,1.770671,1.330666,1.039688,1.770672
69,1.0454,0.973428,0.986625,0.805815,0.973429
92,0.7789,1.105297,1.051331,0.900492,1.105297
115,0.646,0.739582,0.85999,0.710043,0.739582
138,0.55,0.647077,0.804411,0.630085,0.647077
161,0.3295,0.646575,0.804099,0.594512,0.646575


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 5.8941 - root_mean_squared_error: 2.4278
Epoch 1: val_root_mean_squared_error improved from inf to 0.98841, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6460 - root_mean_squared_error: 1.5559 - val_loss: 0.9770 - val_root_mean_squared_error: 0.9884
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5771 - root_mean_squared_error: 0.7597
Epoch 2: val_root_mean_squared_error improved from 0.98841 to 0.51609, saving model to cache/ensemble_camembert-base/models/mlp/20e2dc881b62a2102e48574eaab177b1651038e73f0fecd63ccd589e6cc3036b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3290 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9613,4.023916,2.00597,1.853598,4.023917
46,2.7382,0.850659,0.922312,0.750707,0.850659
69,1.0117,0.588821,0.767346,0.588032,0.588821
92,0.6965,0.569871,0.754898,0.581029,0.569871
115,0.6065,0.604414,0.777441,0.592666,0.604414
138,0.541,0.915841,0.956996,0.76281,0.915841
161,0.335,0.788358,0.887895,0.718889,0.788358


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.7897 - root_mean_squared_error: 2.7910
Epoch 1: val_root_mean_squared_error improved from inf to 0.90905, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.6597 - root_mean_squared_error: 1.5508 - val_loss: 0.8264 - val_root_mean_squared_error: 0.9090
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3677 - root_mean_squared_error: 0.6064
Epoch 2: val_root_mean_squared_error improved from 0.90905 to 0.83622, saving model to cache/ensemble_camembert-base/models/mlp/65affc2c1a6fb4882786c9f51e6acb1386639b53acf4aefdee8f10a536775692_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5804 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2325,3.211991,1.792203,1.557087,3.211991
46,2.617,0.856867,0.925671,0.655583,0.856867
69,1.0662,0.912037,0.955006,0.832424,0.912037
92,0.853,0.88901,0.942873,0.798337,0.88901
115,0.7166,0.705123,0.839716,0.621662,0.705123
138,0.4383,0.838921,0.915926,0.674588,0.838921
161,0.3053,0.712996,0.844391,0.634127,0.712996


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 9.7503 - root_mean_squared_error: 3.1226
Epoch 1: val_root_mean_squared_error improved from inf to 0.58329, saving model to cache/ensemble_camembert-base/models/mlp/7bac9d80c284abe30ebff2e0189d77fc113418a29546fd977745a7dffaf0a51b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5597 - root_mean_squared_error: 1.7867 - val_loss: 0.3402 - val_root_mean_squared_error: 0.5833
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3272 - root_mean_squared_error: 0.5720
Epoch 2: val_root_mean_squared_error did not improve from 0.58329
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3617 - root_mean_squared_error: 0.6010 - val_loss: 0.3918 - val_root_mean_squared_error: 0.6260
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.252,3.461821,1.860597,1.664574,3.461821
46,2.6464,0.856058,0.925234,0.728613,0.856058
69,0.7703,0.629633,0.793494,0.647021,0.629633
92,0.6405,0.647063,0.804402,0.653055,0.647063
115,0.488,0.684384,0.827275,0.639448,0.684384
138,0.4568,0.684952,0.827618,0.652314,0.684952
161,0.3234,0.734754,0.857178,0.658142,0.734754


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 7.6746 - root_mean_squared_error: 2.7703
Epoch 1: val_root_mean_squared_error improved from inf to 0.82927, saving model to cache/ensemble_camembert-base/models/mlp/055da26a423a24b552e3b5e3a2863d2acdedd9779ee728efaad7c6a29c6fa52c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7321 - root_mean_squared_error: 1.5770 - val_loss: 0.6877 - val_root_mean_squared_error: 0.8293
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5570 - root_mean_squared_error: 0.7463
Epoch 2: val_root_mean_squared_error improved from 0.82927 to 0.68715, saving model to cache/ensemble_camembert-base/models/mlp/055da26a423a24b552e3b5e3a2863d2acdedd9779ee728efaad7c6a29c6fa52c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4917 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9731,4.926115,2.219485,1.980331,4.926115
46,2.8173,1.626813,1.275466,0.890202,1.626813
69,0.8402,1.29769,1.139162,0.830069,1.29769
92,0.6793,0.970912,0.985349,0.727688,0.970912
115,0.6949,1.091771,1.044878,0.767083,1.091771
138,0.4893,0.896269,0.946715,0.721545,0.896269
161,0.3249,0.85514,0.924738,0.672063,0.85514


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 684ms/step - loss: 10.0564 - root_mean_squared_error: 3.1712
Epoch 1: val_root_mean_squared_error improved from inf to 0.68327, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3957 - root_mean_squared_error: 1.7493 - val_loss: 0.4669 - val_root_mean_squared_error: 0.6833
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.2888 - root_mean_squared_error: 0.5374
Epoch 2: val_root_mean_squared_error improved from 0.68327 to 0.55529, saving model to cache/ensemble_camembert-base/models/mlp/c944a7f1a0f7fedf47606afbbe0577897311f40e490db1aac90bce3b2bb3d221_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4607 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7306,4.056648,2.014112,1.807555,4.056648
46,2.4989,1.044428,1.021973,0.776112,1.044428
69,0.8897,0.759846,0.871691,0.688929,0.759846
92,0.7167,0.704689,0.839457,0.702445,0.704689
115,0.7554,0.616432,0.785132,0.585638,0.616432
138,0.3813,0.628561,0.792818,0.604556,0.628561
161,0.282,0.738346,0.85927,0.652404,0.738346


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 6.1088 - root_mean_squared_error: 2.4716
Epoch 1: val_root_mean_squared_error improved from inf to 0.54992, saving model to cache/ensemble_camembert-base/models/mlp/b0fde418d37f22d0d5b5c110bf7c7c5fce90b5ca7dd94b521d5b08417a4cd725_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4311 - root_mean_squared_error: 1.4861 - val_loss: 0.3024 - val_root_mean_squared_error: 0.5499
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4068 - root_mean_squared_error: 0.6378
Epoch 2: val_root_mean_squared_error did not improve from 0.54992
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4166 - root_mean_squared_error: 0.6393 - val_loss: 0.3354 - val_root_mean_squared_error: 0.5792
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8364,4.398344,2.097223,1.904568,4.398343
46,2.9839,1.187798,1.089862,0.866394,1.187798
69,0.9498,0.655888,0.809869,0.623001,0.655888
92,0.7538,0.602655,0.776309,0.637776,0.602655
115,0.534,0.612696,0.782749,0.633666,0.612696
138,0.3734,0.582413,0.76316,0.611585,0.582413
161,0.2917,0.62076,0.787883,0.661551,0.62076


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 7.7620 - root_mean_squared_error: 2.7860
Epoch 1: val_root_mean_squared_error improved from inf to 1.05477, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6504 - root_mean_squared_error: 1.5436 - val_loss: 1.1125 - val_root_mean_squared_error: 1.0548
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7470 - root_mean_squared_error: 0.8643
Epoch 2: val_root_mean_squared_error improved from 1.05477 to 0.87050, saving model to cache/ensemble_camembert-base/models/mlp/06fa20ad0b456dfb06cfde9187868e2249f8f0912994bfbcbf1452469e725334_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5898 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8331,3.502621,1.871529,1.684677,3.502621
46,3.1277,0.856024,0.925216,0.701489,0.856024
69,0.8173,0.55126,0.742469,0.652115,0.55126
92,0.8457,0.443413,0.665892,0.521121,0.443413
115,0.6304,0.46124,0.679147,0.490823,0.46124
138,0.5381,0.548188,0.740398,0.591666,0.548188
161,0.387,0.521375,0.722063,0.525717,0.521375


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 7.5668 - root_mean_squared_error: 2.7508
Epoch 1: val_root_mean_squared_error improved from inf to 0.58119, saving model to cache/ensemble_camembert-base/models/mlp/3d3af924d189cfeb3f8d2ae7288a756ed8b116c4efc6a01f8122c8f3c9d40341_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7256 - root_mean_squared_error: 1.5712 - val_loss: 0.3378 - val_root_mean_squared_error: 0.5812
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2744 - root_mean_squared_error: 0.5238
Epoch 2: val_root_mean_squared_error did not improve from 0.58119
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6551 - root_mean_squared_error: 0.7992 - val_loss: 0.6242 - val_root_mean_squared_error: 0.7901
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2377,4.484953,2.117771,1.904129,4.484953
46,2.5492,1.212818,1.10128,0.901007,1.212818
69,0.9338,0.89562,0.946372,0.762769,0.895621
92,0.6141,0.66635,0.816303,0.690353,0.66635
115,0.621,0.609937,0.780985,0.673364,0.609937
138,0.5502,0.679226,0.824152,0.718429,0.679226
161,0.3712,0.658085,0.811225,0.723418,0.658085


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.0576 - root_mean_squared_error: 2.2489
Epoch 1: val_root_mean_squared_error improved from inf to 0.60721, saving model to cache/ensemble_camembert-base/models/mlp/90d3c73e5c746e18a33d9d3dddcaf150f251aec4f94b7d7ac30e8364d4ec6116_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7111 - root_mean_squared_error: 1.5934 - val_loss: 0.3687 - val_root_mean_squared_error: 0.6072
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1930 - root_mean_squared_error: 0.4393
Epoch 2: val_root_mean_squared_error improved from 0.60721 to 0.59975, saving model to cache/ensemble_camembert-base/models/mlp/90d3c73e5c746e18a33d9d3dddcaf150f251aec4f94b7d7ac30e8364d4ec6116_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6235 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8328,4.811488,2.19351,1.965059,4.811488
46,2.5054,1.28362,1.13297,0.884783,1.28362
69,0.8177,0.855982,0.925193,0.779873,0.855982
92,0.6909,0.747204,0.86441,0.682737,0.747204
115,0.5611,0.673345,0.820576,0.656056,0.673345
138,0.4826,0.882979,0.93967,0.695029,0.882979
161,0.2588,0.760995,0.87235,0.64162,0.760995


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 691ms/step - loss: 7.6993 - root_mean_squared_error: 2.7748
Epoch 1: val_root_mean_squared_error improved from inf to 0.93892, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9410 - root_mean_squared_error: 1.6323 - val_loss: 0.8816 - val_root_mean_squared_error: 0.9389
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 0.4468 - root_mean_squared_error: 0.6684
Epoch 2: val_root_mean_squared_error improved from 0.93892 to 0.64649, saving model to cache/ensemble_camembert-base/models/mlp/73acf978ad8f41e98dd0e9e08f275aac6cf8e1a50f0ff2c252f07b49f3dc0879_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3198 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1443,5.301455,2.302489,2.072931,5.301455
46,2.889,1.651086,1.284946,0.993413,1.651086
69,0.8796,0.999962,0.999981,0.826264,0.999962
92,0.7591,1.026588,1.013207,0.761924,1.026588
115,0.5575,0.96122,0.980418,0.783465,0.96122
138,0.4271,0.980178,0.990039,0.776067,0.980178
161,0.3642,0.932074,0.96544,0.767821,0.932074


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 653ms/step - loss: 6.6713 - root_mean_squared_error: 2.5829
Epoch 1: val_root_mean_squared_error improved from inf to 0.72610, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1122 - root_mean_squared_error: 1.6855 - val_loss: 0.5272 - val_root_mean_squared_error: 0.7261
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4446 - root_mean_squared_error: 0.6668
Epoch 2: val_root_mean_squared_error improved from 0.72610 to 0.52217, saving model to cache/ensemble_camembert-base/models/mlp/614251bb412d8bb31c7b47a413d1df59922aad4b7c9a8e70a43f5d658c926367_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4616 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1151,5.054897,2.24831,1.981658,5.054897
46,2.7786,1.593119,1.262188,1.000593,1.593119
69,0.7106,1.089648,1.043862,0.86415,1.089648
92,0.5517,0.846339,0.919967,0.740688,0.846339
115,0.5538,0.703573,0.838793,0.683898,0.703573
138,0.5465,0.495273,0.703756,0.511655,0.495273
161,0.3399,0.586273,0.765685,0.565867,0.586273


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 8.3900 - root_mean_squared_error: 2.8965
Epoch 1: val_root_mean_squared_error improved from inf to 0.54892, saving model to cache/ensemble_camembert-base/models/mlp/0ef4a28e534245723fd5781547e6471becf4f87dfb0793474aa234ec88389764_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3087 - root_mean_squared_error: 1.7357 - val_loss: 0.3013 - val_root_mean_squared_error: 0.5489
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.0998 - root_mean_squared_error: 0.3159
Epoch 2: val_root_mean_squared_error improved from 0.54892 to 0.45196, saving model to cache/ensemble_camembert-base/models/mlp/0ef4a28e534245723fd5781547e6471becf4f87dfb0793474aa234ec88389764_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3592 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8121,4.939594,2.22252,1.916676,4.939594
46,2.5699,1.71536,1.309718,1.011945,1.71536
69,0.7135,1.236099,1.1118,0.952304,1.236099
92,0.6093,0.931397,0.965089,0.786247,0.931397
115,0.5251,1.14075,1.068059,0.766051,1.14075
138,0.3292,0.967124,0.983425,0.70933,0.967124
161,0.261,1.092057,1.045015,0.749862,1.092057


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 9.7319 - root_mean_squared_error: 3.1196
Epoch 1: val_root_mean_squared_error improved from inf to 0.66242, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0205 - root_mean_squared_error: 1.6511 - val_loss: 0.4388 - val_root_mean_squared_error: 0.6624
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2469 - root_mean_squared_error: 0.4969
Epoch 2: val_root_mean_squared_error improved from 0.66242 to 0.54824, saving model to cache/ensemble_camembert-base/models/mlp/797ef13d47dcbe26d654acb36196e11c1fecfa1995dc0fe00e8b78e0e3ac2258_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4091 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2758,4.154639,2.038293,1.812663,4.154639
46,2.58,1.133979,1.064884,0.795987,1.133979
69,0.8012,0.828022,0.909957,0.718438,0.828022
92,0.6659,1.20385,1.097201,0.809976,1.20385
115,0.5147,0.819323,0.905165,0.754892,0.819323
138,0.4852,0.868491,0.931929,0.67563,0.868491
161,0.2547,0.889141,0.942943,0.6926,0.889141


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 4.9227 - root_mean_squared_error: 2.2187
Epoch 1: val_root_mean_squared_error improved from inf to 0.60353, saving model to cache/ensemble_camembert-base/models/mlp/bb9e7b05d0b7844846c139c788f682a4587c64a7747caf3ae01f00ffaec74d0d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7031 - root_mean_squared_error: 1.5797 - val_loss: 0.3643 - val_root_mean_squared_error: 0.6035
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1988 - root_mean_squared_error: 0.4458
Epoch 2: val_root_mean_squared_error did not improve from 0.60353
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5227 - root_mean_squared_error: 0.7144 - val_loss: 1.4527 - val_root_mean_squared_error: 1.2053
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4343,3.546639,1.883252,1.693664,3.546639
46,2.3382,0.827031,0.909413,0.731478,0.827031
69,0.7962,0.677547,0.823132,0.67495,0.677547
92,0.7338,0.650766,0.806701,0.701394,0.650767
115,0.5819,0.594716,0.771179,0.643435,0.594716
138,0.3886,0.656231,0.810081,0.655774,0.656231
161,0.2493,0.686925,0.828809,0.657955,0.686925


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 719ms/step - loss: 5.4651 - root_mean_squared_error: 2.3378
Epoch 1: val_root_mean_squared_error improved from inf to 1.01004, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4310 - root_mean_squared_error: 1.4991 - val_loss: 1.0202 - val_root_mean_squared_error: 1.0100
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.4954 - root_mean_squared_error: 1.2229
Epoch 2: val_root_mean_squared_error improved from 1.01004 to 0.76773, saving model to cache/ensemble_camembert-base/models/mlp/3ea417c9c0d8a0370d94890257240c44e30048fb93b1e6eeb4f211a208c36dd0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7105 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6507,3.654781,1.911748,1.722245,3.654781
46,2.3161,0.909962,0.953919,0.766889,0.909962
69,0.9226,0.625461,0.790861,0.673083,0.625461
92,0.7031,0.515347,0.717877,0.598291,0.515347
115,0.63,0.552801,0.743506,0.605338,0.552801
138,0.6501,0.559038,0.747688,0.593812,0.559038
161,0.4479,0.410185,0.640457,0.538955,0.410185


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 4.8097 - root_mean_squared_error: 2.1931
Epoch 1: val_root_mean_squared_error improved from inf to 0.85934, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3804 - root_mean_squared_error: 1.4847 - val_loss: 0.7385 - val_root_mean_squared_error: 0.8593
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4375 - root_mean_squared_error: 0.6614
Epoch 2: val_root_mean_squared_error improved from 0.85934 to 0.35006, saving model to cache/ensemble_camembert-base/models/mlp/378c10cb5ca9633e75c21c976a1735dc24958e2a55f8cca505c432d26cfe9f39_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3835 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9647,3.093054,1.758708,1.601737,3.093054
46,2.3348,0.581327,0.762448,0.659282,0.581327
69,0.9689,0.642603,0.801625,0.636097,0.642602
92,0.656,0.526468,0.725581,0.545769,0.526468
115,0.6063,0.490428,0.700305,0.595232,0.490428
138,0.3813,0.979212,0.989551,0.774885,0.979212
161,0.4051,0.405219,0.636568,0.523498,0.405219


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 8.3390 - root_mean_squared_error: 2.8877
Epoch 1: val_root_mean_squared_error improved from inf to 1.03796, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0002 - root_mean_squared_error: 1.6532 - val_loss: 1.0774 - val_root_mean_squared_error: 1.0380
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0223 - root_mean_squared_error: 1.0111
Epoch 2: val_root_mean_squared_error improved from 1.03796 to 0.64276, saving model to cache/ensemble_camembert-base/models/mlp/fae9f592ac99d8425d0d402253b25bc6dc26419b629a0fc7cbe7710414ae9a8a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5972 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7825,3.946859,1.98667,1.808872,3.946859
46,2.5409,0.979497,0.989695,0.765682,0.979497
69,0.8967,0.702609,0.838218,0.65569,0.702609
92,0.8929,0.564617,0.75141,0.585615,0.564617
115,0.7568,0.525509,0.72492,0.589147,0.525509
138,0.4941,0.475858,0.689824,0.551018,0.475858
161,0.3439,0.52021,0.721256,0.566908,0.52021


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 696ms/step - loss: 4.5647 - root_mean_squared_error: 2.1365
Epoch 1: val_root_mean_squared_error improved from inf to 0.59741, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2514 - root_mean_squared_error: 1.4535 - val_loss: 0.3569 - val_root_mean_squared_error: 0.5974
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2970 - root_mean_squared_error: 0.5450
Epoch 2: val_root_mean_squared_error improved from 0.59741 to 0.50016, saving model to cache/ensemble_camembert-base/models/mlp/cc948c3e4c896b432bd30fbc57bf2fa850b5cd00ae138e5a8d26f8f158344945_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7327 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3992,4.89173,2.211726,1.964861,4.89173
46,2.4639,1.472597,1.213506,0.928296,1.472597
69,0.7178,1.021497,1.010692,0.774362,1.021498
92,0.799,0.799342,0.894059,0.684636,0.799342
115,0.5928,0.914603,0.956349,0.724541,0.914603
138,0.5388,1.186345,1.089195,0.821089,1.186345
161,0.339,0.945846,0.972546,0.804829,0.945846


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 8.1324 - root_mean_squared_error: 2.8517
Epoch 1: val_root_mean_squared_error improved from inf to 0.84385, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7815 - root_mean_squared_error: 1.5870 - val_loss: 0.7121 - val_root_mean_squared_error: 0.8439
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.9639 - root_mean_squared_error: 0.9818
Epoch 2: val_root_mean_squared_error improved from 0.84385 to 0.61198, saving model to cache/ensemble_camembert-base/models/mlp/a8eaebffc0733744c24dc44c1d8a88151103d440835cf5494aa5786b69ef3e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3892 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5867,4.489234,2.118781,1.847444,4.489234
46,2.7151,1.467115,1.211245,0.991197,1.467115
69,1.0285,1.079727,1.039099,0.873241,1.079727
92,0.7043,0.97349,0.986656,0.826095,0.97349
115,0.6406,0.759317,0.871388,0.760401,0.759317
138,0.4454,0.57823,0.760415,0.638537,0.57823
161,0.4,0.50853,0.713113,0.579174,0.50853


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.5047 - root_mean_squared_error: 2.3462
Epoch 1: val_root_mean_squared_error improved from inf to 0.66981, saving model to cache/ensemble_camembert-base/models/mlp/d03ef7687352d9539ad20a9489ef19acd31ec059a6862a9a512583b5ee3c9ffe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5980 - root_mean_squared_error: 1.5472 - val_loss: 0.4486 - val_root_mean_squared_error: 0.6698
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4983 - root_mean_squared_error: 0.7059
Epoch 2: val_root_mean_squared_error did not improve from 0.66981
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4299 - root_mean_squared_error: 0.6526 - val_loss: 0.5350 - val_root_mean_squared_error: 0.7314
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8286,3.354179,1.831442,1.524862,3.354179
46,2.7545,1.119336,1.057987,0.812691,1.119336
69,0.9497,1.26192,1.123352,0.941594,1.26192
92,0.8343,1.508731,1.228304,1.07334,1.50873
115,0.708,1.153331,1.073932,0.887958,1.153331
138,0.6755,0.885032,0.940761,0.761114,0.885032
161,0.5072,0.75443,0.868579,0.716767,0.75443


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 7.6353 - root_mean_squared_error: 2.7632
Epoch 1: val_root_mean_squared_error improved from inf to 0.70946, saving model to cache/ensemble_camembert-base/models/mlp/6ee7a61e0c51edc963b5c4a3b46e67d88dfabcb9981508526cab3b47b7999ff3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9574 - root_mean_squared_error: 1.6473 - val_loss: 0.5033 - val_root_mean_squared_error: 0.7095
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3820 - root_mean_squared_error: 0.6180
Epoch 2: val_root_mean_squared_error improved from 0.70946 to 0.65313, saving model to cache/ensemble_camembert-base/models/mlp/6ee7a61e0c51edc963b5c4a3b46e67d88dfabcb9981508526cab3b47b7999ff3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5592 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0834,3.948905,1.987185,1.778658,3.948905
46,2.6335,1.012214,1.006089,0.790319,1.012214
69,0.8062,0.801189,0.895091,0.726158,0.801189
92,0.6431,0.563269,0.750513,0.590014,0.56327
115,0.6869,0.632449,0.795267,0.630937,0.632449
138,0.4708,0.576555,0.759312,0.630433,0.576555
161,0.3256,0.684511,0.827352,0.66556,0.684511


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.9596 - root_mean_squared_error: 2.6381
Epoch 1: val_root_mean_squared_error improved from inf to 0.62522, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 2.3860 - root_mean_squared_error: 1.4761 - val_loss: 0.3909 - val_root_mean_squared_error: 0.6252
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2351 - root_mean_squared_error: 0.4849
Epoch 2: val_root_mean_squared_error improved from 0.62522 to 0.54040, saving model to cache/ensemble_camembert-base/models/mlp/75b1bdb998dffff9b90429284b355bbfb687cb2e4ecee2340e030322b57e925a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3628 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3985,4.038105,2.009503,1.796771,4.038104
46,2.6173,1.152473,1.073533,0.83506,1.152473
69,0.9766,0.872041,0.933831,0.813304,0.872041
92,0.7129,0.653084,0.808136,0.652852,0.653084
115,0.4886,0.623682,0.789735,0.618768,0.623682
138,0.3623,0.632831,0.795507,0.62633,0.632831
161,0.28,0.707511,0.841137,0.630332,0.707511


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 4.6558 - root_mean_squared_error: 2.1577
Epoch 1: val_root_mean_squared_error improved from inf to 0.67108, saving model to cache/ensemble_camembert-base/models/mlp/3fa44c0dd855d51bd227c7e0b227afbf92085597d6c3e2505bf6dc0731839066_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6825 - root_mean_squared_error: 1.5786 - val_loss: 0.4504 - val_root_mean_squared_error: 0.6711
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4030 - root_mean_squared_error: 0.6348
Epoch 2: val_root_mean_squared_error did not improve from 0.67108
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4119 - root_mean_squared_error: 0.6383 - val_loss: 0.5405 - val_root_mean_squared_error: 0.7352
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0028,4.18699,2.046214,1.855314,4.18699
46,2.4801,1.013901,1.006927,0.821067,1.013901
69,0.8393,0.604582,0.777549,0.634968,0.604582
92,0.7253,0.638633,0.799145,0.657514,0.638633
115,0.628,0.597883,0.773229,0.627928,0.597883
138,0.4676,0.683602,0.826802,0.622911,0.683602
161,0.3819,0.693563,0.832804,0.605677,0.693563


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 9.6324 - root_mean_squared_error: 3.1036
Epoch 1: val_root_mean_squared_error improved from inf to 0.89010, saving model to cache/ensemble_camembert-base/models/mlp/07c721c138ec732f0fcf9a79f008d8a78607776f01781a6c00fcdd8f35514301_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0962 - root_mean_squared_error: 1.6757 - val_loss: 0.7923 - val_root_mean_squared_error: 0.8901
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8511 - root_mean_squared_error: 0.9226
Epoch 2: val_root_mean_squared_error improved from 0.89010 to 0.54352, saving model to cache/ensemble_camembert-base/models/mlp/07c721c138ec732f0fcf9a79f008d8a78607776f01781a6c00fcdd8f35514301_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4641 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6922,3.243836,1.801065,1.581868,3.243836
46,2.1492,0.82306,0.907227,0.652824,0.82306
69,0.9629,0.758755,0.871065,0.724862,0.758755
92,0.5748,0.710175,0.842719,0.712439,0.710175
115,0.5488,0.644211,0.802627,0.643142,0.644211
138,0.4556,0.72142,0.849365,0.727222,0.72142
161,0.3497,0.697327,0.835061,0.717163,0.697327


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 5.3470 - root_mean_squared_error: 2.3124
Epoch 1: val_root_mean_squared_error improved from inf to 0.93238, saving model to cache/ensemble_camembert-base/models/mlp/fb49d75f95a3ac4425bb74a03058702b65ca3e49722e4cae29ce0591fc770417_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.0454 - root_mean_squared_error: 1.3817 - val_loss: 0.8693 - val_root_mean_squared_error: 0.9324
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5615 - root_mean_squared_error: 0.7493
Epoch 2: val_root_mean_squared_error improved from 0.93238 to 0.75686, saving model to cache/ensemble_camembert-base/models/mlp/fb49d75f95a3ac4425bb74a03058702b65ca3e49722e4cae29ce0591fc770417_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4120 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6712,2.982781,1.727073,1.572655,2.982781
46,2.5585,0.600513,0.774928,0.62537,0.600513
69,0.8582,0.51662,0.718763,0.627803,0.51662
92,0.7433,0.440107,0.663406,0.563601,0.440107
115,0.646,0.397477,0.630458,0.503228,0.397477
138,0.4664,0.342388,0.585139,0.465997,0.342388
161,0.2691,0.382796,0.618705,0.452862,0.382796


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 6.9240 - root_mean_squared_error: 2.6314
Epoch 1: val_root_mean_squared_error improved from inf to 0.55871, saving model to cache/ensemble_camembert-base/models/mlp/88b26dbf5ebe0e276ce713f83c865118d1b202ee60e8cbc364ee5a252d0e8135_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7752 - root_mean_squared_error: 1.5941 - val_loss: 0.3122 - val_root_mean_squared_error: 0.5587
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 0.5048 - root_mean_squared_error: 0.7105
Epoch 2: val_root_mean_squared_error improved from 0.55871 to 0.52682, saving model to cache/ensemble_camembert-base/models/mlp/88b26dbf5ebe0e276ce713f83c865118d1b202ee60e8cbc364ee5a252d0e8135_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4930 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.582,4.53548,2.129667,1.859487,4.535481
46,2.6489,1.396346,1.181671,0.90373,1.396346
69,0.8347,1.10353,1.05049,0.885001,1.10353
92,0.7835,0.91038,0.954138,0.751011,0.91038
115,0.5515,0.664792,0.815347,0.639242,0.664791
138,0.448,0.708885,0.841953,0.679476,0.708885
161,0.2707,0.639149,0.799468,0.634346,0.639149


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 10.2865 - root_mean_squared_error: 3.2073
Epoch 1: val_root_mean_squared_error improved from inf to 0.55508, saving model to cache/ensemble_camembert-base/models/mlp/4f63c5812d7c65a626aa73fcd436ef60b6d4715b95e45bf09bf0bcd9fdd30d12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9095 - root_mean_squared_error: 1.6141 - val_loss: 0.3081 - val_root_mean_squared_error: 0.5551
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3141 - root_mean_squared_error: 0.5605
Epoch 2: val_root_mean_squared_error did not improve from 0.55508
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6159 - root_mean_squared_error: 0.7730 - val_loss: 0.4051 - val_root_mean_squared_error: 0.6364
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - los

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4446,4.587879,2.141934,1.928649,4.587879
46,2.2561,1.219338,1.104237,0.873138,1.219339
69,0.9067,0.845359,0.919434,0.735829,0.845359
92,0.6475,0.797117,0.892814,0.727137,0.797117
115,0.6713,0.769159,0.877017,0.743946,0.769159
138,0.4612,0.677728,0.823242,0.694339,0.677728
161,0.3285,0.840286,0.916671,0.772386,0.840286


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 684ms/step - loss: 8.3595 - root_mean_squared_error: 2.8913
Epoch 1: val_root_mean_squared_error improved from inf to 0.99482, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2418 - root_mean_squared_error: 1.7235 - val_loss: 0.9897 - val_root_mean_squared_error: 0.9948
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.6855 - root_mean_squared_error: 0.8279
Epoch 2: val_root_mean_squared_error improved from 0.99482 to 0.77489, saving model to cache/ensemble_camembert-base/models/mlp/8bac784b71a0567891320418c97082ddd75ab91a9461d3d3ba89935fb079b93b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5219 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2936,3.354905,1.83164,1.636926,3.354905
46,2.6353,0.777359,0.88168,0.658779,0.777359
69,0.9363,0.753225,0.867885,0.740517,0.753225
92,0.7807,0.528201,0.726774,0.623188,0.528201
115,0.6105,0.446048,0.667868,0.540116,0.446048
138,0.3801,0.533956,0.730723,0.558333,0.533956
161,0.2539,0.410017,0.640326,0.551196,0.410017


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 7.2950 - root_mean_squared_error: 2.7009
Epoch 1: val_root_mean_squared_error improved from inf to 0.50114, saving model to cache/ensemble_camembert-base/models/mlp/5170136033579d50495d65ecbad40ce8b7f100136bc373ab352211343c2b1a12_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3970 - root_mean_squared_error: 1.4757 - val_loss: 0.2511 - val_root_mean_squared_error: 0.5011
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2940 - root_mean_squared_error: 0.5423
Epoch 2: val_root_mean_squared_error did not improve from 0.50114
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2924 - root_mean_squared_error: 0.5365 - val_loss: 0.3255 - val_root_mean_squared_error: 0.5705
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.5466,5.098289,2.257939,1.984369,5.098289
46,2.599,1.765417,1.32869,0.995799,1.765417
69,0.982,1.158782,1.076467,0.863196,1.158782
92,0.752,1.15104,1.072865,0.81329,1.15104
115,0.787,0.843111,0.918211,0.69371,0.843111
138,0.6282,0.789463,0.888517,0.664448,0.789463
161,0.3465,0.898713,0.948005,0.712383,0.898713


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.7951 - root_mean_squared_error: 2.7920
Epoch 1: val_root_mean_squared_error improved from inf to 0.59300, saving model to cache/ensemble_camembert-base/models/mlp/84acc7d4a7a28c8894cc67d8b6b14b7230685d24924d875239b8a2aee6de0aae_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7309 - root_mean_squared_error: 1.5730 - val_loss: 0.3517 - val_root_mean_squared_error: 0.5930
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.3544 - root_mean_squared_error: 0.5953
Epoch 2: val_root_mean_squared_error did not improve from 0.59300
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5946 - root_mean_squared_error: 0.7668 - val_loss: 0.4831 - val_root_mean_squared_error: 0.6951
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9671,4.027421,2.006843,1.812114,4.02742
46,2.6516,0.996202,0.998099,0.811285,0.996202
69,0.7414,0.735891,0.857841,0.712013,0.735891
92,0.8821,0.803383,0.896316,0.694742,0.803383
115,0.7174,0.667155,0.816795,0.61661,0.667155
138,0.5581,0.665331,0.815678,0.591848,0.665331
161,0.3143,0.661992,0.813629,0.607058,0.661992


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.2605 - root_mean_squared_error: 2.6945
Epoch 1: val_root_mean_squared_error improved from inf to 0.66747, saving model to cache/ensemble_camembert-base/models/mlp/07cf89bb1a4d57367addb8b2148820a0161dd4ddc5f99dc33bc0fa8a0f616415_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7202 - root_mean_squared_error: 1.5729 - val_loss: 0.4455 - val_root_mean_squared_error: 0.6675
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3600 - root_mean_squared_error: 0.6000
Epoch 2: val_root_mean_squared_error improved from 0.66747 to 0.44563, saving model to cache/ensemble_camembert-base/models/mlp/07cf89bb1a4d57367addb8b2148820a0161dd4ddc5f99dc33bc0fa8a0f616415_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3915 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4838,4.077297,2.019232,1.844798,4.077297
46,2.3801,0.94322,0.971195,0.81713,0.94322
69,0.7725,0.654555,0.809046,0.707027,0.654555
92,0.6954,0.593529,0.770408,0.617979,0.593529
115,0.5763,0.502441,0.70883,0.564832,0.502441
138,0.3253,0.562086,0.749724,0.600533,0.562086
161,0.2546,0.494052,0.702889,0.560476,0.494052


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 700ms/step - loss: 5.5746 - root_mean_squared_error: 2.3611
Epoch 1: val_root_mean_squared_error improved from inf to 1.05616, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3436 - root_mean_squared_error: 1.4673 - val_loss: 1.1155 - val_root_mean_squared_error: 1.0562
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.9238 - root_mean_squared_error: 0.9611
Epoch 2: val_root_mean_squared_error improved from 1.05616 to 0.65094, saving model to cache/ensemble_camembert-base/models/mlp/83ec554b6a76100907f47604b907631691cd32ef6b91d85d530a57562a9b76b4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5955 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3668,4.827209,2.197091,1.980923,4.827208
46,2.0881,1.328152,1.152455,0.884674,1.328152
69,0.8583,0.989451,0.994712,0.755053,0.989451
92,0.7505,0.734737,0.857168,0.62438,0.734737
115,0.6658,0.56475,0.751499,0.538169,0.56475
138,0.561,0.496738,0.704796,0.518236,0.496738
161,0.5125,0.407012,0.637975,0.474883,0.407012


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.8194 - root_mean_squared_error: 2.6114
Epoch 1: val_root_mean_squared_error improved from inf to 0.51979, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7631 - root_mean_squared_error: 1.5918 - val_loss: 0.2702 - val_root_mean_squared_error: 0.5198
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2651 - root_mean_squared_error: 0.5149
Epoch 2: val_root_mean_squared_error improved from 0.51979 to 0.44484, saving model to cache/ensemble_camembert-base/models/mlp/d5d3fc6bed7ec653c29f8e2742f4a8da6e184f6e0227a1c0333c7a7fdbdc1b6a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3590 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4646,3.206164,1.790577,1.624907,3.206165
46,2.9296,0.719913,0.848477,0.723147,0.719913
69,0.8765,0.558431,0.747283,0.616449,0.558431
92,0.7616,0.542369,0.736457,0.618975,0.542369
115,0.542,0.614776,0.784076,0.665358,0.614776
138,0.4869,0.774965,0.880321,0.733791,0.774965
161,0.3862,0.79714,0.892827,0.733455,0.79714


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 9.6597 - root_mean_squared_error: 3.1080
Epoch 1: val_root_mean_squared_error improved from inf to 0.67394, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1238 - root_mean_squared_error: 1.6697 - val_loss: 0.4542 - val_root_mean_squared_error: 0.6739
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1375 - root_mean_squared_error: 0.3708
Epoch 2: val_root_mean_squared_error improved from 0.67394 to 0.63534, saving model to cache/ensemble_camembert-base/models/mlp/4dfb0c8b4dff33447592e9090269c6f47768fdef852b998f4bd2c0bf9774a47c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2592 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2182,3.99372,1.998429,1.817396,3.99372
46,2.6642,1.027079,1.013449,0.791605,1.027079
69,1.134,0.664988,0.815468,0.690133,0.664988
92,0.821,0.549592,0.741345,0.593358,0.549592
115,0.6025,0.636177,0.797607,0.630803,0.636177
138,0.4416,0.694251,0.833218,0.659323,0.694251
161,0.3132,0.707715,0.841258,0.645989,0.707715


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.3119 - root_mean_squared_error: 2.3048
Epoch 1: val_root_mean_squared_error improved from inf to 0.55913, saving model to cache/ensemble_camembert-base/models/mlp/90627c4cf927a73c683716766299bb210411545779725ff855a9709ed3e568aa_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2331 - root_mean_squared_error: 1.4314 - val_loss: 0.3126 - val_root_mean_squared_error: 0.5591
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1790 - root_mean_squared_error: 0.4230
Epoch 2: val_root_mean_squared_error did not improve from 0.55913
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4673 - root_mean_squared_error: 0.6753 - val_loss: 0.6361 - val_root_mean_squared_error: 0.7976
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.127,4.56716,2.137091,1.932422,4.56716
46,2.4745,1.231055,1.109529,0.88391,1.231055
69,0.8029,0.805491,0.897491,0.760708,0.805491
92,0.8111,0.680263,0.82478,0.712518,0.680263
115,0.597,0.849077,0.921454,0.774745,0.849077
138,0.4336,0.739625,0.860014,0.696639,0.739625
161,0.3499,0.648483,0.805284,0.669134,0.648483


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 692ms/step - loss: 4.3646 - root_mean_squared_error: 2.0892
Epoch 1: val_root_mean_squared_error improved from inf to 0.98249, saving model to cache/ensemble_camembert-base/models/mlp/484585ffd706500a7f7ace1488b27c7e2241b40b06102f345843c09c6fad1282_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7113 - root_mean_squared_error: 1.5824 - val_loss: 0.9653 - val_root_mean_squared_error: 0.9825
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5328 - root_mean_squared_error: 0.7299
Epoch 2: val_root_mean_squared_error improved from 0.98249 to 0.66410, saving model to cache/ensemble_camembert-base/models/mlp/484585ffd706500a7f7ace1488b27c7e2241b40b06102f345843c09c6fad1282_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3186 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7667,4.073913,2.018394,1.795085,4.073913
46,2.6412,1.112528,1.054765,0.836562,1.112528
69,0.8489,0.817171,0.903975,0.780228,0.817171
92,0.5893,0.603411,0.776795,0.594274,0.603411
115,0.5233,0.494562,0.703251,0.57682,0.494562
138,0.4813,0.632995,0.79561,0.608099,0.632995
161,0.384,0.499229,0.706561,0.551857,0.499229


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 6.9957 - root_mean_squared_error: 2.6449
Epoch 1: val_root_mean_squared_error improved from inf to 0.75896, saving model to cache/ensemble_camembert-base/models/mlp/9c9b88e3d727ee7efcc7e46815c5a0506914622fdc2c6cace5dad8495e0d2070_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6530 - root_mean_squared_error: 1.5536 - val_loss: 0.5760 - val_root_mean_squared_error: 0.7590
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1903 - root_mean_squared_error: 0.4362
Epoch 2: val_root_mean_squared_error did not improve from 0.75896
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5013 - root_mean_squared_error: 0.6982 - val_loss: 0.6548 - val_root_mean_squared_error: 0.8092
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7477,4.217171,2.053575,1.866424,4.217171
46,2.555,1.019078,1.009494,0.770093,1.019078
69,0.8194,0.720854,0.849031,0.655202,0.720854
92,0.8461,0.648247,0.805138,0.62663,0.648247
115,0.6302,0.556613,0.746065,0.568154,0.556613
138,0.4941,0.496272,0.704465,0.523722,0.496272
161,0.241,0.610008,0.78103,0.567309,0.610008


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 698ms/step - loss: 5.9245 - root_mean_squared_error: 2.4340
Epoch 1: val_root_mean_squared_error improved from inf to 0.53275, saving model to cache/ensemble_camembert-base/models/mlp/c92f44848a2384af63d3cfbddba60d63334221abe02da31e9e1c586b1e2533ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5849 - root_mean_squared_error: 1.5502 - val_loss: 0.2838 - val_root_mean_squared_error: 0.5327
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.0881 - root_mean_squared_error: 0.2969
Epoch 2: val_root_mean_squared_error improved from 0.53275 to 0.49647, saving model to cache/ensemble_camembert-base/models/mlp/c92f44848a2384af63d3cfbddba60d63334221abe02da31e9e1c586b1e2533ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3440 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.232,3.718058,1.928227,1.738642,3.718058
46,2.84,0.90041,0.948899,0.761023,0.90041
69,0.8315,0.665721,0.815917,0.644543,0.665721
92,0.6945,0.601121,0.77532,0.636387,0.601121
115,0.5936,0.512778,0.716085,0.566823,0.512778
138,0.3895,0.437278,0.66127,0.48817,0.437278
161,0.263,0.390105,0.624584,0.484818,0.390105


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 5.9253 - root_mean_squared_error: 2.4342
Epoch 1: val_root_mean_squared_error improved from inf to 0.68916, saving model to cache/ensemble_camembert-base/models/mlp/40166beeeec832cc93000864fa474708241e832c774bfc63085e6a64b143b22d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7273 - root_mean_squared_error: 1.5901 - val_loss: 0.4749 - val_root_mean_squared_error: 0.6892
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4900 - root_mean_squared_error: 0.7000
Epoch 2: val_root_mean_squared_error improved from 0.68916 to 0.57876, saving model to cache/ensemble_camembert-base/models/mlp/40166beeeec832cc93000864fa474708241e832c774bfc63085e6a64b143b22d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4016 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9437,4.812007,2.193629,1.946517,4.812007
46,2.4694,1.429075,1.195439,0.933555,1.429075
69,0.8515,1.01262,1.00629,0.84746,1.01262
92,0.8241,0.9635,0.981581,0.822211,0.9635
115,0.7605,0.839751,0.916379,0.732862,0.839751
138,0.6124,0.527603,0.726363,0.574381,0.527603
161,0.5848,0.526179,0.725382,0.578752,0.526179


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 5.9808 - root_mean_squared_error: 2.4456
Epoch 1: val_root_mean_squared_error improved from inf to 0.53349, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0257 - root_mean_squared_error: 1.6821 - val_loss: 0.2846 - val_root_mean_squared_error: 0.5335
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3127 - root_mean_squared_error: 0.5592
Epoch 2: val_root_mean_squared_error improved from 0.53349 to 0.39183, saving model to cache/ensemble_camembert-base/models/mlp/98bfc68988dcd09c544d6055459ffd19f971da505be62b5b0071443f1b3ff25e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4569 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0362,5.12645,2.264166,2.024307,5.12645
46,2.6423,1.693647,1.301402,1.017774,1.693647
69,1.1224,0.797726,0.893155,0.730001,0.797726
92,0.7087,0.815736,0.903181,0.774068,0.815736
115,0.5413,0.788336,0.887883,0.70156,0.788336
138,0.4622,0.707416,0.84108,0.656883,0.707416
161,0.3448,0.756662,0.869863,0.660233,0.756662


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 6.1741 - root_mean_squared_error: 2.4848
Epoch 1: val_root_mean_squared_error improved from inf to 1.21636, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8292 - root_mean_squared_error: 1.6178 - val_loss: 1.4795 - val_root_mean_squared_error: 1.2164
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8878 - root_mean_squared_error: 0.9422
Epoch 2: val_root_mean_squared_error improved from 1.21636 to 0.52434, saving model to cache/ensemble_camembert-base/models/mlp/c01eb476db289b13611dfb0f2d9326966a6579d18e466723c8a66d4b94c9cd51_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4365 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9561,4.396349,2.096747,1.921685,4.396349
46,2.6298,1.043182,1.021363,0.805356,1.043183
69,1.0037,0.649793,0.806097,0.643316,0.649793
92,0.604,0.565851,0.752231,0.622638,0.565851
115,0.3925,0.82292,0.90715,0.715518,0.82292
138,0.2801,0.884241,0.940341,0.718265,0.884241
161,0.1878,0.741441,0.86107,0.667255,0.741441


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 8.5298 - root_mean_squared_error: 2.9206
Epoch 1: val_root_mean_squared_error improved from inf to 0.81170, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8794 - root_mean_squared_error: 1.6079 - val_loss: 0.6589 - val_root_mean_squared_error: 0.8117
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4971 - root_mean_squared_error: 0.7050
Epoch 2: val_root_mean_squared_error improved from 0.81170 to 0.45203, saving model to cache/ensemble_camembert-base/models/mlp/6bd5c4937040097e4da331b1bc970d6c456731b6eea09fbc46c64193b4cddbc1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3859 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8727,3.358602,1.832649,1.615112,3.358602
46,2.4929,0.87645,0.936189,0.738821,0.87645
69,1.0521,0.822482,0.906908,0.781938,0.822482
92,0.8938,0.724444,0.851143,0.717574,0.724444
115,0.762,0.694614,0.833435,0.681982,0.694614
138,0.5379,0.788688,0.888081,0.712787,0.788688
161,0.4741,0.764583,0.874405,0.687723,0.764583


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 9.3638 - root_mean_squared_error: 3.0600
Epoch 1: val_root_mean_squared_error improved from inf to 0.80521, saving model to cache/ensemble_camembert-base/models/mlp/b0610079c7dba3b8ee1400012003fc1bfb340015fdc7a5731526be9ee8f80bb7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.5816 - root_mean_squared_error: 1.8017 - val_loss: 0.6484 - val_root_mean_squared_error: 0.8052
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5286 - root_mean_squared_error: 0.7271
Epoch 2: val_root_mean_squared_error improved from 0.80521 to 0.50105, saving model to cache/ensemble_camembert-base/models/mlp/b0610079c7dba3b8ee1400012003fc1bfb340015fdc7a5731526be9ee8f80bb7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4313 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2783,3.067946,1.751555,1.577915,3.067947
46,2.734,0.675037,0.821607,0.645546,0.675038
69,0.7983,0.520999,0.721803,0.571732,0.520999
92,0.6096,0.411237,0.641278,0.506683,0.411237
115,0.3808,0.437089,0.661127,0.512507,0.437089
138,0.3157,0.585195,0.764981,0.571454,0.585195
161,0.2304,0.610695,0.78147,0.581401,0.610695


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 6.8571 - root_mean_squared_error: 2.6186
Epoch 1: val_root_mean_squared_error improved from inf to 0.66916, saving model to cache/ensemble_camembert-base/models/mlp/1f19d39383ed69b8645ac611b534cdb8caa64f00d334c182edaa4880027af372_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6205 - root_mean_squared_error: 1.5388 - val_loss: 0.4478 - val_root_mean_squared_error: 0.6692
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 0.2235 - root_mean_squared_error: 0.4728
Epoch 2: val_root_mean_squared_error did not improve from 0.66916
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3624 - root_mean_squared_error: 0.5999 - val_loss: 0.5297 - val_root_mean_squared_error: 0.7278
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8493,5.047226,2.246603,2.017101,5.047226
46,3.0428,1.655989,1.286852,0.904754,1.655989
69,0.9694,0.976079,0.987967,0.811947,0.976079
92,0.6473,0.935107,0.967009,0.778873,0.935107
115,0.5596,0.766104,0.875274,0.694473,0.766104
138,0.4362,0.785026,0.886017,0.716319,0.785026
161,0.2742,0.784899,0.885946,0.734833,0.784899


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 8.6699 - root_mean_squared_error: 2.9445
Epoch 1: val_root_mean_squared_error improved from inf to 0.68246, saving model to cache/ensemble_camembert-base/models/mlp/046669bded4eb526f94b634a704da991e82ceafe1170f0bbd45079b8b8116890_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7434 - root_mean_squared_error: 1.5674 - val_loss: 0.4658 - val_root_mean_squared_error: 0.6825
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5621 - root_mean_squared_error: 0.7497
Epoch 2: val_root_mean_squared_error did not improve from 0.68246
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7317 - root_mean_squared_error: 0.8436 - val_loss: 1.2487 - val_root_mean_squared_error: 1.1175
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.995,4.113362,2.028142,1.83829,4.113362
46,2.8409,1.024264,1.012059,0.76366,1.024264
69,0.9392,0.727436,0.852899,0.657871,0.727436
92,0.7506,0.634468,0.796535,0.59474,0.634468
115,0.5614,0.81811,0.904494,0.653334,0.81811
138,0.3439,0.815371,0.902979,0.688328,0.815371
161,0.2508,0.793602,0.890843,0.671408,0.793602


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 677ms/step - loss: 5.5831 - root_mean_squared_error: 2.3629
Epoch 1: val_root_mean_squared_error improved from inf to 0.70456, saving model to cache/ensemble_camembert-base/models/mlp/b7b0b74bd389cd4600fb1c924840f539de51173128915e150c52fc7a7d53c2fd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2406 - root_mean_squared_error: 1.4314 - val_loss: 0.4964 - val_root_mean_squared_error: 0.7046
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2414 - root_mean_squared_error: 0.4913
Epoch 2: val_root_mean_squared_error did not improve from 0.70456
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3182 - root_mean_squared_error: 0.5599 - val_loss: 0.6012 - val_root_mean_squared_error: 0.7754
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2394,4.377316,2.092204,1.897845,4.377316
46,3.2351,1.185395,1.088758,0.859992,1.185395
69,0.9836,0.657511,0.81087,0.647708,0.657511
92,0.7896,0.77502,0.880352,0.745645,0.77502
115,0.5681,0.593524,0.770405,0.60905,0.593524
138,0.3904,0.591206,0.768899,0.594967,0.591206
161,0.371,0.5332,0.730206,0.612149,0.5332


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 7.3520 - root_mean_squared_error: 2.7115
Epoch 1: val_root_mean_squared_error improved from inf to 0.71158, saving model to cache/ensemble_camembert-base/models/mlp/095265363eb408ee772168200c9599c4ca2b8b3c5ca57df55a4c507092b76fca_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6340 - root_mean_squared_error: 1.5451 - val_loss: 0.5063 - val_root_mean_squared_error: 0.7116
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2014 - root_mean_squared_error: 0.4488
Epoch 2: val_root_mean_squared_error did not improve from 0.71158
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3349 - root_mean_squared_error: 0.5761 - val_loss: 1.0841 - val_root_mean_squared_error: 1.0412
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9426,3.184093,1.784403,1.602725,3.184093
46,3.4434,0.726302,0.852234,0.635716,0.726302
69,0.8566,0.42381,0.651007,0.552602,0.42381
92,0.7603,0.537498,0.733142,0.63759,0.537498
115,0.5738,0.328968,0.573557,0.46542,0.328968
138,0.4757,0.551259,0.742468,0.607794,0.551259
161,0.308,0.35184,0.593161,0.471875,0.35184


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 8.1186 - root_mean_squared_error: 2.8493
Epoch 1: val_root_mean_squared_error improved from inf to 0.99453, saving model to cache/ensemble_camembert-base/models/mlp/27fa6663f4946e24e468ade7e23ac63fc914c31bdc4cb045372b9aa696326b46_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 3.0073 - root_mean_squared_error: 1.6480 - val_loss: 0.9891 - val_root_mean_squared_error: 0.9945
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4764 - root_mean_squared_error: 0.6902
Epoch 2: val_root_mean_squared_error did not improve from 0.99453
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4778 - root_mean_squared_error: 0.6891 - val_loss: 1.1381 - val_root_mean_squared_error: 1.0668
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3127,4.257861,2.063459,1.842844,4.257861
46,2.5898,1.140511,1.067947,0.864755,1.140511
69,0.9947,0.749268,0.865603,0.701315,0.749268
92,0.6516,0.975468,0.987658,0.784167,0.975468
115,0.5762,0.768785,0.876804,0.725712,0.768785
138,0.4761,0.710482,0.842901,0.695126,0.710482
161,0.3888,0.653743,0.808544,0.689847,0.653743


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 4.6355 - root_mean_squared_error: 2.1530
Epoch 1: val_root_mean_squared_error improved from inf to 0.60117, saving model to cache/ensemble_camembert-base/models/mlp/fc9839c805b360b0fa9ae962b9afac00a945b4cf8d9a2d068df9712894d0fba4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6813 - root_mean_squared_error: 1.5881 - val_loss: 0.3614 - val_root_mean_squared_error: 0.6012
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4416 - root_mean_squared_error: 0.6645
Epoch 2: val_root_mean_squared_error did not improve from 0.60117
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7316 - root_mean_squared_error: 0.8470 - val_loss: 0.6623 - val_root_mean_squared_error: 0.8138
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9594,4.516243,2.125145,1.87919,4.516243
46,2.6233,1.216363,1.102888,0.858163,1.216363
69,0.8412,0.846193,0.919888,0.763214,0.846193
92,0.6511,0.790613,0.889164,0.679726,0.790613
115,0.5108,0.646582,0.804103,0.627443,0.646582
138,0.4724,0.546618,0.739336,0.575566,0.546618
161,0.363,0.543179,0.737007,0.558434,0.543179


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 7.7495 - root_mean_squared_error: 2.7838
Epoch 1: val_root_mean_squared_error improved from inf to 0.84761, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2284 - root_mean_squared_error: 1.7176 - val_loss: 0.7184 - val_root_mean_squared_error: 0.8476
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4610 - root_mean_squared_error: 0.6790
Epoch 2: val_root_mean_squared_error improved from 0.84761 to 0.66898, saving model to cache/ensemble_camembert-base/models/mlp/006e6b92f1e08681bedd99c1c71c4c656b51bc9e5cb7ca05f3cc9f6f3851c595_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3686 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2769,5.103775,2.259154,2.042164,5.103775
46,2.9882,1.52614,1.235371,0.939847,1.526141
69,0.8734,0.895329,0.946218,0.786349,0.895329
92,0.7153,0.741478,0.861091,0.719281,0.741478
115,0.5371,0.78714,0.887209,0.719101,0.78714
138,0.3813,0.691694,0.831682,0.690603,0.691694
161,0.2884,0.798463,0.893568,0.759024,0.798463


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 6.3762 - root_mean_squared_error: 2.5251
Epoch 1: val_root_mean_squared_error improved from inf to 0.54935, saving model to cache/ensemble_camembert-base/models/mlp/49bec529ad8d68925f64afc706996070e79d54e98bd3ecd07704d2fc81a3d290_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1411 - root_mean_squared_error: 1.6970 - val_loss: 0.3018 - val_root_mean_squared_error: 0.5494
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2303 - root_mean_squared_error: 0.4799
Epoch 2: val_root_mean_squared_error did not improve from 0.54935
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2579 - root_mean_squared_error: 0.5040 - val_loss: 1.2160 - val_root_mean_squared_error: 1.1027
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3923,5.092302,2.256613,2.025584,5.092302
46,2.7794,1.515463,1.231041,1.009902,1.515463
69,0.7214,0.832514,0.912422,0.734605,0.832514
92,0.6003,0.807017,0.898341,0.743041,0.807017
115,0.5683,0.634283,0.796419,0.628768,0.634283
138,0.5174,0.551948,0.742932,0.576387,0.551948
161,0.3592,0.631858,0.794895,0.610785,0.631858


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 8.1626 - root_mean_squared_error: 2.8570
Epoch 1: val_root_mean_squared_error improved from inf to 0.78951, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8496 - root_mean_squared_error: 1.6104 - val_loss: 0.6233 - val_root_mean_squared_error: 0.7895
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1713 - root_mean_squared_error: 0.4139
Epoch 2: val_root_mean_squared_error improved from 0.78951 to 0.59591, saving model to cache/ensemble_camembert-base/models/mlp/b2a9ce0164f7f56ace57688da928ad7adffde891626f06b3921759250a5b02ce_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.2782 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2477,4.43681,2.106373,1.791929,4.436809
46,2.7376,1.525518,1.235119,0.912946,1.525518
69,0.7969,1.199703,1.09531,0.922928,1.199703
92,0.6498,0.947515,0.973404,0.738534,0.947515
115,0.6061,0.79827,0.89346,0.687004,0.79827
138,0.3793,0.761994,0.872922,0.690553,0.761994
161,0.2974,0.856237,0.925331,0.701117,0.856237


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 673ms/step - loss: 9.1559 - root_mean_squared_error: 3.0259
Epoch 1: val_root_mean_squared_error improved from inf to 0.53911, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6954 - root_mean_squared_error: 1.5570 - val_loss: 0.2906 - val_root_mean_squared_error: 0.5391
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5205 - root_mean_squared_error: 0.7215
Epoch 2: val_root_mean_squared_error improved from 0.53911 to 0.41313, saving model to cache/ensemble_camembert-base/models/mlp/cf185f594b57fc7b20804e86ce3bd67c8549dbef160e93b6a6bb2a81540246f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4736 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3916,3.883674,1.970704,1.723551,3.883674
46,2.9169,1.100016,1.048816,0.792312,1.100016
69,0.8032,0.932344,0.96558,0.75271,0.932345
92,0.8408,0.879638,0.93789,0.729969,0.879638
115,0.6774,0.868844,0.932118,0.68754,0.868844
138,0.5627,0.831645,0.911946,0.667854,0.831645
161,0.3584,0.865832,0.930501,0.677807,0.865832


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 5.9826 - root_mean_squared_error: 2.4459
Epoch 1: val_root_mean_squared_error improved from inf to 1.59700, saving model to cache/ensemble_camembert-base/models/mlp/c24bae4a232597417afa0883aaca76b9267952ce314546f0e463c4442e56b3f8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5257 - root_mean_squared_error: 1.5221 - val_loss: 2.5504 - val_root_mean_squared_error: 1.5970
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 1.9174 - root_mean_squared_error: 1.3847
Epoch 2: val_root_mean_squared_error did not improve from 1.59700
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7571 - root_mean_squared_error: 0.8366 - val_loss: 2.5712 - val_root_mean_squared_error: 1.6035
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.672,3.506481,1.87256,1.690205,3.506481
46,2.3482,0.795925,0.892146,0.704073,0.795925
69,0.8465,0.646892,0.804296,0.654341,0.646892
92,0.6968,0.647601,0.804737,0.705448,0.647601
115,0.5949,0.832827,0.912593,0.773614,0.832827
138,0.478,0.586685,0.765954,0.64341,0.586685
161,0.3003,0.659171,0.811893,0.661821,0.659171


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 687ms/step - loss: 5.8626 - root_mean_squared_error: 2.4213
Epoch 1: val_root_mean_squared_error improved from inf to 0.69648, saving model to cache/ensemble_camembert-base/models/mlp/9dd0bcbfe8a9d1b0e3ced789f2aed0abb5ac05077df263d2b45578c2971a8b64_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3709 - root_mean_squared_error: 1.4762 - val_loss: 0.4851 - val_root_mean_squared_error: 0.6965
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss: 0.6426 - root_mean_squared_error: 0.8016
Epoch 2: val_root_mean_squared_error improved from 0.69648 to 0.45166, saving model to cache/ensemble_camembert-base/models/mlp/9dd0bcbfe8a9d1b0e3ced789f2aed0abb5ac05077df263d2b45578c2971a8b64_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5825 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4027,3.553126,1.884974,1.692082,3.553126
46,2.5943,0.873486,0.934605,0.730052,0.873486
69,0.9544,0.665794,0.815962,0.675332,0.665794
92,0.7004,0.65516,0.809419,0.679559,0.65516
115,0.5549,0.430276,0.655954,0.532989,0.430276
138,0.4865,0.436179,0.660438,0.549294,0.436179
161,0.2482,0.401435,0.633589,0.527303,0.401435


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 4.7852 - root_mean_squared_error: 2.1875
Epoch 1: val_root_mean_squared_error improved from inf to 0.85401, saving model to cache/ensemble_camembert-base/models/mlp/f4de0cb00be1f6c7f3b79a2cefa1cc53db11c0fbda5c94979f4191552b2b9143_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3883 - root_mean_squared_error: 1.4885 - val_loss: 0.7293 - val_root_mean_squared_error: 0.8540
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4729 - root_mean_squared_error: 0.6877
Epoch 2: val_root_mean_squared_error improved from 0.85401 to 0.56328, saving model to cache/ensemble_camembert-base/models/mlp/f4de0cb00be1f6c7f3b79a2cefa1cc53db11c0fbda5c94979f4191552b2b9143_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3215 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9218,2.957715,1.719801,1.569472,2.957715
46,2.6984,0.542144,0.736304,0.583167,0.542144
69,0.9976,0.497759,0.70552,0.562161,0.497759
92,0.7323,0.478198,0.691518,0.511525,0.478198
115,0.6453,0.477755,0.691198,0.506084,0.477755
138,0.4643,0.476122,0.690016,0.548576,0.476122
161,0.2663,0.451807,0.672166,0.522315,0.451807


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 656ms/step - loss: 8.7030 - root_mean_squared_error: 2.9501
Epoch 1: val_root_mean_squared_error improved from inf to 0.86765, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8671 - root_mean_squared_error: 1.6173 - val_loss: 0.7528 - val_root_mean_squared_error: 0.8677
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5485 - root_mean_squared_error: 0.7406
Epoch 2: val_root_mean_squared_error improved from 0.86765 to 0.66666, saving model to cache/ensemble_camembert-base/models/mlp/02d77c0935b1550a904e5e6fdf1cf36954ec4352cfb635254654ed04f183fe62_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5658 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5031,4.208175,2.051384,1.877526,4.208175
46,2.3157,1.10128,1.049419,0.77983,1.10128
69,0.9577,0.708078,0.841474,0.684747,0.708078
92,0.8263,0.641637,0.801022,0.665337,0.641637
115,0.7147,0.657454,0.810835,0.684669,0.657454
138,0.5486,0.648071,0.805029,0.630706,0.648071
161,0.3242,0.592177,0.76953,0.60034,0.592177


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 5.1816 - root_mean_squared_error: 2.2763
Epoch 1: val_root_mean_squared_error improved from inf to 0.63787, saving model to cache/ensemble_camembert-base/models/mlp/997254962ddefe227569a42e24e51898dc13590b4207bff0d24ccc1b3571687d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.1420 - root_mean_squared_error: 1.4090 - val_loss: 0.4069 - val_root_mean_squared_error: 0.6379
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2592 - root_mean_squared_error: 0.5091
Epoch 2: val_root_mean_squared_error did not improve from 0.63787
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5064 - root_mean_squared_error: 0.7040 - val_loss: 1.4311 - val_root_mean_squared_error: 1.1963
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3691,5.082301,2.254396,1.984273,5.082301
46,2.5769,1.612398,1.269802,0.9979,1.612398
69,0.6934,1.149811,1.072292,0.871866,1.149811
92,0.7709,0.996673,0.998335,0.779868,0.996673
115,0.5879,1.052838,1.026079,0.822444,1.052838
138,0.5084,1.172836,1.082976,0.860603,1.172836
161,0.3713,1.033963,1.01684,0.817926,1.033963


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 8.4289 - root_mean_squared_error: 2.9033
Epoch 1: val_root_mean_squared_error improved from inf to 0.71257, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6954 - root_mean_squared_error: 1.5576 - val_loss: 0.5077 - val_root_mean_squared_error: 0.7126
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2474 - root_mean_squared_error: 0.4974
Epoch 2: val_root_mean_squared_error improved from 0.71257 to 0.60867, saving model to cache/ensemble_camembert-base/models/mlp/8d41d49f14bc51adc877083aa3799bce461f88672a3661543353ece740d8f903_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4742 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.867,4.233812,2.057623,1.796527,4.233813
46,2.7756,1.358788,1.16567,0.928347,1.358788
69,0.9919,1.079182,1.038837,0.8529,1.079182
92,0.7463,0.811036,0.900575,0.760373,0.811036
115,0.499,0.685703,0.828072,0.681075,0.685703
138,0.3691,0.70859,0.841778,0.709031,0.70859
161,0.2462,0.670734,0.818983,0.704293,0.670734


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 5.3946 - root_mean_squared_error: 2.3226
Epoch 1: val_root_mean_squared_error improved from inf to 0.68123, saving model to cache/ensemble_camembert-base/models/mlp/f0814ebe8f8e9e305b9f5c6b641236468675b4eb3d8137449aa7dbb284e02454_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3682 - root_mean_squared_error: 1.4766 - val_loss: 0.4641 - val_root_mean_squared_error: 0.6812
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4876 - root_mean_squared_error: 0.6983
Epoch 2: val_root_mean_squared_error did not improve from 0.68123
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3487 - root_mean_squared_error: 0.5884 - val_loss: 0.8351 - val_root_mean_squared_error: 0.9138
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9178,3.716441,1.927807,1.671928,3.71644
46,2.7368,1.112187,1.054603,0.808001,1.112187
69,1.0362,1.091125,1.044569,0.851379,1.091125
92,0.7253,0.874451,0.935121,0.737327,0.874451
115,0.6692,0.986606,0.99328,0.814949,0.986606
138,0.5701,1.051186,1.025274,0.824672,1.051186
161,0.4196,0.99563,0.997813,0.812829,0.99563


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 6.5199 - root_mean_squared_error: 2.5534
Epoch 1: val_root_mean_squared_error improved from inf to 0.67722, saving model to cache/ensemble_camembert-base/models/mlp/53e3cfbc2042fca9cdd32768a3cbd77c2f5a9a784a0b77a99cf4a19f30c5c78c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6777 - root_mean_squared_error: 1.5684 - val_loss: 0.4586 - val_root_mean_squared_error: 0.6772
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 0.4924 - root_mean_squared_error: 0.7017
Epoch 2: val_root_mean_squared_error improved from 0.67722 to 0.35185, saving model to cache/ensemble_camembert-base/models/mlp/53e3cfbc2042fca9cdd32768a3cbd77c2f5a9a784a0b77a99cf4a19f30c5c78c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4228 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4956,3.441494,1.855126,1.695184,3.441494
46,2.6667,0.706043,0.840263,0.662403,0.706043
69,0.8917,0.585555,0.765216,0.619307,0.585555
92,0.6538,0.44366,0.666078,0.537787,0.44366
115,0.6308,0.44996,0.67079,0.576502,0.44996
138,0.3603,0.555557,0.745357,0.575163,0.555557
161,0.2892,0.561557,0.749371,0.598453,0.561557


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 7.2046 - root_mean_squared_error: 2.6841
Epoch 1: val_root_mean_squared_error improved from inf to 0.73435, saving model to cache/ensemble_camembert-base/models/mlp/83b1a39db8964db9bd4d59b269f94b00f1017352da17ce37a917e023cae3122a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5394 - root_mean_squared_error: 1.5162 - val_loss: 0.5393 - val_root_mean_squared_error: 0.7343
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5152 - root_mean_squared_error: 0.7178
Epoch 2: val_root_mean_squared_error improved from 0.73435 to 0.57568, saving model to cache/ensemble_camembert-base/models/mlp/83b1a39db8964db9bd4d59b269f94b00f1017352da17ce37a917e023cae3122a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4134 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2958,4.7861,2.187716,1.983866,4.7861
46,2.4266,1.459515,1.208104,0.956936,1.459514
69,0.9824,0.851178,0.922593,0.769821,0.851178
92,0.7774,0.759589,0.871544,0.699106,0.759589
115,0.6165,0.692439,0.832129,0.654944,0.692439
138,0.6362,0.696152,0.834357,0.688279,0.696152
161,0.3364,0.62249,0.788981,0.645247,0.62249


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 3.5860 - root_mean_squared_error: 1.8937
Epoch 1: val_root_mean_squared_error improved from inf to 0.77647, saving model to cache/ensemble_camembert-base/models/mlp/8dbb243a50642c7b195106006eea75546940ee11e734d41779ff9d55f561a856_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2943 - root_mean_squared_error: 1.4675 - val_loss: 0.6029 - val_root_mean_squared_error: 0.7765
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5343 - root_mean_squared_error: 0.7309
Epoch 2: val_root_mean_squared_error did not improve from 0.77647
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3943 - root_mean_squared_error: 0.6254 - val_loss: 0.6433 - val_root_mean_squared_error: 0.8021
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1232,3.640253,1.907945,1.707834,3.640253
46,2.5976,0.85597,0.925187,0.725429,0.85597
69,0.7837,0.649091,0.805662,0.665866,0.649091
92,0.7054,0.728914,0.853765,0.677786,0.728914
115,0.5962,0.68339,0.826674,0.664359,0.68339
138,0.3649,0.690017,0.830673,0.628773,0.690018
161,0.2888,0.677365,0.823022,0.609437,0.677365


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 10.7814 - root_mean_squared_error: 3.2835
Epoch 1: val_root_mean_squared_error improved from inf to 0.94674, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.3616 - root_mean_squared_error: 1.7402 - val_loss: 0.8963 - val_root_mean_squared_error: 0.9467
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6881 - root_mean_squared_error: 0.8295
Epoch 2: val_root_mean_squared_error improved from 0.94674 to 0.83809, saving model to cache/ensemble_camembert-base/models/mlp/70ffdbf879bf2e134f7aeda6ea22a939e9b0f66cf7ef86e2776331d25709bf1a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5185 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4857,4.044879,2.011188,1.774905,4.044879
46,2.2654,1.112835,1.05491,0.818412,1.112836
69,0.919,0.710182,0.842723,0.724185,0.710182
92,0.6589,0.682139,0.825917,0.705398,0.682139
115,0.59,0.657745,0.811015,0.644552,0.657745
138,0.4433,0.59812,0.773382,0.610176,0.59812
161,0.3198,0.602944,0.776495,0.601612,0.602944


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 5.7142 - root_mean_squared_error: 2.3904
Epoch 1: val_root_mean_squared_error improved from inf to 0.68727, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3347 - root_mean_squared_error: 1.4676 - val_loss: 0.4723 - val_root_mean_squared_error: 0.6873
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3316 - root_mean_squared_error: 0.5758
Epoch 2: val_root_mean_squared_error improved from 0.68727 to 0.60020, saving model to cache/ensemble_camembert-base/models/mlp/483c52c1f630b35f111bca22453a912d4bab4f0c1260a4cae2d5cc66abcec532_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4366 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8576,3.335577,1.826356,1.637146,3.335577
46,2.5983,0.789284,0.888416,0.706688,0.789284
69,0.9216,0.545144,0.738338,0.604743,0.545144
92,0.7724,0.412812,0.642504,0.520934,0.412812
115,0.5373,0.522055,0.722533,0.583593,0.522055
138,0.3752,0.555725,0.74547,0.606205,0.555725
161,0.2343,0.573501,0.757298,0.616103,0.573501


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 7.4126 - root_mean_squared_error: 2.7226
Epoch 1: val_root_mean_squared_error improved from inf to 0.69186, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4749 - root_mean_squared_error: 1.4927 - val_loss: 0.4787 - val_root_mean_squared_error: 0.6919
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.8779 - root_mean_squared_error: 0.9369
Epoch 2: val_root_mean_squared_error improved from 0.69186 to 0.51025, saving model to cache/ensemble_camembert-base/models/mlp/c93d1d8b63c88973e0bf2afa410b1f498d509b4f864c5c87c377408a1d17c084_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4268 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.977,4.295402,2.072535,1.779523,4.295402
46,3.0129,1.364049,1.167925,0.913037,1.364049
69,0.744,1.168815,1.081117,0.891297,1.168815
92,0.7331,0.86211,0.928499,0.733608,0.86211
115,0.6354,0.872388,0.934017,0.70251,0.872388
138,0.4821,0.841088,0.917108,0.684397,0.841088
161,0.4385,0.730865,0.854906,0.669087,0.730865


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 688ms/step - loss: 9.7803 - root_mean_squared_error: 3.1273
Epoch 1: val_root_mean_squared_error improved from inf to 0.52448, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.6537 - root_mean_squared_error: 1.5398 - val_loss: 0.2751 - val_root_mean_squared_error: 0.5245
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4746 - root_mean_squared_error: 0.6889
Epoch 2: val_root_mean_squared_error improved from 0.52448 to 0.39606, saving model to cache/ensemble_camembert-base/models/mlp/4dc8f5a523b82a95c57278aa7e88f6f57e970bfcf77360f48f5203d3d4f74837_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7061 - root_mean_squared_error: 0.8

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8745,4.710427,2.170352,1.947097,4.710427
46,2.3221,1.292462,1.136865,0.892278,1.292462
69,0.8887,0.914295,0.956188,0.789439,0.914295
92,0.5857,0.99702,0.998509,0.823862,0.99702
115,0.5806,0.895364,0.946237,0.74327,0.895364
138,0.4141,1.011714,1.00584,0.80084,1.011714
161,0.3031,0.912516,0.955257,0.743316,0.912516


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 8.1490 - root_mean_squared_error: 2.8546
Epoch 1: val_root_mean_squared_error improved from inf to 1.12639, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0094 - root_mean_squared_error: 1.6565 - val_loss: 1.2688 - val_root_mean_squared_error: 1.1264
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - loss: 0.9011 - root_mean_squared_error: 0.9493
Epoch 2: val_root_mean_squared_error improved from 1.12639 to 0.51082, saving model to cache/ensemble_camembert-base/models/mlp/8afedf718fd09155412cd08bc9ef17cf89fbdea2e43a89c8385fe3cda7ec84b9_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5100 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2452,3.487359,1.867447,1.670853,3.487358
46,2.7179,0.822869,0.907121,0.740439,0.822869
69,0.9482,0.688855,0.829973,0.684044,0.688855
92,0.7141,0.471315,0.686524,0.565584,0.471315
115,0.5213,0.440432,0.66365,0.553331,0.440432
138,0.3762,0.473668,0.688235,0.531694,0.473668
161,0.261,0.423465,0.650742,0.541506,0.423465


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 674ms/step - loss: 7.8823 - root_mean_squared_error: 2.8075
Epoch 1: val_root_mean_squared_error improved from inf to 0.88347, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5370 - root_mean_squared_error: 1.5155 - val_loss: 0.7805 - val_root_mean_squared_error: 0.8835
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.8789 - root_mean_squared_error: 0.9375
Epoch 2: val_root_mean_squared_error improved from 0.88347 to 0.40966, saving model to cache/ensemble_camembert-base/models/mlp/9421b04c2d8a75ef269a142243e2c9d0ff0a79540f4e08ac6ca0a808c54dd99d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4912 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.6886,5.066747,2.250944,2.010928,5.066747
46,2.8116,1.665091,1.290384,0.978831,1.665091
69,1.0069,1.013418,1.006686,0.812145,1.013418
92,0.7783,0.868435,0.931899,0.724016,0.868435
115,0.7701,0.644437,0.802768,0.656423,0.644437
138,0.5798,0.589955,0.768086,0.567514,0.589955
161,0.2824,0.585005,0.764856,0.557288,0.585005


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 7.1474 - root_mean_squared_error: 2.6735
Epoch 1: val_root_mean_squared_error improved from inf to 0.58810, saving model to cache/ensemble_camembert-base/models/mlp/b87fc93c444beb317690a0cdff38ec95e97539572aea9d8fd1d1493a9a70c464_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8695 - root_mean_squared_error: 1.6189 - val_loss: 0.3459 - val_root_mean_squared_error: 0.5881
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2194 - root_mean_squared_error: 0.4684
Epoch 2: val_root_mean_squared_error improved from 0.58810 to 0.52571, saving model to cache/ensemble_camembert-base/models/mlp/b87fc93c444beb317690a0cdff38ec95e97539572aea9d8fd1d1493a9a70c464_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3810 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2846,4.555358,2.134328,1.941239,4.555358
46,2.65,1.210055,1.100025,0.879398,1.210055
69,0.7568,0.792847,0.890419,0.722966,0.792847
92,1.0016,0.781099,0.883798,0.714305,0.781099
115,0.8402,0.714774,0.845443,0.637461,0.714774
138,0.6739,0.712765,0.844254,0.679166,0.712765
161,0.389,0.867791,0.931553,0.679091,0.867791


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 7.4003 - root_mean_squared_error: 2.7203
Epoch 1: val_root_mean_squared_error improved from inf to 0.69980, saving model to cache/ensemble_camembert-base/models/mlp/1be993e12967a8ba1c83c27ebea1aee09566abf310f07a6a7cab6b310970de73_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9042 - root_mean_squared_error: 1.6256 - val_loss: 0.4897 - val_root_mean_squared_error: 0.6998
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.2083 - root_mean_squared_error: 0.4564
Epoch 2: val_root_mean_squared_error did not improve from 0.69980
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3721 - root_mean_squared_error: 0.6073 - val_loss: 0.8014 - val_root_mean_squared_error: 0.8952
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7463,4.355767,2.087048,1.912899,4.355767
46,2.422,1.045345,1.022421,0.82226,1.045345
69,0.8298,0.696875,0.83479,0.705955,0.696875
92,0.7879,0.582592,0.763277,0.625568,0.582592
115,0.6586,0.641928,0.801204,0.628508,0.641928
138,0.467,0.562635,0.75009,0.57221,0.562635
161,0.3886,0.573132,0.757055,0.546765,0.573132


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 6.2070 - root_mean_squared_error: 2.4914
Epoch 1: val_root_mean_squared_error improved from inf to 0.60089, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4217 - root_mean_squared_error: 1.4950 - val_loss: 0.3611 - val_root_mean_squared_error: 0.6009
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2097 - root_mean_squared_error: 0.4580
Epoch 2: val_root_mean_squared_error improved from 0.60089 to 0.51690, saving model to cache/ensemble_camembert-base/models/mlp/125afbfde21d6fce4900b6beb18125989c8d345fb1f51e204d8354210c50de35_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4313 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3992,5.018145,2.240122,2.042199,5.018145
46,2.2444,1.361005,1.166621,0.900301,1.361005
69,0.8449,0.96986,0.984815,0.770997,0.96986
92,0.6644,0.753098,0.867812,0.669468,0.753098
115,0.6139,0.857003,0.925745,0.681435,0.857003
138,0.4139,0.804573,0.89698,0.702979,0.804572
161,0.341,0.747531,0.864599,0.684611,0.747531


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m13s[0m 789ms/step - loss: 7.3803 - root_mean_squared_error: 2.7167
Epoch 1: val_root_mean_squared_error improved from inf to 0.71895, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7398 - root_mean_squared_error: 1.5819 - val_loss: 0.5169 - val_root_mean_squared_error: 0.7189
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2744 - root_mean_squared_error: 0.5239
Epoch 2: val_root_mean_squared_error improved from 0.71895 to 0.60807, saving model to cache/ensemble_camembert-base/models/mlp/cb6873f833a31fdfa2b1f0fd149dc95e78c529e0a63b55b4b3135f49dbb89a87_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4159 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4779,3.372342,1.836394,1.670107,3.372342
46,3.0225,0.773983,0.879763,0.736604,0.773984
69,0.9136,0.541047,0.735559,0.594472,0.541047
92,0.646,0.623328,0.789511,0.629955,0.623328
115,0.4984,0.562227,0.749818,0.627047,0.562227
138,0.4059,0.611805,0.78218,0.638838,0.611805
161,0.3203,0.564385,0.751255,0.624911,0.564385


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 691ms/step - loss: 9.2555 - root_mean_squared_error: 3.0423
Epoch 1: val_root_mean_squared_error improved from inf to 0.77722, saving model to cache/ensemble_camembert-base/models/mlp/9086824a215aa3a754fbd8efbae63ad1d6264fb63944d6b33c452d433dc8a2bd_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1425 - root_mean_squared_error: 1.6811 - val_loss: 0.6041 - val_root_mean_squared_error: 0.7772
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 33ms/step - loss: 0.3651 - root_mean_squared_error: 0.6042
Epoch 2: val_root_mean_squared_error did not improve from 0.77722
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4259 - root_mean_squared_error: 0.6482 - val_loss: 0.8826 - val_root_mean_squared_error: 0.9395
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8958,3.451405,1.857796,1.718692,3.451405
46,2.841,0.739069,0.859691,0.702831,0.739069
69,1.2079,0.539096,0.734232,0.615172,0.539097
92,0.8822,0.372286,0.610152,0.504192,0.372286
115,0.6696,0.41024,0.6405,0.515224,0.41024
138,0.5317,0.385148,0.620603,0.497015,0.385148
161,0.3504,0.39688,0.629984,0.498144,0.39688


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 676ms/step - loss: 6.2410 - root_mean_squared_error: 2.4982
Epoch 1: val_root_mean_squared_error improved from inf to 0.54073, saving model to cache/ensemble_camembert-base/models/mlp/1bc5b333d0ae5230beb38025c6ecd92a1cce88dcef8bc965126ea107ea365691_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.6481 - root_mean_squared_error: 1.5577 - val_loss: 0.2924 - val_root_mean_squared_error: 0.5407
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2210 - root_mean_squared_error: 0.4701
Epoch 2: val_root_mean_squared_error did not improve from 0.54073
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4992 - root_mean_squared_error: 0.7016 - val_loss: 0.8038 - val_root_mean_squared_error: 0.8965
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.344,4.878265,2.20868,1.989447,4.878266
46,2.3912,1.383159,1.176078,0.966472,1.383159
69,0.7458,0.892723,0.94484,0.776769,0.892723
92,0.7042,0.855823,0.925107,0.784005,0.855823
115,0.5363,0.691691,0.83168,0.701529,0.691691
138,0.3354,0.787561,0.887446,0.697456,0.787561
161,0.2482,0.666535,0.816416,0.65461,0.666535


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 680ms/step - loss: 4.8124 - root_mean_squared_error: 2.1937
Epoch 1: val_root_mean_squared_error improved from inf to 0.64029, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5923 - root_mean_squared_error: 1.5413 - val_loss: 0.4100 - val_root_mean_squared_error: 0.6403
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1636 - root_mean_squared_error: 0.4045
Epoch 2: val_root_mean_squared_error improved from 0.64029 to 0.63609, saving model to cache/ensemble_camembert-base/models/mlp/052a413bf3b007ee863fabd842e7c182894a9c6f615dad61c66192c22641ce00_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5148 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0728,3.746775,1.935659,1.687558,3.746775
46,2.7995,1.058584,1.028875,0.761657,1.058584
69,0.7996,0.907994,0.952887,0.83667,0.907994
92,0.5858,0.69666,0.834662,0.70112,0.69666
115,0.6467,0.617496,0.785809,0.651071,0.617496
138,0.6573,0.56733,0.753213,0.59803,0.56733
161,0.4815,0.589297,0.767657,0.63086,0.589297


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 6.9163 - root_mean_squared_error: 2.6299
Epoch 1: val_root_mean_squared_error improved from inf to 0.66844, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7755 - root_mean_squared_error: 1.5998 - val_loss: 0.4468 - val_root_mean_squared_error: 0.6684
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1774 - root_mean_squared_error: 0.4212
Epoch 2: val_root_mean_squared_error improved from 0.66844 to 0.49683, saving model to cache/ensemble_camembert-base/models/mlp/187204402837291fc3e3147e4a4bec1d21ef3b70e47302c50b36064d69428e13_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5738 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0268,3.732197,1.93189,1.773236,3.732197
46,2.892,0.7724,0.878863,0.706207,0.7724
69,0.8226,0.583586,0.763928,0.629603,0.583586
92,0.9093,0.604095,0.777236,0.66358,0.604095
115,0.6578,0.481671,0.694025,0.569786,0.481671
138,0.7224,0.426114,0.652774,0.553455,0.426114
161,0.3763,0.477293,0.690864,0.576244,0.477293


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 6.4289 - root_mean_squared_error: 2.5355
Epoch 1: val_root_mean_squared_error improved from inf to 0.46859, saving model to cache/ensemble_camembert-base/models/mlp/af0b1614d42172262aa063f83af5042bafb9d87d5762dfe48ef50b307aaafe7f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8266 - root_mean_squared_error: 1.6158 - val_loss: 0.2196 - val_root_mean_squared_error: 0.4686
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.1561 - root_mean_squared_error: 0.3951
Epoch 2: val_root_mean_squared_error did not improve from 0.46859
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3327 - root_mean_squared_error: 0.5715 - val_loss: 1.1036 - val_root_mean_squared_error: 1.0505
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1593,4.295877,2.07265,1.858243,4.295877
46,2.841,1.164992,1.079348,0.830499,1.164992
69,0.803,0.782045,0.884333,0.69503,0.782045
92,0.596,0.832736,0.912544,0.730992,0.832736
115,0.6646,0.738102,0.859129,0.667756,0.738102
138,0.5765,0.612451,0.782592,0.628325,0.612451
161,0.3281,0.634596,0.796615,0.638517,0.634596


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 690ms/step - loss: 6.1211 - root_mean_squared_error: 2.4741
Epoch 1: val_root_mean_squared_error improved from inf to 0.80169, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4593 - root_mean_squared_error: 1.4967 - val_loss: 0.6427 - val_root_mean_squared_error: 0.8017
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4935 - root_mean_squared_error: 0.7025
Epoch 2: val_root_mean_squared_error improved from 0.80169 to 0.67913, saving model to cache/ensemble_camembert-base/models/mlp/a596d8a06aeb2a7f837975282b4a4cfc3b0639c68c419d087a9f347d58f9765d_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4205 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9932,4.44084,2.10733,1.89667,4.44084
46,2.4589,1.187116,1.089548,0.850269,1.187115
69,0.9266,0.828127,0.910015,0.757979,0.828127
92,0.7898,0.640468,0.800292,0.662693,0.640468
115,0.5986,0.608542,0.780091,0.621775,0.608541
138,0.515,0.504267,0.710118,0.596238,0.504267
161,0.3717,0.647202,0.804488,0.668124,0.647201


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 6.5127 - root_mean_squared_error: 2.5520
Epoch 1: val_root_mean_squared_error improved from inf to 0.55685, saving model to cache/ensemble_camembert-base/models/mlp/a25f26fe7618185237b5994dac4a484254d3e981ed2b9bc9c4cd88ae67097ae3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9464 - root_mean_squared_error: 1.6517 - val_loss: 0.3101 - val_root_mean_squared_error: 0.5569
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5474 - root_mean_squared_error: 0.7399
Epoch 2: val_root_mean_squared_error improved from 0.55685 to 0.50077, saving model to cache/ensemble_camembert-base/models/mlp/a25f26fe7618185237b5994dac4a484254d3e981ed2b9bc9c4cd88ae67097ae3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5584 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9613,5.378462,2.319151,2.061782,5.378462
46,2.7098,1.888694,1.374298,1.03497,1.888694
69,1.0831,1.123957,1.060168,0.965949,1.123957
92,0.7834,1.077162,1.037864,0.8988,1.077162
115,0.6181,1.012124,1.006044,0.80103,1.012124
138,0.4231,1.205568,1.097984,0.820706,1.205568
161,0.2609,1.267635,1.125893,0.8653,1.267635


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 5.3527 - root_mean_squared_error: 2.3136
Epoch 1: val_root_mean_squared_error improved from inf to 0.95649, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5978 - root_mean_squared_error: 1.5581 - val_loss: 0.9149 - val_root_mean_squared_error: 0.9565
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4917 - root_mean_squared_error: 0.7012
Epoch 2: val_root_mean_squared_error improved from 0.95649 to 0.50786, saving model to cache/ensemble_camembert-base/models/mlp/95fd9d695785a6d72912c0c14185e2207b36dc63f3376dc1cdf28a67c55ea839_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2637 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.3688,4.167441,2.041431,1.889635,4.167441
46,2.6537,0.913229,0.95563,0.737477,0.913229
69,1.0388,0.654232,0.808846,0.673992,0.654232
92,0.8166,0.566485,0.752652,0.618395,0.566485
115,0.6676,0.506713,0.711838,0.573744,0.506713
138,0.6477,0.547961,0.740244,0.605861,0.547961
161,0.4737,0.600928,0.775195,0.631746,0.600928


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 9.5109 - root_mean_squared_error: 3.0840
Epoch 1: val_root_mean_squared_error improved from inf to 0.99154, saving model to cache/ensemble_camembert-base/models/mlp/af52127018501ffc43aa59e690e964b2abcaa234084168b6a40cd735844600f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2313 - root_mean_squared_error: 1.7122 - val_loss: 0.9831 - val_root_mean_squared_error: 0.9915
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 1.0443 - root_mean_squared_error: 1.0219
Epoch 2: val_root_mean_squared_error improved from 0.99154 to 0.89470, saving model to cache/ensemble_camembert-base/models/mlp/af52127018501ffc43aa59e690e964b2abcaa234084168b6a40cd735844600f3_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4730 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1217,3.612736,1.90072,1.677499,3.612736
46,2.5551,0.942961,0.971062,0.768982,0.942961
69,0.9253,0.885458,0.940988,0.816881,0.885458
92,0.7856,0.576322,0.759159,0.634331,0.576322
115,0.533,0.585635,0.765268,0.596495,0.585635
138,0.3592,0.66267,0.814046,0.615081,0.66267
161,0.2528,0.704457,0.839319,0.659697,0.704457


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 689ms/step - loss: 9.0948 - root_mean_squared_error: 3.0158
Epoch 1: val_root_mean_squared_error improved from inf to 0.66827, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.4676 - root_mean_squared_error: 1.7708 - val_loss: 0.4466 - val_root_mean_squared_error: 0.6683
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3846 - root_mean_squared_error: 0.6201
Epoch 2: val_root_mean_squared_error improved from 0.66827 to 0.54904, saving model to cache/ensemble_camembert-base/models/mlp/0bc6c29be7f243b9e738e0f32a52821511f6c0f190f13b0532e71f9b46ea18a2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3906 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4858,3.083173,1.755897,1.594241,3.083173
46,2.5742,0.639351,0.799594,0.627627,0.639351
69,0.8573,0.435651,0.660038,0.483567,0.435651
92,0.592,0.443345,0.665842,0.517047,0.443345
115,0.4004,0.636805,0.798001,0.605477,0.636805
138,0.2917,0.550003,0.741622,0.600742,0.550003
161,0.1729,0.525723,0.725068,0.588165,0.525723


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 6.3890 - root_mean_squared_error: 2.5276
Epoch 1: val_root_mean_squared_error improved from inf to 0.96361, saving model to cache/ensemble_camembert-base/models/mlp/183f903ea3ace4c6c84b94fc7c5326c54cf755f66e7808187f741cb6ab8c9ad4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4124 - root_mean_squared_error: 1.4733 - val_loss: 0.9285 - val_root_mean_squared_error: 0.9636
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6527 - root_mean_squared_error: 0.8079
Epoch 2: val_root_mean_squared_error improved from 0.96361 to 0.65523, saving model to cache/ensemble_camembert-base/models/mlp/183f903ea3ace4c6c84b94fc7c5326c54cf755f66e7808187f741cb6ab8c9ad4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5193 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6184,5.616433,2.369901,2.142847,5.616433
46,2.8849,1.925102,1.38748,1.039712,1.925102
69,0.9344,0.9841,0.992018,0.807859,0.9841
92,0.6604,0.90921,0.953525,0.782223,0.90921
115,0.6758,0.954249,0.976857,0.776632,0.954249
138,0.6499,0.877397,0.936695,0.767835,0.877397
161,0.3761,0.819192,0.905092,0.742576,0.819192


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 8.8112 - root_mean_squared_error: 2.9684
Epoch 1: val_root_mean_squared_error improved from inf to 0.97518, saving model to cache/ensemble_camembert-base/models/mlp/f11d0165c931292c15a0e4fae4dcbd0290b167f5587ec7e68c07053b7a23e905_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7547 - root_mean_squared_error: 1.5747 - val_loss: 0.9510 - val_root_mean_squared_error: 0.9752
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5610 - root_mean_squared_error: 0.7490
Epoch 2: val_root_mean_squared_error improved from 0.97518 to 0.91134, saving model to cache/ensemble_camembert-base/models/mlp/f11d0165c931292c15a0e4fae4dcbd0290b167f5587ec7e68c07053b7a23e905_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5221 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1761,3.145321,1.773505,1.598276,3.145321
46,2.9703,0.675702,0.822011,0.616428,0.675702
69,0.9348,0.695987,0.834258,0.678562,0.695987
92,0.7941,0.643165,0.801976,0.569994,0.643165
115,0.6028,0.607466,0.779401,0.61279,0.607466
138,0.3852,0.73928,0.859814,0.708611,0.73928
161,0.3272,0.79415,0.891151,0.643106,0.79415


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 691ms/step - loss: 5.4178 - root_mean_squared_error: 2.3276
Epoch 1: val_root_mean_squared_error improved from inf to 1.08794, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.3952 - root_mean_squared_error: 1.4816 - val_loss: 1.1836 - val_root_mean_squared_error: 1.0879
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.6833 - root_mean_squared_error: 0.8266
Epoch 2: val_root_mean_squared_error improved from 1.08794 to 0.72049, saving model to cache/ensemble_camembert-base/models/mlp/a357ce1f360b11b29548b7de647364a6e7467085a8831c85a29793f7b3c37f0a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4295 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8027,4.482044,2.117084,1.875538,4.482044
46,2.8721,1.347301,1.160733,0.88086,1.347301
69,0.9588,0.905644,0.951653,0.757454,0.905644
92,0.791,0.786385,0.886783,0.669273,0.786385
115,0.4777,0.764685,0.874463,0.633344,0.764685
138,0.3875,0.756418,0.869723,0.665675,0.756418
161,0.2886,0.748604,0.865219,0.651528,0.748604


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 5.6867 - root_mean_squared_error: 2.3847
Epoch 1: val_root_mean_squared_error improved from inf to 0.81419, saving model to cache/ensemble_camembert-base/models/mlp/71fdcc3b8122b6ee1919855932e05d927b9bc8c6ff8b5c392de6af933844a3ec_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5541 - root_mean_squared_error: 1.5323 - val_loss: 0.6629 - val_root_mean_squared_error: 0.8142
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5358 - root_mean_squared_error: 0.7320
Epoch 2: val_root_mean_squared_error did not improve from 0.81419
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7665 - root_mean_squared_error: 0.8593 - val_loss: 1.0137 - val_root_mean_squared_error: 1.0068
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7382,3.041315,1.743937,1.555017,3.041315
46,3.4026,0.708099,0.841486,0.611396,0.708099
69,0.8934,0.697258,0.83502,0.71685,0.697258
92,0.98,0.834672,0.913604,0.793732,0.834672
115,0.8261,0.744239,0.862693,0.724479,0.744239
138,0.7734,0.711966,0.843781,0.675244,0.711966
161,0.5771,0.744449,0.862815,0.586614,0.744449


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 5.5195 - root_mean_squared_error: 2.3494
Epoch 1: val_root_mean_squared_error improved from inf to 0.78891, saving model to cache/ensemble_camembert-base/models/mlp/a1fb1df38bb1083567f4a912152ec6e66ae90613a9eb44b35adf1b59163a5294_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.2832 - root_mean_squared_error: 1.7411 - val_loss: 0.6224 - val_root_mean_squared_error: 0.7889
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5414 - root_mean_squared_error: 0.7358
Epoch 2: val_root_mean_squared_error improved from 0.78891 to 0.71252, saving model to cache/ensemble_camembert-base/models/mlp/a1fb1df38bb1083567f4a912152ec6e66ae90613a9eb44b35adf1b59163a5294_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5653 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1496,4.496445,2.120482,1.900163,4.496445
46,2.6281,1.236765,1.112099,0.905474,1.236765
69,0.9351,0.840858,0.916983,0.748379,0.840858
92,0.7025,0.762891,0.873436,0.711543,0.762891
115,0.5618,0.6225,0.788987,0.650838,0.6225
138,0.5489,0.647522,0.804688,0.658981,0.647522
161,0.3749,0.57653,0.759295,0.659789,0.57653


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 678ms/step - loss: 4.6293 - root_mean_squared_error: 2.1516
Epoch 1: val_root_mean_squared_error improved from inf to 0.83437, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2685 - root_mean_squared_error: 1.4578 - val_loss: 0.6962 - val_root_mean_squared_error: 0.8344
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.0882 - root_mean_squared_error: 0.2970
Epoch 2: val_root_mean_squared_error improved from 0.83437 to 0.61640, saving model to cache/ensemble_camembert-base/models/mlp/6230434ae7f0f4673d8de43f0b8753b2340635545796f6c393e4c59401abacc4_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5926 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8045,4.258299,2.063565,1.818829,4.258299
46,2.5592,1.127189,1.061692,0.839756,1.127189
69,0.7621,0.833059,0.91272,0.761174,0.833059
92,0.6616,0.872689,0.934178,0.72922,0.872689
115,0.5465,0.708279,0.841593,0.647636,0.708279
138,0.4428,0.81553,0.903067,0.677096,0.81553
161,0.3379,0.725297,0.851644,0.648776,0.725297


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 663ms/step - loss: 7.0362 - root_mean_squared_error: 2.6526
Epoch 1: val_root_mean_squared_error improved from inf to 0.72334, saving model to cache/ensemble_camembert-base/models/mlp/f6a0475238b2f72e8e97fa597d3d5e5fe89dac0adf4d5c4e7f07494fa5007fbe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.8940 - root_mean_squared_error: 1.6316 - val_loss: 0.5232 - val_root_mean_squared_error: 0.7233
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3085 - root_mean_squared_error: 0.5554
Epoch 2: val_root_mean_squared_error did not improve from 0.72334
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4954 - root_mean_squared_error: 0.7012 - val_loss: 0.7259 - val_root_mean_squared_error: 0.8520
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0717,4.991149,2.234088,1.994652,4.991149
46,2.9362,1.569498,1.252796,0.95283,1.569498
69,0.9647,1.011375,1.005672,0.85286,1.011375
92,0.8491,0.977826,0.988851,0.796029,0.977826
115,0.6517,0.753433,0.868005,0.7078,0.753433
138,0.6688,0.76351,0.873791,0.668726,0.76351
161,0.579,0.672356,0.819973,0.632795,0.672355


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 5.7395 - root_mean_squared_error: 2.3957
Epoch 1: val_root_mean_squared_error improved from inf to 0.44635, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9599 - root_mean_squared_error: 1.6623 - val_loss: 0.1992 - val_root_mean_squared_error: 0.4463
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.2534 - root_mean_squared_error: 0.5034
Epoch 2: val_root_mean_squared_error improved from 0.44635 to 0.35652, saving model to cache/ensemble_camembert-base/models/mlp/cee169265ea48030d95963d537ff514ef77b07792ffa7d65e6b0905f3634f920_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4207 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1879,5.218445,2.284392,2.085662,5.218445
46,2.6817,1.451508,1.204786,0.972552,1.451508
69,0.7293,0.671364,0.819368,0.646196,0.671364
92,0.6352,0.67755,0.823134,0.629731,0.67755
115,0.5975,0.752026,0.867194,0.717701,0.752026
138,0.5071,0.592262,0.769585,0.644272,0.592262
161,0.4483,0.5981,0.773369,0.661353,0.5981


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 679ms/step - loss: 8.8300 - root_mean_squared_error: 2.9715
Epoch 1: val_root_mean_squared_error improved from inf to 0.83697, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9899 - root_mean_squared_error: 1.6446 - val_loss: 0.7005 - val_root_mean_squared_error: 0.8370
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4530 - root_mean_squared_error: 0.6730
Epoch 2: val_root_mean_squared_error improved from 0.83697 to 0.52190, saving model to cache/ensemble_camembert-base/models/mlp/cafa527369311aa9487252136af8938f9ed2234720056d9d913c415486694bb6_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3551 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2508,4.099104,2.024624,1.759156,4.099104
46,2.5451,1.264473,1.124488,0.81375,1.264473
69,0.7949,1.050972,1.025169,0.867593,1.050972
92,0.6948,0.902761,0.950138,0.684006,0.902761
115,0.7238,0.804219,0.896783,0.706896,0.804219
138,0.4575,0.729063,0.853852,0.617575,0.729063
161,0.3409,0.79132,0.889562,0.644933,0.79132


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 675ms/step - loss: 8.7746 - root_mean_squared_error: 2.9622
Epoch 1: val_root_mean_squared_error improved from inf to 0.96267, saving model to cache/ensemble_camembert-base/models/mlp/5eed6be817ccc55cb4f9507e59170cfe8070fcb1aee8bc32fa5e487cad137e6b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9487 - root_mean_squared_error: 1.6380 - val_loss: 0.9267 - val_root_mean_squared_error: 0.9627
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - loss: 1.0695 - root_mean_squared_error: 1.0341
Epoch 2: val_root_mean_squared_error did not improve from 0.96267
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4590 - root_mean_squared_error: 0.6639 - val_loss: 1.7859 - val_root_mean_squared_error: 1.3364
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9763,4.162095,2.040121,1.793995,4.162095
46,2.7081,1.208221,1.099191,0.829372,1.208221
69,0.8113,0.927967,0.963311,0.772385,0.927967
92,0.7834,1.047958,1.023698,0.789237,1.047958
115,0.6022,0.941826,0.970477,0.760119,0.941826
138,0.4772,0.785571,0.886324,0.691321,0.785571
161,0.3531,0.663794,0.814736,0.611531,0.663794


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 4.8328 - root_mean_squared_error: 2.1984
Epoch 1: val_root_mean_squared_error improved from inf to 0.68213, saving model to cache/ensemble_camembert-base/models/mlp/0f9da934e850fc9ddd6a84688029dc634de43ddcea5499b59d4fabc29432cdd2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.1778 - root_mean_squared_error: 1.4229 - val_loss: 0.4653 - val_root_mean_squared_error: 0.6821
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3060 - root_mean_squared_error: 0.5532
Epoch 2: val_root_mean_squared_error did not improve from 0.68213
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5090 - root_mean_squared_error: 0.7105 - val_loss: 0.5378 - val_root_mean_squared_error: 0.7333
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.2467,3.446998,1.856609,1.689751,3.446998
46,2.3426,0.748269,0.865025,0.603692,0.748269
69,0.8018,0.57694,0.759566,0.577612,0.57694
92,0.7112,0.609418,0.780652,0.633839,0.609418
115,0.5931,0.551171,0.742409,0.60355,0.551171
138,0.5055,0.656345,0.810151,0.651217,0.656345
161,0.2801,0.54452,0.737916,0.62255,0.54452


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 686ms/step - loss: 7.6241 - root_mean_squared_error: 2.7612
Epoch 1: val_root_mean_squared_error improved from inf to 0.68777, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4963 - root_mean_squared_error: 1.4954 - val_loss: 0.4730 - val_root_mean_squared_error: 0.6878
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4870 - root_mean_squared_error: 0.6978
Epoch 2: val_root_mean_squared_error improved from 0.68777 to 0.46958, saving model to cache/ensemble_camembert-base/models/mlp/cd4578834db3f93adbc44069c684be0580c004fffb9119e905cd91fcc1bed98b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3404 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6978,3.360763,1.833238,1.647989,3.360763
46,2.4879,0.795302,0.891797,0.663344,0.795302
69,1.047,0.647538,0.804698,0.649637,0.647538
92,0.7707,0.586171,0.765618,0.640128,0.586171
115,0.5402,0.531195,0.728832,0.605382,0.531195
138,0.4741,0.478669,0.691859,0.540137,0.478669
161,0.2174,0.511631,0.715284,0.560592,0.511631


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 691ms/step - loss: 4.3834 - root_mean_squared_error: 2.0937
Epoch 1: val_root_mean_squared_error improved from inf to 0.82128, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4757 - root_mean_squared_error: 1.5175 - val_loss: 0.6745 - val_root_mean_squared_error: 0.8213
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3413 - root_mean_squared_error: 0.5842
Epoch 2: val_root_mean_squared_error improved from 0.82128 to 0.63766, saving model to cache/ensemble_camembert-base/models/mlp/a5a9a4854aeb7f72a8a9b8e32892a4e54820d2fabb11be13054c692258051210_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3407 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5008,3.389523,1.841066,1.658809,3.389523
46,2.5095,0.727072,0.852685,0.650703,0.727072
69,0.8631,0.552984,0.743629,0.609861,0.552984
92,0.6674,0.532034,0.729406,0.552892,0.532034
115,0.5245,0.583564,0.763914,0.638048,0.583564
138,0.5514,0.383938,0.619627,0.502377,0.383938
161,0.2874,0.371418,0.609441,0.480002,0.371418


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 682ms/step - loss: 9.0012 - root_mean_squared_error: 3.0002
Epoch 1: val_root_mean_squared_error improved from inf to 0.62879, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9948 - root_mean_squared_error: 1.6500 - val_loss: 0.3954 - val_root_mean_squared_error: 0.6288
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.4049 - root_mean_squared_error: 0.6363
Epoch 2: val_root_mean_squared_error improved from 0.62879 to 0.49599, saving model to cache/ensemble_camembert-base/models/mlp/5f8b16c5a7c32370e2acd0f8929834bb9456dbb195230a5666aeee530457cbb0_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4278 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.3376,3.753418,1.937374,1.790248,3.753418
46,2.1655,0.835941,0.914298,0.689937,0.835941
69,0.9457,0.572706,0.756773,0.628975,0.572706
92,0.8761,0.452092,0.672378,0.552293,0.452092
115,0.6694,0.470323,0.685801,0.555171,0.470323
138,0.4987,0.609392,0.780636,0.668521,0.609392
161,0.3775,0.512649,0.715995,0.605925,0.512649


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 4.7066 - root_mean_squared_error: 2.1695
Epoch 1: val_root_mean_squared_error improved from inf to 0.86620, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.1726 - root_mean_squared_error: 1.4215 - val_loss: 0.7503 - val_root_mean_squared_error: 0.8662
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.9274 - root_mean_squared_error: 0.9630
Epoch 2: val_root_mean_squared_error improved from 0.86620 to 0.48811, saving model to cache/ensemble_camembert-base/models/mlp/25573f5fdbafd33dc5d880922f136f3c15a97967f251c075c4331054c6393df1_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.9111 - root_mean_squared_error: 0.9

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4652,4.893681,2.212167,1.975608,4.893681
46,2.5528,1.444737,1.201972,0.948229,1.444737
69,0.6478,0.981411,0.990662,0.779372,0.981411
92,0.7561,0.880402,0.938298,0.750128,0.880402
115,0.5709,0.917809,0.958023,0.760631,0.917809
138,0.4999,1.107612,1.052432,0.807187,1.107612
161,0.3523,0.931593,0.96519,0.741328,0.931593


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 9.1500 - root_mean_squared_error: 3.0249
Epoch 1: val_root_mean_squared_error improved from inf to 0.73022, saving model to cache/ensemble_camembert-base/models/mlp/fe709aa942371b132a95782966e8e7efe2e93dca80ac9ef37d14fcdca605aabe_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7154 - root_mean_squared_error: 1.5609 - val_loss: 0.5332 - val_root_mean_squared_error: 0.7302
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss: 0.4389 - root_mean_squared_error: 0.6625
Epoch 2: val_root_mean_squared_error did not improve from 0.73022
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7223 - root_mean_squared_error: 0.8350 - val_loss: 1.3004 - val_root_mean_squared_error: 1.1403
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7971,3.278389,1.810632,1.591511,3.278389
46,2.8825,0.881244,0.938746,0.752254,0.881244
69,1.0384,1.015035,1.007489,0.828317,1.015035
92,0.8124,0.821754,0.906506,0.749965,0.821754
115,0.753,0.733578,0.856492,0.731128,0.733578
138,0.5396,0.590811,0.768642,0.652561,0.590811
161,0.381,0.640336,0.80021,0.646133,0.640336


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 5.5122 - root_mean_squared_error: 2.3478
Epoch 1: val_root_mean_squared_error improved from inf to 0.79759, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4681 - root_mean_squared_error: 1.5065 - val_loss: 0.6361 - val_root_mean_squared_error: 0.7976
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1885 - root_mean_squared_error: 0.4342
Epoch 2: val_root_mean_squared_error improved from 0.79759 to 0.71646, saving model to cache/ensemble_camembert-base/models/mlp/d083a0ce7f81b925320fe3299134545653ffe67a36cd6ef2c818468c6d8c9988_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5474 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7386,3.470939,1.863046,1.635997,3.470939
46,2.7013,0.951473,0.975435,0.764314,0.951473
69,0.9869,0.850459,0.922203,0.739735,0.850458
92,0.6924,0.764602,0.874415,0.706881,0.764601
115,0.5667,0.744137,0.862634,0.695154,0.744137
138,0.4366,0.817696,0.904266,0.75396,0.817696
161,0.3556,0.852469,0.923292,0.749586,0.852469


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 658ms/step - loss: 7.5224 - root_mean_squared_error: 2.7427
Epoch 1: val_root_mean_squared_error improved from inf to 0.79959, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.8140 - root_mean_squared_error: 1.6049 - val_loss: 0.6393 - val_root_mean_squared_error: 0.7996
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5031 - root_mean_squared_error: 0.7093
Epoch 2: val_root_mean_squared_error improved from 0.79959 to 0.73273, saving model to cache/ensemble_camembert-base/models/mlp/d166cf1fb6379e52591e8b9c4254d40ce58aeebdda74e3bd8492ad84a2dca142_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3586 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2498,3.709186,1.925925,1.778234,3.709186
46,2.4743,0.771279,0.878225,0.66641,0.771279
69,0.8223,0.550265,0.741799,0.590874,0.550265
92,0.7271,0.425813,0.652544,0.552103,0.425813
115,0.7607,0.42572,0.652473,0.550078,0.42572
138,0.4569,0.488945,0.699246,0.569229,0.488945
161,0.3707,0.595902,0.771947,0.640359,0.595902


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 6.5294 - root_mean_squared_error: 2.5553
Epoch 1: val_root_mean_squared_error improved from inf to 0.43151, saving model to cache/ensemble_camembert-base/models/mlp/43eed69246bd9282f4a4e2b07e3e0e39980b6ea0c81608dd9085d6cd9597d69b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.7676 - root_mean_squared_error: 1.5957 - val_loss: 0.1862 - val_root_mean_squared_error: 0.4315
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.2225 - root_mean_squared_error: 0.4717
Epoch 2: val_root_mean_squared_error improved from 0.43151 to 0.41020, saving model to cache/ensemble_camembert-base/models/mlp/43eed69246bd9282f4a4e2b07e3e0e39980b6ea0c81608dd9085d6cd9597d69b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3039 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9443,4.900878,2.213793,1.993297,4.900878
46,2.3593,1.53277,1.238051,0.956224,1.53277
69,0.9986,0.937477,0.968234,0.779583,0.937477
92,0.7326,0.856546,0.925498,0.734679,0.856546
115,0.5823,0.859485,0.927084,0.696146,0.859485
138,0.4815,0.644773,0.802978,0.653658,0.644773
161,0.3551,0.677524,0.823119,0.64989,0.677524


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 4.1488 - root_mean_squared_error: 2.0369
Epoch 1: val_root_mean_squared_error improved from inf to 0.69694, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2941 - root_mean_squared_error: 1.4745 - val_loss: 0.4857 - val_root_mean_squared_error: 0.6969
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3476 - root_mean_squared_error: 0.5896
Epoch 2: val_root_mean_squared_error improved from 0.69694 to 0.56272, saving model to cache/ensemble_camembert-base/models/mlp/bf7d445763353a455fb2ed8c1aa9d4be47cbe3341b3f4a30ceaa9e5f186c265e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4985 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7897,3.539318,1.881307,1.669476,3.539317
46,2.2717,0.876066,0.935984,0.694945,0.876066
69,0.7983,0.781251,0.883884,0.740035,0.781251
92,0.6949,0.657849,0.811079,0.622007,0.657849
115,0.6655,0.61434,0.783798,0.636486,0.61434
138,0.4454,0.747787,0.864747,0.677235,0.747787
161,0.3441,0.649662,0.806016,0.629118,0.649662


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 8.5207 - root_mean_squared_error: 2.9190
Epoch 1: val_root_mean_squared_error improved from inf to 0.79496, saving model to cache/ensemble_camembert-base/models/mlp/183458d6ef231572363d79c807b3a5168d5b76c59da1f13459a03e2ee3816770_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.9655 - root_mean_squared_error: 1.6366 - val_loss: 0.6320 - val_root_mean_squared_error: 0.7950
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3522 - root_mean_squared_error: 0.5934
Epoch 2: val_root_mean_squared_error did not improve from 0.79496
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4908 - root_mean_squared_error: 0.6941 - val_loss: 1.0886 - val_root_mean_squared_error: 1.0434
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6039,4.20178,2.049824,1.816001,4.20178
46,2.2064,1.177285,1.085028,0.877255,1.177285
69,0.931,0.80545,0.897469,0.797736,0.80545
92,0.7231,0.747748,0.864724,0.704714,0.747748
115,0.8032,0.995738,0.997867,0.796323,0.995738
138,0.5475,0.830566,0.911354,0.761013,0.830566
161,0.3202,0.749193,0.865559,0.707345,0.749193


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 670ms/step - loss: 6.0959 - root_mean_squared_error: 2.4690
Epoch 1: val_root_mean_squared_error improved from inf to 1.00501, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.2180 - root_mean_squared_error: 1.4281 - val_loss: 1.0100 - val_root_mean_squared_error: 1.0050
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5038 - root_mean_squared_error: 0.7098
Epoch 2: val_root_mean_squared_error improved from 1.00501 to 0.61235, saving model to cache/ensemble_camembert-base/models/mlp/4976c05eeb22a9cdf14f2c4e4ae4c6ca8d92c6ebc48065fbd6200dadbc275e8f_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4857 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.6135,3.309878,1.819307,1.629359,3.309878
46,2.4934,0.78081,0.883635,0.705699,0.78081
69,0.8774,0.611114,0.781738,0.645356,0.611114
92,0.7484,0.457211,0.676174,0.556615,0.457211
115,0.4987,0.597306,0.772856,0.626835,0.597306
138,0.3851,0.605243,0.777974,0.626242,0.605243
161,0.237,0.500494,0.707456,0.572438,0.500494


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 668ms/step - loss: 7.0451 - root_mean_squared_error: 2.6543
Epoch 1: val_root_mean_squared_error improved from inf to 0.51907, saving model to cache/ensemble_camembert-base/models/mlp/d8dc3a3bcd119757161db5929b64dac089221b5ac587519629bfc3de8ed7c4cf_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0408 - root_mean_squared_error: 1.6687 - val_loss: 0.2694 - val_root_mean_squared_error: 0.5191
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7858 - root_mean_squared_error: 0.8864
Epoch 2: val_root_mean_squared_error did not improve from 0.51907
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5110 - root_mean_squared_error: 0.7124 - val_loss: 0.3917 - val_root_mean_squared_error: 0.6258
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.1983,2.860444,1.691285,1.476492,2.860444
46,3.0971,0.713787,0.844859,0.701813,0.713787
69,0.7129,1.012538,1.006249,0.849474,1.012538
92,0.8042,0.575498,0.758616,0.644927,0.575498
115,0.5756,0.537477,0.733129,0.568729,0.537477
138,0.4735,0.4707,0.686076,0.558941,0.4707
161,0.3352,0.436295,0.660526,0.523725,0.436295


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 659ms/step - loss: 10.9687 - root_mean_squared_error: 3.3119
Epoch 1: val_root_mean_squared_error improved from inf to 0.46170, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.0345 - root_mean_squared_error: 1.6459 - val_loss: 0.2132 - val_root_mean_squared_error: 0.4617
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.3406 - root_mean_squared_error: 0.5836
Epoch 2: val_root_mean_squared_error improved from 0.46170 to 0.42048, saving model to cache/ensemble_camembert-base/models/mlp/eb56009fedb982f754ad85aed01dadec57982231482e1ef07839c4bdf9268de7_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.4050 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9581,3.391055,1.841482,1.627776,3.391056
46,2.2142,0.82519,0.9084,0.713005,0.82519
69,0.919,0.792744,0.890362,0.742317,0.792744
92,0.6812,0.631779,0.794845,0.648955,0.631779
115,0.6131,0.749648,0.865822,0.725306,0.749648
138,0.5183,0.823899,0.907689,0.753301,0.823899
161,0.2918,0.929189,0.963944,0.792179,0.929189


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 664ms/step - loss: 8.5031 - root_mean_squared_error: 2.9160
Epoch 1: val_root_mean_squared_error improved from inf to 0.55245, saving model to cache/ensemble_camembert-base/models/mlp/1f0680205112bab0fa887c0a2a33810a05bec5e449ce8aaa3aecb1affeb4c30c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.1655 - root_mean_squared_error: 1.6966 - val_loss: 0.3052 - val_root_mean_squared_error: 0.5524
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.4686 - root_mean_squared_error: 0.6846
Epoch 2: val_root_mean_squared_error did not improve from 0.55245
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3309 - root_mean_squared_error: 0.5723 - val_loss: 0.3216 - val_root_mean_squared_error: 0.5671
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8424,3.612409,1.900634,1.719588,3.612409
46,2.7204,0.818249,0.904571,0.724351,0.818249
69,0.9586,0.666339,0.816296,0.668903,0.666339
92,0.8296,0.462783,0.680281,0.537495,0.462783
115,0.7222,0.500428,0.707409,0.564374,0.500428
138,0.5613,0.454936,0.674489,0.531848,0.454936
161,0.3912,0.41867,0.647047,0.529585,0.41867


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 666ms/step - loss: 7.4691 - root_mean_squared_error: 2.7330
Epoch 1: val_root_mean_squared_error improved from inf to 0.66423, saving model to cache/ensemble_camembert-base/models/mlp/23d74afe30d4b952316e803e78a46796dc8cb76e6f7a3a8e71ebbc1cbafaef9e_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.4193 - root_mean_squared_error: 1.4818 - val_loss: 0.4412 - val_root_mean_squared_error: 0.6642
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.7172 - root_mean_squared_error: 0.8469
Epoch 2: val_root_mean_squared_error did not improve from 0.66423
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6328 - root_mean_squared_error: 0.7862 - val_loss: 1.0065 - val_root_mean_squared_error: 1.0033
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.4462,4.193833,2.047885,1.831714,4.193833
46,3.0085,1.225389,1.106973,0.809486,1.225389
69,0.9283,0.8316,0.911921,0.751475,0.8316
92,0.6423,0.610383,0.78127,0.614637,0.610383
115,0.6512,0.621093,0.788095,0.632147,0.621093
138,0.5377,0.717358,0.84697,0.675078,0.717358
161,0.2899,0.733658,0.856538,0.661821,0.733658


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 667ms/step - loss: 8.1737 - root_mean_squared_error: 2.8590
Epoch 1: val_root_mean_squared_error improved from inf to 0.70616, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.0514 - root_mean_squared_error: 1.6681 - val_loss: 0.4987 - val_root_mean_squared_error: 0.7062
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5139 - root_mean_squared_error: 0.7169
Epoch 2: val_root_mean_squared_error improved from 0.70616 to 0.58311, saving model to cache/ensemble_camembert-base/models/mlp/7a78e08dad16cc2b21c1c2cdba1fd614606ac0666f16abe470aa6f34dff8397b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5404 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2057,4.501325,2.121633,1.939796,4.501325
46,2.3445,1.121475,1.058997,0.85084,1.121475
69,0.7953,0.762135,0.873004,0.693969,0.762135
92,0.782,0.723313,0.850478,0.65495,0.723313
115,0.5615,1.138286,1.066905,0.806505,1.138286
138,0.4319,1.15133,1.073,0.819561,1.15133
161,0.2156,1.074491,1.036577,0.799043,1.074491


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 671ms/step - loss: 6.2518 - root_mean_squared_error: 2.5004
Epoch 1: val_root_mean_squared_error improved from inf to 0.68389, saving model to cache/ensemble_camembert-base/models/mlp/483b33af5f90d2d7949f7aa3288319156264cd8b86a042df746190742285050b_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.9496 - root_mean_squared_error: 1.6459 - val_loss: 0.4677 - val_root_mean_squared_error: 0.6839
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 0.1615 - root_mean_squared_error: 0.4019
Epoch 2: val_root_mean_squared_error did not improve from 0.68389
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4401 - root_mean_squared_error: 0.6542 - val_loss: 1.0436 - val_root_mean_squared_error: 1.0216
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8206,3.960266,1.990042,1.790811,3.960267
46,2.479,0.970191,0.984983,0.774898,0.970191
69,0.8863,0.720475,0.848808,0.714574,0.720475
92,0.7425,0.564449,0.751298,0.650125,0.564449
115,0.6037,0.582896,0.763476,0.602605,0.582896
138,0.3396,0.495102,0.703635,0.581686,0.495102
161,0.286,0.533644,0.73051,0.597983,0.533644


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 669ms/step - loss: 6.8661 - root_mean_squared_error: 2.6203
Epoch 1: val_root_mean_squared_error improved from inf to 0.64525, saving model to cache/ensemble_camembert-base/models/mlp/a98874d3af321769dacd345fef988fba1d8e5abf878391234b1670475bec4d29_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4697 - root_mean_squared_error: 1.5020 - val_loss: 0.4164 - val_root_mean_squared_error: 0.6453
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2240 - root_mean_squared_error: 0.4733
Epoch 2: val_root_mean_squared_error did not improve from 0.64525
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3591 - root_mean_squared_error: 0.5953 - val_loss: 0.5542 - val_root_mean_squared_error: 0.7445
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.1731,5.075987,2.252995,2.052606,5.075987
46,2.1045,1.390651,1.179259,0.888868,1.390651
69,0.8348,1.152695,1.073636,0.801787,1.152695
92,0.8315,0.923112,0.960787,0.722525,0.923112
115,0.7706,0.730322,0.854589,0.655293,0.730322
138,0.556,0.727472,0.85292,0.659313,0.727472
161,0.4505,0.630101,0.793789,0.630906,0.630101


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 662ms/step - loss: 7.0502 - root_mean_squared_error: 2.6552
Epoch 1: val_root_mean_squared_error improved from inf to 0.58343, saving model to cache/ensemble_camembert-base/models/mlp/4e0c0e453d7907eb14cdeff662e022ed234097c96d59df1b7f89bf57b8b09c82_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.5404 - root_mean_squared_error: 1.5312 - val_loss: 0.3404 - val_root_mean_squared_error: 0.5834
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.4675 - root_mean_squared_error: 0.6838
Epoch 2: val_root_mean_squared_error did not improve from 0.58343
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.6043 - root_mean_squared_error: 0.7737 - val_loss: 0.3419 - val_root_mean_squared_error: 0.5847
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.7926,3.326955,1.823994,1.623171,3.326956
46,2.8532,0.847285,0.920481,0.782577,0.847285
69,0.8302,0.716312,0.846352,0.688487,0.716312
92,0.6736,0.852586,0.923356,0.72378,0.852586
115,0.5702,0.897792,0.947519,0.775148,0.897792
138,0.4345,0.762838,0.873406,0.715218,0.762838
161,0.3338,0.760157,0.87187,0.710312,0.760157


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 8.6025 - root_mean_squared_error: 2.9330
Epoch 1: val_root_mean_squared_error improved from inf to 0.79604, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 2.8487 - root_mean_squared_error: 1.5970 - val_loss: 0.6337 - val_root_mean_squared_error: 0.7960
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.5296 - root_mean_squared_error: 0.7277
Epoch 2: val_root_mean_squared_error improved from 0.79604 to 0.56226, saving model to cache/ensemble_camembert-base/models/mlp/b2f5822a98d84c3dcc011d46f3a437013cd7580faa1d4afe658dfa9579130925_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3392 - root_mean_squared_error: 0.5

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8861,3.659048,1.912864,1.749917,3.659048
46,2.7822,0.856204,0.925313,0.728537,0.856203
69,1.0829,0.585224,0.764999,0.619124,0.585224
92,0.8417,0.553572,0.744024,0.587257,0.553572
115,0.7004,0.689815,0.830551,0.626177,0.689815
138,0.5529,0.643553,0.802218,0.618201,0.643553
161,0.4373,0.674173,0.82108,0.61655,0.674173


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 6.8531 - root_mean_squared_error: 2.6178
Epoch 1: val_root_mean_squared_error improved from inf to 0.50862, saving model to cache/ensemble_camembert-base/models/mlp/3faa95308897e7acf1cd78779ed6236072a67a0a737c4f0dda5f4c0d29bb6a37_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7464 - root_mean_squared_error: 1.5869 - val_loss: 0.2587 - val_root_mean_squared_error: 0.5086
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3682 - root_mean_squared_error: 0.6068
Epoch 2: val_root_mean_squared_error improved from 0.50862 to 0.39779, saving model to cache/ensemble_camembert-base/models/mlp/3faa95308897e7acf1cd78779ed6236072a67a0a737c4f0dda5f4c0d29bb6a37_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.5065 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.0607,4.627112,2.151072,1.927578,4.627112
46,2.4327,1.287323,1.134602,0.887199,1.287323
69,0.7472,0.886079,0.941318,0.735521,0.886079
92,0.7614,0.695309,0.833852,0.682716,0.695309
115,0.5516,0.619833,0.787295,0.640779,0.619833
138,0.3048,0.523763,0.723715,0.606808,0.523763
161,0.2509,0.652162,0.807566,0.656449,0.652162


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 660ms/step - loss: 4.9482 - root_mean_squared_error: 2.2245
Epoch 1: val_root_mean_squared_error improved from inf to 0.64263, saving model to cache/ensemble_camembert-base/models/mlp/824f87d6c7c60f9781842265a1024888cb76f930850c07e83abbaf20e904038a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.3818 - root_mean_squared_error: 1.4839 - val_loss: 0.4130 - val_root_mean_squared_error: 0.6426
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.2442 - root_mean_squared_error: 0.4942
Epoch 2: val_root_mean_squared_error did not improve from 0.64263
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4393 - root_mean_squared_error: 0.6588 - val_loss: 0.7350 - val_root_mean_squared_error: 0.8573
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.4769,3.754836,1.93774,1.670382,3.754836
46,2.4736,1.113458,1.055205,0.762342,1.113458
69,0.882,0.93731,0.968148,0.770614,0.93731
92,0.6602,0.816448,0.903575,0.757805,0.816448
115,0.6322,0.705875,0.840164,0.635356,0.705875
138,0.485,0.643536,0.802207,0.597722,0.643536
161,0.3165,0.52045,0.721422,0.552181,0.52045


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 672ms/step - loss: 4.8325 - root_mean_squared_error: 2.1983
Epoch 1: val_root_mean_squared_error improved from inf to 0.65464, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.5533 - root_mean_squared_error: 1.5359 - val_loss: 0.4286 - val_root_mean_squared_error: 0.6546
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1163 - root_mean_squared_error: 0.3410
Epoch 2: val_root_mean_squared_error improved from 0.65464 to 0.62373, saving model to cache/ensemble_camembert-base/models/mlp/144a1ad3bb37d3f062d095a029bb526e6b0dda7af7fadfd6336945264a60d876_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5379 - root_mean_squared_error: 0.7

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.698,3.741691,1.934345,1.748069,3.741691
46,2.8587,0.863548,0.929273,0.733173,0.863548
69,0.8017,0.665249,0.815628,0.672076,0.665249
92,0.8655,0.642489,0.801554,0.678089,0.642489
115,0.5975,0.647898,0.804921,0.67028,0.647898
138,0.5486,0.504595,0.710349,0.563673,0.504595
161,0.3315,0.552755,0.743475,0.603739,0.552755


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 6.3325 - root_mean_squared_error: 2.5165
Epoch 1: val_root_mean_squared_error improved from inf to 0.54064, saving model to cache/ensemble_camembert-base/models/mlp/7f65c1db0bf5816778ccfac18dbf377cb824894ecf1f586c7be43ac5a083472a_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7578 - root_mean_squared_error: 1.5890 - val_loss: 0.2923 - val_root_mean_squared_error: 0.5406
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.1841 - root_mean_squared_error: 0.4291
Epoch 2: val_root_mean_squared_error did not improve from 0.54064
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4427 - root_mean_squared_error: 0.6591 - val_loss: 0.4767 - val_root_mean_squared_error: 0.6904
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8228,3.572161,1.890016,1.672239,3.572161
46,3.0877,0.925206,0.961876,0.736462,0.925206
69,0.7126,0.87429,0.935035,0.777842,0.87429
92,0.739,0.640014,0.800009,0.651782,0.640014
115,0.6409,0.700902,0.837199,0.685676,0.700902
138,0.5278,0.660519,0.812723,0.663135,0.660519
161,0.3142,0.710693,0.843026,0.671161,0.710693


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 657ms/step - loss: 7.2833 - root_mean_squared_error: 2.6988
Epoch 1: val_root_mean_squared_error improved from inf to 0.67554, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.7233 - root_mean_squared_error: 1.5775 - val_loss: 0.4563 - val_root_mean_squared_error: 0.6755
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.1875 - root_mean_squared_error: 0.4330
Epoch 2: val_root_mean_squared_error improved from 0.67554 to 0.60541, saving model to cache/ensemble_camembert-base/models/mlp/cd4998d41f3be2232c447d11389f4d70eaf55c635ad49187ab78fbeadd7e3492_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.2400 - root_mean_squared_error: 0.4

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9835,3.958182,1.989518,1.789203,3.958182
46,2.2281,1.00708,1.003534,0.748928,1.00708
69,1.0286,0.744909,0.863081,0.707775,0.744909
92,0.7331,0.701759,0.837711,0.633684,0.701759
115,0.5219,0.641291,0.800806,0.621454,0.641291
138,0.5379,0.677913,0.823355,0.665246,0.677913
161,0.4483,0.695013,0.833675,0.689506,0.695013


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 665ms/step - loss: 7.0967 - root_mean_squared_error: 2.6640
Epoch 1: val_root_mean_squared_error improved from inf to 0.59605, saving model to cache/ensemble_camembert-base/models/mlp/474df8b6e3907935850fd01c78e7384e00b4dd9a1d64a3fdd8ebdc32fcadfdf8_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.2062 - root_mean_squared_error: 1.7151 - val_loss: 0.3553 - val_root_mean_squared_error: 0.5960
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.5038 - root_mean_squared_error: 0.7098
Epoch 2: val_root_mean_squared_error did not improve from 0.59605
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.5446 - root_mean_squared_error: 0.7330 - val_loss: 1.4077 - val_root_mean_squared_error: 1.1865
Epoch 3/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.8343,5.190874,2.278349,2.02595,5.190874
46,2.6978,1.733137,1.316487,0.981212,1.733137
69,0.9391,1.091091,1.044553,0.91465,1.091091
92,0.7188,0.917675,0.957954,0.811917,0.917675
115,0.5635,1.020578,1.010237,0.82612,1.020578
138,0.3457,1.102291,1.049901,0.87896,1.102291
161,0.2403,1.04357,1.021553,0.867205,1.04357


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 683ms/step - loss: 5.8628 - root_mean_squared_error: 2.4213
Epoch 1: val_root_mean_squared_error improved from inf to 1.18253, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 2.4230 - root_mean_squared_error: 1.4996 - val_loss: 1.3984 - val_root_mean_squared_error: 1.1825
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.8051 - root_mean_squared_error: 0.8973
Epoch 2: val_root_mean_squared_error improved from 1.18253 to 0.58310, saving model to cache/ensemble_camembert-base/models/mlp/73ddc85f8bf9c6e15b91737a92d7b7d80d0b2db80567c5393f027defb387fc3c_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3817 - root_mean_squared_error: 0.6

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,6.2809,3.769734,1.94158,1.779015,3.769734
46,2.3896,0.803656,0.896468,0.667716,0.803656
69,0.9614,0.6234,0.789557,0.647443,0.6234
92,0.6776,0.671496,0.819449,0.688357,0.671496
115,0.6163,0.76583,0.875117,0.766557,0.76583
138,0.3766,0.781353,0.883942,0.751361,0.781353
161,0.2298,0.706082,0.840287,0.68978,0.706082


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 685ms/step - loss: 10.2542 - root_mean_squared_error: 3.2022
Epoch 1: val_root_mean_squared_error improved from inf to 0.63330, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 3.1886 - root_mean_squared_error: 1.6931 - val_loss: 0.4011 - val_root_mean_squared_error: 0.6333
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - loss: 0.3605 - root_mean_squared_error: 0.6004
Epoch 2: val_root_mean_squared_error improved from 0.63330 to 0.46543, saving model to cache/ensemble_camembert-base/models/mlp/b4d66ea1b50ce4ffaf7430ca73e58b8cf882116ec261949890cabb97dce89ef2_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3049 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.9045,3.060066,1.749304,1.545488,3.060066
46,2.4379,0.744538,0.862866,0.666928,0.744538
69,0.9825,0.825957,0.908822,0.773712,0.825957
92,0.8269,0.760797,0.872237,0.737072,0.760797
115,0.8821,0.69628,0.834434,0.710699,0.69628
138,0.5983,0.747348,0.864493,0.732593,0.747348
161,0.6508,0.627758,0.792312,0.626574,0.627758


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 681ms/step - loss: 10.2410 - root_mean_squared_error: 3.2002
Epoch 1: val_root_mean_squared_error improved from inf to 0.44718, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 3.6381 - root_mean_squared_error: 1.8094 - val_loss: 0.2000 - val_root_mean_squared_error: 0.4472
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 0.3503 - root_mean_squared_error: 0.5919
Epoch 2: val_root_mean_squared_error improved from 0.44718 to 0.41485, saving model to cache/ensemble_camembert-base/models/mlp/2915e102be27d8ee21602ad49f1da4d8af2bca91b52c9aa804e29667dee2d601_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.5099 - root_mean_squared_error: 0.

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at almanach/camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Root Mean Squared Error,Mean Absolute Error,Mean Squared Error
23,5.5556,3.12634,1.768146,1.592193,3.12634
46,2.3728,0.719175,0.848042,0.655244,0.719175
69,0.8687,0.580971,0.762214,0.602671,0.580971
92,0.8285,0.627451,0.792118,0.625249,0.627451
115,0.4617,0.680983,0.825217,0.610768,0.680983
138,0.4752,0.573852,0.757531,0.604477,0.573852
161,0.2714,0.601993,0.775882,0.624644,0.601993


Epoch 1/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 661ms/step - loss: 5.1542 - root_mean_squared_error: 2.2703
Epoch 1: val_root_mean_squared_error improved from inf to 0.84615, saving model to cache/ensemble_camembert-base/models/mlp/a26faa62580a71129dba42331db1036b03fb5a20d296815b9af286c97f35a242_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 2.2374 - root_mean_squared_error: 1.4325 - val_loss: 0.7160 - val_root_mean_squared_error: 0.8461
Epoch 2/10
[1m 1/18[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - loss: 0.7233 - root_mean_squared_error: 0.8505
Epoch 2: val_root_mean_squared_error improved from 0.84615 to 0.65219, saving model to cache/ensemble_camembert-base/models/mlp/a26faa62580a71129dba42331db1036b03fb5a20d296815b9af286c97f35a242_camembert-base_mlp.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6205 - root_mean_squared_error: 0.7

  df_macro_ensemble_scores = pd.concat([df_macro_ensemble_scores, new_row], ignore_index=True)


In [None]:
%rm -rf cache

In [6]:
from google.colab import files
!zip -r /content/logs.zip /content/training/cache/ensemble_camembert-base/logs

files.download('/content/logs.zip')

  adding: content/training/cache/ensemble_camembert-base/logs/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725259041.2d88bd8a8071.326.1 (deflated 64%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725260762.2d88bd8a8071.326.41 (deflated 64%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725264315.2d88bd8a8071.326.121 (deflated 64%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725262515.2d88bd8a8071.326.81 (deflated 64%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_1/events.out.tfevents.1725266151.2d88bd8a8071.326.161 (deflated 64%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_26/ (stored 0%)
  adding: content/training/cache/ensemble_camembert-base/logs/member_2

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!cp -r /content/training/cache/ensemble_camembert-base/models /content/drive/MyDrive/Models0109

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
