In [1]:
import torch
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Dataset loading and cleaning.
#
# `data` is the stem of a CSV file under data/. After this section:
#   df / DF    - cleaned frame (text column 'x', integer label column 'y');
#                DF is the full frame that the training loop slices per client.
#   num_labels - size of the classification head.
#   tokenizer  - BERT tokenizer matching `model_name`.
# ---------------------------------------------------------------------------
data = "df_shuffled_sven"
df = pd.read_csv(f'data/{data}.csv', on_bad_lines="skip")  # malformed rows are skipped
# df = df[df["federation_level"] == "Germany"]

# Drop over-long texts. NOTE: this threshold is a *character* count
# (str.len), not a token count. Rows whose 'x' is missing are dropped too,
# because NaN < max_length evaluates to False.
max_length = 512
df = df[df['x'].str.len() < max_length].copy()

if data == 'df_testing':
    # Small random subset for quick experiments; cap at the rows that are
    # actually available so sample() cannot raise on a short file.
    df = df.sample(n=min(500, len(df)))
elif data == 'df_gpt_output':
    # Map the GPT-predicted class names onto integer label ids.
    new_class_dict = {
        "Medical": np.int64(0),
        "Accommodation": np.int64(1),
        "Government Services": np.int64(2),
        "Banking": np.int64(3),
        "Transport": np.int64(4),
    }
    # .str.strip() tolerates missing predictions (they stay NaN and are
    # removed by the dropna below) where a plain x.strip() would raise.
    df['y_gpt_pred_str'] = df['y_gpt_pred_str'].str.strip()
    df['y'] = df['y_gpt_pred_str'].map(new_class_dict)
elif data == 'df_shuffled_sven':
    # -1 marks rows that must be excluded from training.
    df = df[df['y'] != -1]
    # df = df[df.federation_level.isin(["Germany", "France", "Spain"])]

# Common final cleaning for every dataset: unlabeled rows cannot be trained
# on, and the labels must be integers (a column that ever held NaN is
# float64, which would not be valid class indices for the classifier).
df = df.dropna(subset=['y']).astype({'y': 'int64'})
if df.empty:
    raise ValueError(f"no usable rows left in data/{data}.csv after filtering")

# DF is defined for every dataset, not only for 'df_shuffled_sven'.
DF = df

print("class distribution", type(df.y.iloc[0]), df.y.value_counts())
model_name = 'bert-base-multilingual-uncased'
# Label ids are expected to start at 0. Size the head by the largest id so
# that a class id missing from this particular file cannot push a label out
# of range (this equals the number of unique labels when ids are contiguous).
num_labels = int(DF.y.max()) + 1
tokenizer = BertTokenizer.from_pretrained(model_name)
# ---------------------------------------------------------------------------
# Federated fine-tuning.
#
# Each value of DF.federation_level is treated as one client. In every round
# each client starts from the current global weights (the pretrained
# checkpoint in round 1, the previous round's average afterwards), fine-tunes
# on its own rows, is scored on its own held-out test split, and contributes
# its weights to an unweighted mean that becomes the next global model.
# ---------------------------------------------------------------------------
num_fed_rounds = 3
num_epochs = 3
batch_size = 8


def _pick_device():
    """Return the best available device: CUDA first, then Apple MPS, else CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    mps_backend = getattr(torch.backends, 'mps', None)  # absent in old torch builds
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')
    return torch.device('cpu')


def _make_loader(frame, text_tokenizer, loader_batch_size, shuffle=False):
    """Tokenize frame['x'] and pair it with frame['y'] in a DataLoader.

    Each batch yielded by the loader is (input_ids, attention_mask, labels).
    Labels are forced to torch.long so they are always treated as class ids.
    """
    encoded = text_tokenizer(
        frame['x'].tolist(),
        padding=True,
        truncation=True,
        return_tensors='pt'
    )
    labels = torch.tensor(frame['y'].tolist(), dtype=torch.long)
    dataset = TensorDataset(encoded['input_ids'], encoded['attention_mask'], labels)
    return DataLoader(dataset, batch_size=loader_batch_size, shuffle=shuffle)


def _evaluate(net, loader, target_device):
    """Return (mean per-batch loss, accuracy) of `net` over `loader`.

    Puts the model in eval mode and runs without gradient tracking.
    """
    net.eval()
    loss_sum = 0.0
    n_correct = 0
    with torch.no_grad():
        for input_ids, attention_mask, batch_labels in loader:
            input_ids = input_ids.to(target_device)
            attention_mask = attention_mask.to(target_device)
            batch_labels = batch_labels.to(target_device)

            outputs = net(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=batch_labels
            )
            loss_sum += outputs.loss.item()
            predicted_labels = outputs.logits.argmax(dim=1)
            n_correct += (predicted_labels == batch_labels).sum().item()
    return loss_sum / len(loader), n_correct / len(loader.dataset)


def _snapshot(net):
    """Return an independent CPU copy of the model's state dict.

    state_dict() hands back references to the live tensors, so without a copy
    a stored "snapshot" silently changes as training continues. Copying to
    CPU also keeps accumulated snapshots out of accelerator memory.
    """
    return {
        name: tensor.detach().to('cpu', copy=True)
        for name, tensor in net.state_dict().items()
    }


def _average_state_dicts(state_dicts):
    """Return the element-wise, unweighted mean of several state dicts.

    The mean is computed in float32 and cast back to each entry's original
    dtype so integer buffers keep their type.
    NOTE(review): every client counts equally regardless of how many rows it
    has; weight by client size here if that is the intended scheme.
    """
    averaged = {}
    for key, reference in state_dicts[0].items():
        stacked = torch.stack([weights[key].float() for weights in state_dicts])
        averaged[key] = stacked.mean(dim=0).to(reference.dtype)
    return averaged


device = _pick_device()
averaged_weights = None  # no global model yet: round 1 starts from the checkpoint

with open("models/results/results_federated.txt", "w") as file:
    for fed_round in range(1, num_fed_rounds + 1):
        weight_list = []
        for country in DF.federation_level.unique():
            df = DF[DF.federation_level == country]
            print(f"running fed round {fed_round} of {country} with {len(df)} ")

            # NOTE num_labels need to start at 0 important for labelling
            model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
            if averaged_weights is not None:
                # Rounds after the first continue from the averaged weights.
                model.load_state_dict(averaged_weights)

            # 60 % train / 20 % validation / 20 % test. The fixed seed gives
            # every round the same split for a given client.
            train_data, test_data = train_test_split(df, test_size=0.4, random_state=42)
            val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

            train_dataloader = _make_loader(train_data, tokenizer, batch_size, shuffle=True)
            val_dataloader = _make_loader(val_data, tokenizer, batch_size)
            test_dataloader = _make_loader(test_data, tokenizer, batch_size)

            optimizer = AdamW(model.parameters(), lr=2e-5)
            # Stepped once per epoch; with step_size=5 the learning rate only
            # decays if num_epochs is raised above 5.
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

            model.to(device)

            best_val_loss = float('inf')
            best_model = None

            for epoch in range(num_epochs):
                print(f'Epoch {epoch + 1}/{num_epochs}:')
                total_loss = 0.0
                model.train()

                for input_ids, attention_mask, batch_labels in train_dataloader:
                    input_ids = input_ids.to(device)
                    attention_mask = attention_mask.to(device)
                    batch_labels = batch_labels.to(device)

                    model.zero_grad()

                    outputs = model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        labels=batch_labels
                    )

                    loss = outputs.loss
                    total_loss += loss.item()

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()

                scheduler.step()

                # Average training loss for the epoch
                avg_loss = total_loss / len(train_dataloader)
                print(f'Training Loss: {avg_loss}')

                avg_val_loss, val_accuracy = _evaluate(model, val_dataloader, device)
                print(f'Validation Loss: {avg_val_loss}')
                print(f'Validation Accuracy: {val_accuracy}')

                # Keep a real copy of the best weights (by validation loss);
                # a bare state_dict() would keep tracking the live model.
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    best_model = _snapshot(model)

            # Restore the best epoch. best_model stays None only if no epoch
            # produced a comparable validation loss (e.g. NaN throughout); in
            # that case keep the final weights instead of failing.
            if best_model is not None:
                model.load_state_dict(best_model)

            avg_test_loss, accuracy = _evaluate(model, test_dataloader, device)

            # print(f'Test Loss: {avg_test_loss}')
            # One result per line; flush so finished clients survive a crash.
            file.write(f'Test Accuracy {fed_round} {country}: {accuracy}\n')
            file.flush()
            weight_list.append(_snapshot(model))

        # Aggregate this round's client models into the next global model.
        averaged_weights = _average_state_dicts(weight_list)

  from .autonotebook import tqdm as notebook_tqdm


class distribution <class 'numpy.int64'> 1    168
0    162
4    160
3    158
2    118
Name: y, dtype: int64
running fed round 1 of Portugal with 59 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.591948103904724
Validation Loss: 1.5862531065940857
Validation Accuracy: 0.25
Epoch 2/10:
Training Loss: 1.4392409801483155
Validation Loss: 1.578792691230774
Validation Accuracy: 0.25
Epoch 3/10:
Training Loss: 1.3726386070251464
Validation Loss: 1.646236538887024
Validation Accuracy: 0.25
running fed round 1 of France with 116 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.6055569913652208
Validation Loss: 1.5977102120717366
Validation Accuracy: 0.13043478260869565
Epoch 2/10:
Training Loss: 1.550965083969964
Validation Loss: 1.5976245403289795
Validation Accuracy: 0.17391304347826086
Epoch 3/10:
Training Loss: 1.4257835679584079
Validation Loss: 1.5392910639444988
Validation Accuracy: 0.391304347826087
running fed round 1 of Germany with 151 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.6084809799989064
Validation Loss: 1.5656737983226776
Validation Accuracy: 0.43333333333333335
Epoch 2/10:
Training Loss: 1.5361307561397552
Validation Loss: 1.536989450454712
Validation Accuracy: 0.3333333333333333
Epoch 3/10:
Training Loss: 1.5657596190770466
Validation Loss: 1.4750037491321564
Validation Accuracy: 0.4
running fed round 1 of Switzerland with 96 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.6192587465047836
Validation Loss: 1.6013012329737346
Validation Accuracy: 0.15789473684210525
Epoch 2/10:
Training Loss: 1.5200439244508743
Validation Loss: 1.4801735083262126
Validation Accuracy: 0.42105263157894735
Epoch 3/10:
Training Loss: 1.334192469716072
Validation Loss: 1.3743599653244019
Validation Accuracy: 0.47368421052631576
running fed round 1 of Belgium with 92 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.582494122641427
Validation Loss: 1.6622969309488933
Validation Accuracy: 0.16666666666666666
Epoch 2/10:
Training Loss: 1.5125961984906877
Validation Loss: 1.7959564526875813
Validation Accuracy: 0.16666666666666666
Epoch 3/10:
Training Loss: 1.483601178441729
Validation Loss: 1.763909896214803
Validation Accuracy: 0.2222222222222222
running fed round 1 of Spain with 142 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.6264622536572544
Validation Loss: 1.565717101097107
Validation Accuracy: 0.25
Epoch 2/10:
Training Loss: 1.5515630895441228
Validation Loss: 1.5291853845119476
Validation Accuracy: 0.25
Epoch 3/10:
Training Loss: 1.3894118395718662
Validation Loss: 1.4050131440162659
Validation Accuracy: 0.5
running fed round 1 of Poland with 62 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5819708585739136
Validation Loss: 1.5994150042533875
Validation Accuracy: 0.3333333333333333
Epoch 2/10:
Training Loss: 1.4771887540817261
Validation Loss: 1.6113207936286926
Validation Accuracy: 0.3333333333333333
Epoch 3/10:
Training Loss: 1.3795652389526367
Validation Loss: 1.6789566278457642
Validation Accuracy: 0.3333333333333333
running fed round 1 of CzechRepublic with 48 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5931178629398346
Validation Loss: 1.6008641123771667
Validation Accuracy: 0.3
Epoch 2/10:
Training Loss: 1.5717360377311707
Validation Loss: 1.6205008029937744
Validation Accuracy: 0.3
Epoch 3/10:
Training Loss: 1.5632958710193634
Validation Loss: 1.6309661865234375
Validation Accuracy: 0.3
running fed round 2 of Portugal with 59 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5367575883865356
Validation Loss: 1.5930812358856201
Validation Accuracy: 0.25
Epoch 2/10:
Training Loss: 1.4485288381576538
Validation Loss: 1.6134250164031982
Validation Accuracy: 0.25
Epoch 3/10:
Training Loss: 1.4187909364700317
Validation Loss: 1.6017886996269226
Validation Accuracy: 0.25
running fed round 2 of France with 116 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5909649398591783
Validation Loss: 1.6001903613408406
Validation Accuracy: 0.13043478260869565
Epoch 2/10:
Training Loss: 1.5366308821572199
Validation Loss: 1.5591785907745361
Validation Accuracy: 0.13043478260869565
Epoch 3/10:
Training Loss: 1.4624752733442519
Validation Loss: 1.5072204271952312
Validation Accuracy: 0.2608695652173913
running fed round 2 of Germany with 151 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.557774970928828
Validation Loss: 1.5326212644577026
Validation Accuracy: 0.4
Epoch 2/10:
Training Loss: 1.4709281424681346
Validation Loss: 1.449238359928131
Validation Accuracy: 0.43333333333333335
Epoch 3/10:
Training Loss: 1.4093271891276042
Validation Loss: 1.3829328417778015
Validation Accuracy: 0.43333333333333335
running fed round 2 of Switzerland with 96 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5859814435243607
Validation Loss: 1.5485363801320393
Validation Accuracy: 0.3157894736842105
Epoch 2/10:
Training Loss: 1.4736272394657135
Validation Loss: 1.4885266224543254
Validation Accuracy: 0.47368421052631576
Epoch 3/10:
Training Loss: 1.3865079134702682
Validation Loss: 1.446683128674825
Validation Accuracy: 0.3684210526315789
running fed round 2 of Belgium with 92 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.573267902646746
Validation Loss: 1.7813080946604412
Validation Accuracy: 0.16666666666666666
Epoch 2/10:
Training Loss: 1.572658998625619
Validation Loss: 1.7998775641123455
Validation Accuracy: 0.16666666666666666
Epoch 3/10:
Training Loss: 1.528589470045907
Validation Loss: 1.7683531045913696
Validation Accuracy: 0.16666666666666666
running fed round 2 of Spain with 142 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5877887227318503
Validation Loss: 1.5703174471855164
Validation Accuracy: 0.39285714285714285
Epoch 2/10:
Training Loss: 1.4974236379970203
Validation Loss: 1.5142960250377655
Validation Accuracy: 0.42857142857142855
Epoch 3/10:
Training Loss: 1.3720678632909602
Validation Loss: 1.4238625466823578
Validation Accuracy: 0.5
running fed round 2 of Poland with 62 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.548956608772278
Validation Loss: 1.5740201473236084
Validation Accuracy: 0.4166666666666667
Epoch 2/10:
Training Loss: 1.416189432144165
Validation Loss: 1.5546189546585083
Validation Accuracy: 0.3333333333333333
Epoch 3/10:
Training Loss: 1.3551575183868407
Validation Loss: 1.512394368648529
Validation Accuracy: 0.5
running fed round 2 of CzechRepublic with 48 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.5903178453445435
Validation Loss: 1.555391550064087
Validation Accuracy: 0.4
Epoch 2/10:
Training Loss: 1.5580540299415588
Validation Loss: 1.5205222964286804
Validation Accuracy: 0.4
Epoch 3/10:
Training Loss: 1.4704530537128448
Validation Loss: 1.5346968173980713
Validation Accuracy: 0.4
running fed round 3 of Portugal with 59 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.4371025085449218
Validation Loss: 1.4540022015571594
Validation Accuracy: 0.3333333333333333
Epoch 2/10:
Training Loss: 1.3375170946121215
Validation Loss: 1.3894118070602417
Validation Accuracy: 0.5833333333333334
Epoch 3/10:
Training Loss: 1.2290229082107544
Validation Loss: 1.4307671785354614
Validation Accuracy: 0.4166666666666667
running fed round 3 of France with 116 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.4428170654508803
Validation Loss: 1.431772510210673
Validation Accuracy: 0.34782608695652173
Epoch 2/10:
Training Loss: 1.333155141936408
Validation Loss: 1.382499059041341
Validation Accuracy: 0.34782608695652173
Epoch 3/10:
Training Loss: 1.2378273142708673
Validation Loss: 1.3153810898462932
Validation Accuracy: 0.5217391304347826
running fed round 3 of Germany with 151 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.3819333414236705
Validation Loss: 1.3176977634429932
Validation Accuracy: 0.5
Epoch 2/10:
Training Loss: 1.2789764901002247
Validation Loss: 1.2374654114246368
Validation Accuracy: 0.5666666666666667
Epoch 3/10:
Training Loss: 1.1675847073396046
Validation Loss: 1.2382726967334747
Validation Accuracy: 0.4666666666666667
running fed round 3 of Switzerland with 96 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.513519898056984
Validation Loss: 1.4821909666061401
Validation Accuracy: 0.15789473684210525
Epoch 2/10:
Training Loss: 1.4023910760879517
Validation Loss: 1.4675338665644329
Validation Accuracy: 0.3157894736842105
Epoch 3/10:
Training Loss: 1.3825013786554337
Validation Loss: 1.3278539180755615
Validation Accuracy: 0.5789473684210527
running fed round 3 of Belgium with 92 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.4466326917920793
Validation Loss: 1.5779861211776733
Validation Accuracy: 0.2777777777777778
Epoch 2/10:
Training Loss: 1.377241781779698
Validation Loss: 1.3138809998830159
Validation Accuracy: 0.5555555555555556
Epoch 3/10:
Training Loss: 1.257302028792245
Validation Loss: 1.4887422323226929
Validation Accuracy: 0.3888888888888889
running fed round 3 of Spain with 142 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.438494617288763
Validation Loss: 1.3969627618789673
Validation Accuracy: 0.5714285714285714
Epoch 2/10:
Training Loss: 1.3489686142314563
Validation Loss: 1.3409819304943085
Validation Accuracy: 0.5357142857142857
Epoch 3/10:
Training Loss: 1.227274472063238
Validation Loss: 1.446249783039093
Validation Accuracy: 0.42857142857142855
running fed round 3 of Poland with 62 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.41477530002594
Validation Loss: 1.5350279808044434
Validation Accuracy: 0.4166666666666667
Epoch 2/10:
Training Loss: 1.3424928665161133
Validation Loss: 1.3651237487792969
Validation Accuracy: 0.5
Epoch 3/10:
Training Loss: 1.258006501197815
Validation Loss: 1.3418800830841064
Validation Accuracy: 0.5
running fed round 3 of CzechRepublic with 48 


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Epoch 1/10:
Training Loss: 1.4281367361545563
Validation Loss: 1.471983015537262
Validation Accuracy: 0.3
Epoch 2/10:
Training Loss: 1.3831543624401093
Validation Loss: 1.3411359190940857
Validation Accuracy: 0.5
Epoch 3/10:
Training Loss: 1.4277520775794983
Validation Loss: 1.436062514781952
Validation Accuracy: 0.4
