In [1]:
%%capture
# Install the Hugging Face libraries used by this notebook; %%capture hides the pip output.
# NOTE(review): no versions are pinned, so the installed versions depend on when and
# where this cell is run.
!pip install transformers
!pip install evaluate
!pip install datasets
# -U upgrades accelerate if a copy is already installed.
!pip install accelerate -U

In [2]:
from datasets import load_dataset, load_from_disk
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, Trainer, AutoConfig, TrainingArguments, DataCollatorWithPadding
import numpy as np
import evaluate

In [3]:
# Mount Google Drive inside the Colab VM; the data and model paths used later in
# this notebook all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
_MRPC_METRIC = None  # GLUE/MRPC metric object, loaded on first use and then reused


def compute_metrics(eval_preds):
    """Score classifier outputs with the GLUE/MRPC metric.

    Args:
        eval_preds: A ``(logits, labels)`` pair.  ``logits`` is array-like with
            the per-class scores on the last axis; if it is a tuple, its first
            element is taken as the logits.  ``labels`` holds the reference
            class ids.

    Returns:
        The result of ``compute`` on the metric obtained from
        ``evaluate.load("glue", "mrpc")``.
    """
    global _MRPC_METRIC
    # Loading the metric is comparatively expensive and does not depend on the
    # inputs, so do it once instead of on every call.
    if _MRPC_METRIC is None:
        _MRPC_METRIC = evaluate.load("glue", "mrpc")

    logits, labels = eval_preds
    # Some models return extra outputs alongside the logits; keep only the logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    return _MRPC_METRIC.compute(predictions=predictions, references=labels)


def train_model(train_path, train_file_name, validation_path, validation_file_name, model_save_path, dataset_id,
                model_name="microsoft/MiniLM-L12-H384-uncased", num_train_epochs=70, learning_rate=1e-5,
                batch_size=16, seed=42):
  """Fine-tune a sequence classifier on one data shard, save it, and score it.

  Args:
    train_path: Directory prefix of the training shards (joined by plain string
      concatenation, so it must end with a path separator).
    train_file_name: Name of the saved training dataset inside the shard folder,
      including its leading separator (e.g. "/train.hf").
    validation_path: Directory prefix of the validation shards.
    validation_file_name: Name of the saved validation dataset inside the shard
      folder, including its leading separator.
    model_save_path: Directory prefix under which the trained model is saved at
      "estimate_trainer/<dataset_id>".
    dataset_id: Shard identifier as a string; it is concatenated into every path.
    model_name: Checkpoint passed to the tokenizer and model `from_pretrained`.
    num_train_epochs: Number of training epochs.
    learning_rate: Initial learning rate of the linear schedule.
    batch_size: Per-device batch size used for both training and evaluation.
    seed: Random seed used for model initialisation and training.

  Returns:
    The metrics returned by `compute_metrics` for the validation split.  For
    backward compatibility the same value is also appended to the notebook-level
    list `validation_scores` when that list exists.
  """
  # Function-scope import: seed before the model is built so that the newly
  # initialised classifier head is reproducible, not only the training loop.
  from transformers import set_seed
  set_seed(seed)

  train_dataset = load_from_disk(train_path+dataset_id+train_file_name)
  validation_dataset = load_from_disk(validation_path+dataset_id+validation_file_name)

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForSequenceClassification.from_pretrained(model_name)

  # Cap sequences at the model's position-embedding size; the limit is passed
  # explicitly so truncation does not depend on the tokenizer declaring one.
  max_length = getattr(model.config, "max_position_embeddings", None)

  def tokenize_function(example):
    return tokenizer(example["sentence1"], example["sentence2"], truncation=True, max_length=max_length)

  tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
  tokenized_validation_dataset = validation_dataset.map(tokenize_function, batched=True)

  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

  training_args = TrainingArguments("estimate_trainer/"+dataset_id, num_train_epochs=num_train_epochs,
                                  learning_rate=learning_rate, lr_scheduler_type="linear",
                                  per_device_train_batch_size=batch_size, seed=seed,
                                  per_device_eval_batch_size=batch_size)

  trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator
    )
  trainer.train()

  # Persist the model before anything else can fail, so a problem during
  # scoring or bookkeeping never throws away a finished training run.
  trainer.save_model(model_save_path+"estimate_trainer/"+dataset_id)

  predictions = trainer.predict(tokenized_validation_dataset)
  # Reuse the shared metric helper instead of duplicating the argmax + metric code.
  scores = compute_metrics((predictions.predictions, predictions.label_ids))

  # Keep feeding the notebook-level score list used by the driver cell, but do
  # not crash if the caller has not defined it; the scores are returned anyway.
  score_log = globals().get("validation_scores")
  if score_log is not None:
    score_log.append(scores)
  return scores


In [5]:
# Google Drive folders holding the per-shard splits and receiving the trained models.
# Each prefix ends with "/" because train_model joins path pieces by plain string
# concatenation.
train_path = "/content/drive/MyDrive/Federated_learning/Train_split/"
validation_path = "/content/drive/MyDrive/Federated_learning/Validation_split/"
model_save_path = "/content/drive/MyDrive/Federated_learning/Federated_models/"
# Not used in this cell.
import matplotlib.pyplot as plt

# Dataset names inside each shard folder; the leading "/" separates them from the shard id.
train_file_name = "/train.hf"
validation_file_name = "/validation.hf"

# train_model appends each shard's validation metrics to this notebook-level list.
validation_scores = []

# Shard folders are named "0" .. str(num_shards - 1).
num_shards = 8

# Train one local model per shard and print the metrics that shard just produced.
for dataset_id in range(num_shards):
  train_model(train_path, train_file_name, validation_path, validation_file_name, model_save_path, str(dataset_id))
  print(validation_scores[-1])

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/133M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2651
1000,0.0217
1500,0.011
2000,0.0035


Downloading builder script:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

{'accuracy': 0.8627450980392157, 'f1': 0.9041095890410958}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2416
1000,0.0288
1500,0.0102
2000,0.0026


{'accuracy': 0.7647058823529411, 'f1': 0.8378378378378379}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.27
1000,0.0097
1500,0.0032
2000,0.0031


{'accuracy': 0.8431372549019608, 'f1': 0.8974358974358975}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2701
1000,0.0562
1500,0.0341
2000,0.0075


{'accuracy': 0.8235294117647058, 'f1': 0.8767123287671234}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2674
1000,0.0336
1500,0.0203
2000,0.0189


{'accuracy': 0.7450980392156863, 'f1': 0.8311688311688312}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2861
1000,0.0547
1500,0.0289
2000,0.0186


{'accuracy': 0.803921568627451, 'f1': 0.8717948717948717}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2433
1000,0.0312
1500,0.0085
2000,0.0044


{'accuracy': 0.8627450980392157, 'f1': 0.9014084507042254}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.2639
1000,0.051
1500,0.023
2000,0.0185


{'accuracy': 0.803921568627451, 'f1': 0.8571428571428571}


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Training loss of the eight local (per-shard) models, one value per logging
# step of the Trainer (loss is reported every 500 steps in the runs above).
# NOTE(review): these numbers were typed in by hand and do not exactly match the
# losses shown in the training logs of the previous cell -- confirm which run
# they come from, or build this table from the trainer's log history instead.
logging_steps = [500, 1000, 1500, 2000]
losses_by_model = [
    [0.254900, 0.032300, 0.008400, 0.004000],
    [0.241300, 0.028000, 0.009800, 0.002800],
    [0.269900, 0.009700, 0.003800, 0.003300],
    [0.270100, 0.056300, 0.034500, 0.009900],
    [0.267700, 0.037900, 0.018000, 0.018100],
    [0.286000, 0.056200, 0.028300, 0.018900],
    [0.243300, 0.031500, 0.008200, 0.003900],
    [0.264000, 0.051900, 0.023200, 0.018100],
]
line_colors = ['b', 'g', 'r', 'c', 'm', 'y', 'orange', 'purple']

# One curve per local model, each in its own colour.
fig, ax = plt.subplots(figsize=(10, 6))
for model_number, (losses, color) in enumerate(zip(losses_by_model, line_colors), start=1):
    ax.plot(logging_steps, losses, marker='o', color=color, label='Model {}'.format(model_number))

# The x axis is the training step at which each loss was logged, not a batch size.
ax.set_xlabel('Training Step')
ax.set_ylabel('Training Loss')
ax.set_title('Training Loss for 8 local Models')
ax.set_xticks(logging_steps)

ax.legend()
ax.grid(True)
plt.show()