In [2]:
!pip install datasets evaluate transformers seqeval peft

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25ldone
[?25h  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=943f8216c0f33cc619d7a0502305b9a3f2551b669b4ab37eee

In [4]:
# Checkpoint identifier; read later when the tokenizer and the model are loaded.
model_name = "xlm-roberta-large"
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments
from transformers import DataCollatorForTokenClassification
from datasets import Dataset
from peft import get_peft_model, LoraConfig
import evaluate

# NOTE(review): self-assignment; it rebinds model_name to the value it already holds.
model_name = model_name  


# Load dataset and process data
def load_conll_data(file_path, is_test=False):
    """Read a tab-separated CoNLL-style file into a list of sentences.

    A token row is a line that, once stripped, splits into exactly three
    tab-separated fields: the first is the token, the third is its tag and
    the middle one is ignored. Blank lines and lines starting with ``#``
    that are not token rows close the current sentence.

    Args:
        file_path: Path of the file to read.
        is_test: When true, tags are not collected and every sentence is
            returned with an empty tag list.

    Returns:
        A list of ``(tokens, tags)`` tuples, one per sentence, in file order.
    """
    sentences = []
    tokens, tags = [], []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) == 3:
                # Token rows are recognised before the '#' check so that a
                # token which itself begins with '#' (e.g. a hashtag) stays
                # inside its sentence instead of being dropped and splitting
                # the sentence in two.
                token, _, tag = parts
                tokens.append(token)
                if not is_test:
                    tags.append(tag)
            elif line.startswith("#") or line.strip() == "":
                # Header or blank line: flush the sentence collected so far.
                if tokens:
                    sentences.append((tokens, tags if not is_test else []))
                    tokens, tags = [], []
            # NOTE(review): any other row (for instance one with only two
            # columns) is skipped. If the unlabeled test file has fewer than
            # three columns per row it will load as zero sentences — confirm
            # the test file layout.
    if tokens:
        sentences.append((tokens, tags if not is_test else []))

    return sentences



# Load the three splits; the test split is read with is_test=True, so its
# sentences carry empty tag lists.
train_sentences = load_conll_data('/kaggle/input/ner-hineng/train.conll')
dev_sentences = load_conll_data('/kaggle/input/ner-hineng/dev.conll')
test_sentences = load_conll_data('/kaggle/input/ner-hineng/test.conll', is_test=True)

# Show the first training sentence as a (tokens, tags) pair.
print(train_sentences[0])

# Create a mapping of tags to IDs
def create_label_mapping(sentences):
    """Number the distinct tags found in ``sentences`` in sorted order.

    Args:
        sentences: Iterable of ``(tokens, tags)`` pairs; pairs whose tag
            list is empty (or otherwise falsy) contribute nothing.

    Returns:
        Dict mapping each tag to its position in the sorted list of tags.
    """
    distinct_tags = {
        tag
        for _, sentence_tags in sentences
        for tag in (sentence_tags or ())
    }
    ordered = sorted(distinct_tags)
    return dict(zip(ordered, range(len(ordered))))


# Tag -> id table derived from the training split (ids follow sorted tag order).
label2id = create_label_mapping(train_sentences)

print(label2id)

(['Gully', 'cricket', 'chal', 'raha', 'hain', 'yaha', '"', '(', 'Soniya', ')', 'Gandhi', '"'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'B-PERSON', 'O'])
{'B-ORGANISATION': 0, 'B-PERSON': 1, 'B-PLACE': 2, 'I-ORGANISATION': 3, 'I-PERSON': 4, 'I-PLACE': 5, 'O': 6}


In [5]:
def convert_to_hf_dataset(sentences):
    """Build a ``Dataset`` with ``id``, ``tokens`` and ``ner_tags`` columns.

    Args:
        sentences: Iterable of ``(tokens, tags)`` pairs.

    Returns:
        The result of ``Dataset.from_dict`` with one row per sentence; ``id``
        is the sentence's zero-based position in ``sentences``.
    """
    row_ids, token_column, tag_column = [], [], []
    for position, pair in enumerate(sentences):
        sentence_tokens, sentence_tags = pair
        row_ids.append(position)  # running integer id
        token_column.append(sentence_tokens)
        tag_column.append(sentence_tags)
    columns = {'id': row_ids, 'tokens': token_column, 'ner_tags': tag_column}
    return Dataset.from_dict(columns)


# Wrap each split in a Dataset with id / tokens / ner_tags columns.
train_dataset = convert_to_hf_dataset(train_sentences)
dev_dataset = convert_to_hf_dataset(dev_sentences)
test_dataset = convert_to_hf_dataset(test_sentences)

print(train_dataset[0])

# Distinct tag strings seen in the training split.
unique_tags = set(tag for _, tags in train_sentences for tag in tags)

# Hand-written tag -> id table used by the rest of the notebook. Its numbering
# is independent of create_label_mapping's sorted numbering ('O' is 0 here).
tag2id = {
    'O': 0,
    'B-PERSON': 1,
    'I-PERSON': 2,
    'B-ORGANISATION': 3,
    'I-ORGANISATION': 4,
    'B-PLACE': 5,
    'I-PLACE': 6,
}

# Inverse lookup: id -> tag.
id2tag = {idx: tag for tag, idx in tag2id.items()}

def convert_labels_to_ids(example):
    """Replace the tag strings in ``example['ner_tags']`` with integer ids.

    Each tag is looked up in the module-level ``tag2id`` table and the
    resulting id list is stored back on ``example``.

    Args:
        example: Mapping with a ``'ner_tags'`` list of tag strings (may be
            empty, as for unlabeled sentences).

    Returns:
        The same ``example`` object, with ``'ner_tags'`` now a list of ids.

    Raises:
        ValueError: If a tag is not present in ``tag2id``.
    """
    # Fail fast on unknown tags: a placeholder id such as -1 is neither a
    # valid class id nor the -100 ignore value used during label alignment,
    # so it would only surface later as a much less readable error.
    unknown = list(dict.fromkeys(
        label for label in example['ner_tags'] if label not in tag2id
    ))
    if unknown:
        raise ValueError(
            f"Unknown NER tag(s) {unknown}; expected one of {list(tag2id)}"
        )
    example['ner_tags'] = [tag2id[label] for label in example['ner_tags']]
    return example


# Convert tag strings to ids on every split.
# NOTE(review): the splits are rebound to their mapped versions, so this cell
# is not meant to be run twice against the same kernel state.
train_dataset = train_dataset.map(convert_labels_to_ids)
dev_dataset = dev_dataset.map(convert_labels_to_ids)
test_dataset = test_dataset.map(convert_labels_to_ids)
print(len(test_dataset))
print("Tag to ID mapping:")
# The loop variable is tag_id rather than id so the built-in id() is not
# shadowed at notebook scope for every later cell.
for tag, tag_id in tag2id.items():
    print(f"Tag: {tag}, ID: {tag_id}")

print("\nID to Tag mapping:")
for tag_id, tag in id2tag.items():
    print(f"ID: {tag_id}, Tag: {tag}")

# First training row after conversion (ner_tags are now integers).
example = train_dataset[0]
print(example)

# Tag names ordered by id, i.e. label_list[i] is the tag with id i.
label_list = list(tag2id.keys())
print(label_list)

print(train_dataset.column_names)

{'id': 0, 'tokens': ['Gully', 'cricket', 'chal', 'raha', 'hain', 'yaha', '"', '(', 'Soniya', ')', 'Gandhi', '"'], 'ner_tags': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'B-PERSON', 'O']}


Map:   0%|          | 0/1678 [00:00<?, ? examples/s]

Map:   0%|          | 0/433 [00:00<?, ? examples/s]

0
Tag to ID mapping:
Tag: O, ID: 0
Tag: B-PERSON, ID: 1
Tag: I-PERSON, ID: 2
Tag: B-ORGANISATION, ID: 3
Tag: I-ORGANISATION, ID: 4
Tag: B-PLACE, ID: 5
Tag: I-PLACE, ID: 6

ID to Tag mapping:
ID: 0, Tag: O
ID: 1, Tag: B-PERSON
ID: 2, Tag: I-PERSON
ID: 3, Tag: B-ORGANISATION
ID: 4, Tag: I-ORGANISATION
ID: 5, Tag: B-PLACE
ID: 6, Tag: I-PLACE
{'id': 0, 'tokens': ['Gully', 'cricket', 'chal', 'raha', 'hain', 'yaha', '"', '(', 'Soniya', ')', 'Gandhi', '"'], 'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]}
['O', 'B-PERSON', 'I-PERSON', 'B-ORGANISATION', 'I-ORGANISATION', 'B-PLACE', 'I-PLACE']
['id', 'tokens', 'ner_tags']


In [6]:
# Load the tokenizer that matches the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Tokenize the first training sentence once, only to inspect how its words
# are broken into sub-tokens.
example = train_dataset[0]
tokenized_inputs = tokenizer(
    example["tokens"],
    is_split_into_words=True,
    padding=True,
    truncation=True,
    return_tensors='pt',
)
# Convert the ids back to sub-token strings for display.
tokens = tokenizer.convert_ids_to_tokens(tokenized_inputs['input_ids'][0].tolist())
print(tokens)
print(unique_tags)


def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and align word labels to sub-tokens.

    Uses the module-level ``tokenizer``. For every sentence, the first
    sub-token of each word receives that word's label; special tokens and
    the remaining sub-tokens of a word receive -100.

    Args:
        examples: Batch with ``"tokens"`` (one word list per sentence) and
            ``"ner_tags"`` (one label-id list per sentence). A sentence whose
            label list is empty or None is treated as unlabeled.

    Returns:
        The tokenizer output with an added ``"labels"`` entry holding one
        label-id list per sentence, the same length as that sentence's
        sub-token sequence.
    """
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
    labels = []
    for i, word_labels in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)

        # Unlabeled sentence (e.g. the test split): there is nothing to index
        # into, so every position gets the ignore value instead of raising
        # IndexError on the first real word.
        if word_labels is None or len(word_labels) == 0:
            labels.append([-100] * len(word_ids))
            continue

        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                # Special token (no source word).
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                # First sub-token of a word carries the word's label.
                label_ids.append(word_labels[word_idx])
            else:
                # Continuation sub-token of the same word.
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs


# Tokenize every split in batches and attach the aligned "labels" column.
tokenized_train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True)
tokenized_dev_dataset = dev_dataset.map(tokenize_and_align_labels, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_and_align_labels, batched=True)

# Expose only the model inputs as torch tensors for train and dev; the
# tokenized test split is left in its default format.
tokenized_train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_dev_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
print(train_dataset.column_names)  
print(tokenized_train_dataset.column_names)

print(train_dataset[0])
print(tokenized_train_dataset[1])

# Collator passed to the Trainer below to batch the tokenized examples.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, padding=True)

# seqeval metric object used by compute_metrics.
metric = evaluate.load("seqeval")
import numpy as np

# Tag names of the example loaded above; not referenced again in the cells shown here.
labels = [label_list[i] for i in example["ner_tags"]]
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score

from seqeval.scheme import BILOU


from seqeval.metrics import f1_score


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

['<s>', '▁Gul', 'ly', '▁cri', 'cket', '▁chal', '▁raha', '▁hain', '▁ya', 'ha', '▁"', '▁(', '▁Son', 'iya', '▁)', '▁Gandhi', '▁"', '</s>']
{'O', 'I-PLACE', 'B-PLACE', 'I-ORGANISATION', 'B-PERSON', 'I-PERSON', 'B-ORGANISATION'}




Map:   0%|          | 0/1678 [00:00<?, ? examples/s]

Map:   0%|          | 0/433 [00:00<?, ? examples/s]

['id', 'tokens', 'ner_tags']
['id', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels']
{'id': 0, 'tokens': ['Gully', 'cricket', 'chal', 'raha', 'hain', 'yaha', '"', '(', 'Soniya', ')', 'Gandhi', '"'], 'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]}
{'input_ids': tensor([     0,     87,   1884,     71,     10,   1374, 140668,   1202,   3975,
           696,     18,      5,    587,     64,    397,     39,    856,    864,
           127,   1511,    275,     85,    568, 195902,  12530,   1716,   1460,
            62,     11, 132507,   4368,      9, 117117,  18385,     20,  36635,
          6827,  76417,   3311,  56174,  16785,    936,      2]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 'labels': tensor([-100,    0,    0, -100,    0,    0, -100,    0,    0, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,    0,
  

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [7]:
# LoRA adapter settings: low-rank size r, scaling lora_alpha, dropout applied
# inside the adapters, and the names of the sub-modules that receive adapters.
# NOTE(review): PEFT documents its task types with short names such as
# "TOKEN_CLS"; confirm that "TOKEN_CLASSIFICATION" is recognised by the
# installed peft version. If it is not, get_peft_model may presumably fall
# back to a generic wrapper that does not mark the newly initialised
# classifier head as trainable. Changing this value may also change the
# structure of trainer.predict(...).predictions that evaluate_ner indexes
# with [1], so verify the two together.
lora_config = LoraConfig(
    r=8,  
    task_type="TOKEN_CLASSIFICATION",
    lora_alpha=32, 
    target_modules=["query", "value"],  
    lora_dropout=0.1,  
)

# Load the base checkpoint with a token-classification head sized to the tag
# set, then wrap it with the LoRA configuration above.
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(tag2id), id2label=id2tag,label2id=tag2id)
model = get_peft_model(model, lora_config)

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
import evaluate
def compute_metrics(p):
    """Compute seqeval precision / recall / F1 / accuracy from model output.

    Uses the module-level ``metric`` and ``id2tag``.

    Args:
        p: Object exposing ``predictions`` — per-position class scores as a
            3-D array (sentence, position, class), or a tuple containing
            such an array — and the gold label ids as ``label_ids``
            (``labels`` is accepted as a fallback). Positions whose gold id
            is -100 are excluded from scoring.

    Returns:
        Dict with ``"precision"``, ``"recall"``, ``"f1"`` and ``"accuracy"``
        taken from the metric's ``overall_*`` entries.

    Raises:
        ValueError: If no 3-D score array or no gold label ids are available.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        # Some model outputs bundle other arrays (e.g. a loss) with the
        # scores; keep the first 3-D entry.
        scores = next((np.asarray(item) for item in scores if np.ndim(item) == 3), None)
        if scores is None:
            raise ValueError("No 3-D score array found in p.predictions")
    else:
        scores = np.asarray(scores)
    if scores.ndim != 3:
        raise ValueError(f"Unexpected shape for predictions: {scores.shape}")

    gold_ids = getattr(p, "label_ids", None)
    if gold_ids is None:
        gold_ids = getattr(p, "labels", None)
    if gold_ids is None:
        raise ValueError("No gold label ids available (label_ids is None)")
    gold_ids = np.asarray(gold_ids)

    # Scores must be reduced to class ids before they can be looked up in id2tag.
    predicted_ids = scores.argmax(axis=-1)

    true_predictions, true_labels = [], []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Drop ignored positions *before* the id -> tag lookup: -100 has no
        # entry in id2tag.
        keep = gold_row != -100
        true_predictions.append([id2tag[int(i)] for i in predicted_row[keep]])
        true_labels.append([id2tag[int(i)] for i in gold_row[keep]])

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }



In [9]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Number of examples in the tokenized dev split.
print(len(tokenized_dev_dataset))

# First tokenized dev example (input_ids, attention_mask, labels).
print(tokenized_dev_dataset[0])

from transformers import TrainingArguments, Trainer
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

433
{'input_ids': tensor([     0, 110833,  25813,      6,      4, 179635,   2363,      6,  66398,
         43904,    136,  92635,    111,     10,  23009,  18940,  37772,      6,
             5,   3975,    696,     18,      5,    587,     64,  46592,    910,
           434,    238,    866,    299,     92,    397,      2]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 'labels': tensor([-100,    0,    0,    0, -100,    0,    0,    0, -100,    0,    0,    0,
           0,    0,    0,    0,    0,    0, -100,    0, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100])}


In [10]:
# Training configuration: one epoch per trainer.train() call, with
# evaluation, checkpointing and logging all scheduled per epoch, and no
# external reporting integrations.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    num_train_epochs=1,
    do_train=True,
    report_to=[],  
    remove_unused_columns=False, 
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate = 2e-5,
    weight_decay = 0.01,
)

# NOTE(review): neither compute_metrics nor any callbacks are passed here, so
# the compute_metrics function and PrintMetricsCallback defined in this
# notebook are not used by this Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_dev_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

from transformers import TrainerCallback


class PrintMetricsCallback(TrainerCallback):
    """Trainer callback that prints a short metric summary after an evaluation."""

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Print the epoch number and the eval F1 / precision / recall / accuracy.

        Nothing is printed when ``metrics`` is empty or None. A metric that
        is missing from ``metrics`` is shown as ``N/A``.
        """
        if not metrics:
            return

        epoch_number = int(state.epoch)
        # Header first, then one line per metric.
        print(f"Epoch {epoch_number} - Evaluation Metrics:")

        f1_value = metrics.get('eval_f1', 'N/A')
        print(f"  F1 Score: {f1_value}")

        precision_value = metrics.get('eval_precision', 'N/A')
        print(f"  Precision: {precision_value}")

        recall_value = metrics.get('eval_recall', 'N/A')
        print(f"  Recall: {recall_value}")

        accuracy_value = metrics.get('eval_accuracy', 'N/A')
        print(f"  Accuracy: {accuracy_value}\n")



# Instantiate the callback.
# NOTE(review): the instance is not handed to the Trainer in the cells shown here.
print_metrics_callback = PrintMetricsCallback()


print(tokenized_dev_dataset[0])



{'input_ids': tensor([     0, 110833,  25813,      6,      4, 179635,   2363,      6,  66398,
         43904,    136,  92635,    111,     10,  23009,  18940,  37772,      6,
             5,   3975,    696,     18,      5,    587,     64,  46592,    910,
           434,    238,    866,    299,     92,    397,      2]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 'labels': tensor([-100,    0,    0,    0, -100,    0,    0,    0, -100,    0,    0,    0,
           0,    0,    0,    0,    0,    0, -100,    0, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100])}


In [11]:
def evaluate_ner(trainer, eval_dataset, ignore_index=-100, outside_label_id=0):
    """Score token-level predictions on ``eval_dataset`` with macro-averaged metrics.

    Runs ``trainer.predict`` and compares the arg-max class at each position
    with the gold id stored under ``"labels"`` in ``eval_dataset``. Only
    positions whose gold id is neither ``ignore_index`` nor
    ``outside_label_id`` are scored, so the numbers describe how well
    gold-entity tokens are classified. The metrics are also printed.

    Args:
        trainer: Object whose ``predict(eval_dataset)`` returns something with
            a ``predictions`` attribute: a 3-D score array (example, position,
            class) or a tuple containing one.
        eval_dataset: Indexable dataset; ``eval_dataset[i]["labels"]`` is the
            gold id sequence of example ``i``.
        ignore_index: Gold id marking positions to skip (default -100).
        outside_label_id: Gold id of the non-entity class, also skipped
            (default 0).

    Returns:
        Dict with ``"f1"``, ``"precision"``, ``"recall"`` and ``"accuracy"``.

    Raises:
        ValueError: If no 3-D score array is found in the predictions.
    """
    # Imported locally under private names: this notebook imports f1_score,
    # precision_score and recall_score from both seqeval.metrics and
    # sklearn.metrics, so the module-level names depend on which cell ran last.
    from sklearn.metrics import accuracy_score as _accuracy_score
    from sklearn.metrics import f1_score as _f1_score
    from sklearn.metrics import precision_score as _precision_score
    from sklearn.metrics import recall_score as _recall_score

    raw_predictions = trainer.predict(eval_dataset).predictions
    if isinstance(raw_predictions, tuple):
        # The scores may be bundled with other arrays (e.g. a loss); take the
        # first 3-D entry instead of relying on a fixed tuple position.
        logits = next(
            (np.asarray(item) for item in raw_predictions if np.ndim(item) == 3),
            None,
        )
        if logits is None:
            shapes = [np.shape(item) for item in raw_predictions]
            raise ValueError(f"Unexpected shape for logits: {shapes}")
    else:
        logits = np.asarray(raw_predictions)
        if logits.ndim != 3:
            raise ValueError(f"Unexpected shape for logits: {logits.shape}")

    predicted_labels = np.argmax(logits, axis=-1)

    true_labels_flat = []
    predicted_labels_flat = []
    for i in range(len(eval_dataset)):
        gold = np.asarray(eval_dataset[i]["labels"])
        # Predictions are padded to a common length; trim both sequences to
        # their shared prefix so gold and predicted ids stay position-aligned.
        pred = predicted_labels[i][:gold.shape[0]]
        gold = gold[:pred.shape[0]]

        keep = (gold != ignore_index) & (gold != outside_label_id)
        true_labels_flat.extend(gold[keep].tolist())
        predicted_labels_flat.extend(pred[keep].tolist())

    true_labels_flat = np.array(true_labels_flat)
    predicted_labels_flat = np.array(predicted_labels_flat)

    precision = _precision_score(true_labels_flat, predicted_labels_flat, average="macro", zero_division=0)
    recall = _recall_score(true_labels_flat, predicted_labels_flat, average="macro", zero_division=0)
    f1 = _f1_score(true_labels_flat, predicted_labels_flat, average="macro", zero_division=0)
    accuracy = _accuracy_score(true_labels_flat, predicted_labels_flat)

    print(
        f"Evaluation Metrics:\n F1: {f1:.4f}\n Precision: {precision:.4f}\n Recall: {recall:.4f}\n Accuracy: {accuracy:.4f}")
    return {"f1": f1, "precision": precision, "recall": recall, "accuracy": accuracy}

In [12]:
import os

# Directory that receives the best-scoring model and tokenizer; created if absent.
best_model_dir = "./best_model"
os.makedirs(best_model_dir, exist_ok=True)

In [13]:
# Best-so-far bookkeeping for the training loop below.
maxf1 = 0  # highest dev F1 seen so far
savemodel = model  # binds the same model object; no copy is made
maxidx = 0  # loop index at which maxf1 was reached

In [16]:
# Train for 50 rounds of one epoch each (num_train_epochs=1), scoring the dev
# split after every round and saving the model whenever dev F1 improves.
# NOTE(review): every trainer.train() call is a separate training run;
# presumably the optimizer and learning-rate schedule restart each time — confirm.
for i in range(50):
  # Round counter shown in the log, offset by 101.
  print(101 + i)
  trainer.train()
  eval_results = evaluate_ner(trainer, tokenized_dev_dataset)
  if(eval_results['f1'] > maxf1):
      maxf1 = eval_results['f1']
      # Rebinds the name to the same live model object; the on-disk copy
      # written below is the actual snapshot of the best weights.
      savemodel = model
      maxidx = i
      model.save_pretrained(best_model_dir)
      tokenizer.save_pretrained(best_model_dir)
  print("Final Evaluation Results:", eval_results)

101


Epoch,Training Loss,Validation Loss
1,0.0626,No log


Evaluation Metrics:
 F1: 0.6750
 Precision: 0.7410
 Recall: 0.6430
 Accuracy: 0.8226
Final Evaluation Results: {'f1': 0.6749926877869435, 'precision': 0.7410338191371479, 'recall': 0.6429587783562061, 'accuracy': 0.8226086956521739}
102


Epoch,Training Loss,Validation Loss
1,0.0649,No log


Evaluation Metrics:
 F1: 0.6749
 Precision: 0.7331
 Recall: 0.6409
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6749407711265885, 'precision': 0.7331138798991547, 'recall': 0.6408583712642353, 'accuracy': 0.8330434782608696}
103


Epoch,Training Loss,Validation Loss
1,0.0621,No log


Evaluation Metrics:
 F1: 0.7009
 Precision: 0.7614
 Recall: 0.6643
 Accuracy: 0.8383
Final Evaluation Results: {'f1': 0.7009077338969819, 'precision': 0.7613919820789166, 'recall': 0.6643409173877917, 'accuracy': 0.8382608695652174}
104


Epoch,Training Loss,Validation Loss
1,0.0572,No log


Evaluation Metrics:
 F1: 0.6764
 Precision: 0.7420
 Recall: 0.6449
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6764230712681595, 'precision': 0.7419979532019992, 'recall': 0.644855167478526, 'accuracy': 0.8295652173913044}
105


Epoch,Training Loss,Validation Loss
1,0.0605,No log


Evaluation Metrics:
 F1: 0.6839
 Precision: 0.7396
 Recall: 0.6508
 Accuracy: 0.8278
Final Evaluation Results: {'f1': 0.6838698541460132, 'precision': 0.7395562714470898, 'recall': 0.650804320219864, 'accuracy': 0.8278260869565217}
106


Epoch,Training Loss,Validation Loss
1,0.0619,No log


Evaluation Metrics:
 F1: 0.6840
 Precision: 0.7473
 Recall: 0.6520
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6840230001147305, 'precision': 0.7473125873494136, 'recall': 0.6519816028019301, 'accuracy': 0.8295652173913044}
107


Epoch,Training Loss,Validation Loss
1,0.0599,No log


Evaluation Metrics:
 F1: 0.6949
 Precision: 0.7485
 Recall: 0.6605
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6949407966054932, 'precision': 0.7485039548743982, 'recall': 0.6605473927493317, 'accuracy': 0.8295652173913044}
108


Epoch,Training Loss,Validation Loss
1,0.0611,No log


Evaluation Metrics:
 F1: 0.7022
 Precision: 0.7524
 Recall: 0.6681
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.70216575555459, 'precision': 0.7524000587705021, 'recall': 0.668066189741813, 'accuracy': 0.831304347826087}
109


Epoch,Training Loss,Validation Loss
1,0.0598,No log


Evaluation Metrics:
 F1: 0.6863
 Precision: 0.7493
 Recall: 0.6530
 Accuracy: 0.8278
Final Evaluation Results: {'f1': 0.6862765594296255, 'precision': 0.7492965867462583, 'recall': 0.6530285957568506, 'accuracy': 0.8278260869565217}
110


Epoch,Training Loss,Validation Loss
1,0.0585,No log


Evaluation Metrics:
 F1: 0.6963
 Precision: 0.7579
 Recall: 0.6613
 Accuracy: 0.8348
Final Evaluation Results: {'f1': 0.696307416352497, 'precision': 0.7578792141493829, 'recall': 0.6612533056811755, 'accuracy': 0.8347826086956521}
111


Epoch,Training Loss,Validation Loss
1,0.0589,No log


Evaluation Metrics:
 F1: 0.7205
 Precision: 0.7688
 Recall: 0.6838
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7205274824448245, 'precision': 0.7687748100441704, 'recall': 0.6837512474366687, 'accuracy': 0.8365217391304348}
112


Epoch,Training Loss,Validation Loss
1,0.056,No log


Evaluation Metrics:
 F1: 0.7003
 Precision: 0.7642
 Recall: 0.6610
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.7003116685850143, 'precision': 0.7641853598712005, 'recall': 0.660959171634114, 'accuracy': 0.8330434782608696}
113


Epoch,Training Loss,Validation Loss
1,0.0627,No log


Evaluation Metrics:
 F1: 0.7071
 Precision: 0.7678
 Recall: 0.6674
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.7071448198799345, 'precision': 0.7678191207558801, 'recall': 0.6673531092340192, 'accuracy': 0.831304347826087}
114


Epoch,Training Loss,Validation Loss
1,0.0602,No log


Evaluation Metrics:
 F1: 0.7143
 Precision: 0.7721
 Recall: 0.6744
 Accuracy: 0.8348
Final Evaluation Results: {'f1': 0.7143186147813336, 'precision': 0.7721493429067195, 'recall': 0.6743679257521629, 'accuracy': 0.8347826086956521}
115


Epoch,Training Loss,Validation Loss
1,0.0557,No log


Evaluation Metrics:
 F1: 0.7085
 Precision: 0.7540
 Recall: 0.6733
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.7084515400803967, 'precision': 0.7540331517222273, 'recall': 0.6733281003731924, 'accuracy': 0.831304347826087}
116


Epoch,Training Loss,Validation Loss
1,0.06,No log


Evaluation Metrics:
 F1: 0.7050
 Precision: 0.7521
 Recall: 0.6698
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7049708513316233, 'precision': 0.7520957332179501, 'recall': 0.6698385128442822, 'accuracy': 0.8365217391304348}
117


Epoch,Training Loss,Validation Loss
1,0.0612,No log


Evaluation Metrics:
 F1: 0.6895
 Precision: 0.7554
 Recall: 0.6542
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.6894839712094942, 'precision': 0.7554038385113495, 'recall': 0.654238489163032, 'accuracy': 0.831304347826087}
118


Epoch,Training Loss,Validation Loss
1,0.0561,No log


Evaluation Metrics:
 F1: 0.6961
 Precision: 0.7440
 Recall: 0.6633
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6961188613718526, 'precision': 0.7440435918217975, 'recall': 0.6632684811847058, 'accuracy': 0.8330434782608696}
119


Epoch,Training Loss,Validation Loss
1,0.056,No log


Evaluation Metrics:
 F1: 0.6897
 Precision: 0.7572
 Recall: 0.6531
 Accuracy: 0.8278
Final Evaluation Results: {'f1': 0.6896914553749564, 'precision': 0.7572480924984607, 'recall': 0.653113629770456, 'accuracy': 0.8278260869565217}
120


Epoch,Training Loss,Validation Loss
1,0.058,No log


Evaluation Metrics:
 F1: 0.7031
 Precision: 0.7688
 Recall: 0.6629
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7030661892020904, 'precision': 0.7688339030402581, 'recall': 0.662882145548089, 'accuracy': 0.8365217391304348}
121


Epoch,Training Loss,Validation Loss
1,0.0547,No log


Evaluation Metrics:
 F1: 0.6976
 Precision: 0.7509
 Recall: 0.6600
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6976415406349957, 'precision': 0.7509392158324412, 'recall': 0.6599777954770938, 'accuracy': 0.8330434782608696}
122


Epoch,Training Loss,Validation Loss
1,0.0536,No log


Evaluation Metrics:
 F1: 0.6965
 Precision: 0.7448
 Recall: 0.6619
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6965140408432353, 'precision': 0.7448043818699229, 'recall': 0.6619405477911341, 'accuracy': 0.8330434782608696}
123


Epoch,Training Loss,Validation Loss
1,0.0536,No log


Evaluation Metrics:
 F1: 0.6848
 Precision: 0.7421
 Recall: 0.6500
 Accuracy: 0.8261
Final Evaluation Results: {'f1': 0.6848367314115669, 'precision': 0.742069344038611, 'recall': 0.6499735948743497, 'accuracy': 0.8260869565217391}
124


Epoch,Training Loss,Validation Loss
1,0.055,No log


Evaluation Metrics:
 F1: 0.6783
 Precision: 0.7496
 Recall: 0.6397
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6782998247937951, 'precision': 0.7495546097470325, 'recall': 0.6397335118716594, 'accuracy': 0.8295652173913044}
125


Epoch,Training Loss,Validation Loss
1,0.0551,No log


Evaluation Metrics:
 F1: 0.6901
 Precision: 0.7396
 Recall: 0.6566
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6901046516610273, 'precision': 0.7396292526346435, 'recall': 0.6565804095377391, 'accuracy': 0.8330434782608696}
126


Epoch,Training Loss,Validation Loss
1,0.055,No log


Evaluation Metrics:
 F1: 0.6934
 Precision: 0.7554
 Recall: 0.6576
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.6933760764274751, 'precision': 0.7554445899320978, 'recall': 0.6576358751023864, 'accuracy': 0.831304347826087}
127


Epoch,Training Loss,Validation Loss
1,0.0585,No log


Evaluation Metrics:
 F1: 0.6895
 Precision: 0.7464
 Recall: 0.6548
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6894991326943464, 'precision': 0.7463761059121883, 'recall': 0.6548009188593199, 'accuracy': 0.8330434782608696}
128


Epoch,Training Loss,Validation Loss
1,0.0516,No log


Evaluation Metrics:
 F1: 0.6897
 Precision: 0.7401
 Recall: 0.6555
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6896965876846257, 'precision': 0.7401336812360674, 'recall': 0.6555139993671135, 'accuracy': 0.8330434782608696}
129


Epoch,Training Loss,Validation Loss
1,0.0552,No log


Evaluation Metrics:
 F1: 0.7050
 Precision: 0.7588
 Recall: 0.6682
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.7050279435464734, 'precision': 0.758775852429582, 'recall': 0.6681512237554184, 'accuracy': 0.831304347826087}
130


Epoch,Training Loss,Validation Loss
1,0.0559,No log


Evaluation Metrics:
 F1: 0.7052
 Precision: 0.7662
 Recall: 0.6675
 Accuracy: 0.8348
Final Evaluation Results: {'f1': 0.7051951483203045, 'precision': 0.7662186028257457, 'recall': 0.6674965924695749, 'accuracy': 0.8347826086956521}
131


Epoch,Training Loss,Validation Loss
1,0.0519,No log


Evaluation Metrics:
 F1: 0.6950
 Precision: 0.7532
 Recall: 0.6572
 Accuracy: 0.8243
Final Evaluation Results: {'f1': 0.6949790268225099, 'precision': 0.7532258252624482, 'recall': 0.657165646995654, 'accuracy': 0.8243478260869566}
132


Epoch,Training Loss,Validation Loss
1,0.0529,No log


Evaluation Metrics:
 F1: 0.7094
 Precision: 0.7647
 Recall: 0.6733
 Accuracy: 0.8313
Final Evaluation Results: {'f1': 0.7093986818135425, 'precision': 0.764664923578489, 'recall': 0.6733281003731924, 'accuracy': 0.831304347826087}
133


Epoch,Training Loss,Validation Loss
1,0.0509,No log


Evaluation Metrics:
 F1: 0.7199
 Precision: 0.7687
 Recall: 0.6862
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7199025001546259, 'precision': 0.7687496731287872, 'recall': 0.6862438186228816, 'accuracy': 0.8365217391304348}
134


Epoch,Training Loss,Validation Loss
1,0.0557,No log


Evaluation Metrics:
 F1: 0.6896
 Precision: 0.7540
 Recall: 0.6555
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6896446007805139, 'precision': 0.7540383866551104, 'recall': 0.6555139993671135, 'accuracy': 0.8330434782608696}
135


Epoch,Training Loss,Validation Loss
1,0.0503,No log


Evaluation Metrics:
 F1: 0.7066
 Precision: 0.7623
 Recall: 0.6692
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.7066081812207786, 'precision': 0.7623392211378349, 'recall': 0.6692136593211809, 'accuracy': 0.8330434782608696}
136


Epoch,Training Loss,Validation Loss
1,0.0541,No log


Evaluation Metrics:
 F1: 0.6741
 Precision: 0.7497
 Recall: 0.6343
 Accuracy: 0.8191
Final Evaluation Results: {'f1': 0.67407538566531, 'precision': 0.7497195816783445, 'recall': 0.6343111473662858, 'accuracy': 0.8191304347826087}
137


Epoch,Training Loss,Validation Loss
1,0.0512,No log


Evaluation Metrics:
 F1: 0.6978
 Precision: 0.7549
 Recall: 0.6606
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6978029845973764, 'precision': 0.7548797485334781, 'recall': 0.6606324267629372, 'accuracy': 0.8295652173913044}
138


Epoch,Training Loss,Validation Loss
1,0.054,No log


Evaluation Metrics:
 F1: 0.6978
 Precision: 0.7536
 Recall: 0.6618
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.6978322244979981, 'precision': 0.7535643679878145, 'recall': 0.6617572861555131, 'accuracy': 0.8330434782608696}
139


Epoch,Training Loss,Validation Loss
1,0.0492,No log


Evaluation Metrics:
 F1: 0.7201
 Precision: 0.7700
 Recall: 0.6855
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7201270279122968, 'precision': 0.7699942387740187, 'recall': 0.685530738115088, 'accuracy': 0.8365217391304348}
140


Epoch,Training Loss,Validation Loss
1,0.0486,No log


Evaluation Metrics:
 F1: 0.7031
 Precision: 0.7538
 Recall: 0.6658
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.7030810755178376, 'precision': 0.7538148627434341, 'recall': 0.6658093033807111, 'accuracy': 0.8295652173913044}
141


Epoch,Training Loss,Validation Loss
1,0.0541,No log


Evaluation Metrics:
 F1: 0.7065
 Precision: 0.7658
 Recall: 0.6702
 Accuracy: 0.8330
Final Evaluation Results: {'f1': 0.7065444553249717, 'precision': 0.7658070387515848, 'recall': 0.6702248484808991, 'accuracy': 0.8330434782608696}
142


Epoch,Training Loss,Validation Loss
1,0.0521,No log


Evaluation Metrics:
 F1: 0.6961
 Precision: 0.7487
 Recall: 0.6589
 Accuracy: 0.8296
Final Evaluation Results: {'f1': 0.6961464084066229, 'precision': 0.7486912563443174, 'recall': 0.6588529360845179, 'accuracy': 0.8295652173913044}
143


Epoch,Training Loss,Validation Loss
1,0.0485,No log


Evaluation Metrics:
 F1: 0.6817
 Precision: 0.7558
 Recall: 0.6419
 Accuracy: 0.8226
Final Evaluation Results: {'f1': 0.6817244085560917, 'precision': 0.7558386192355627, 'recall': 0.6418923681855804, 'accuracy': 0.8226086956521739}
144


Epoch,Training Loss,Validation Loss
1,0.0523,No log


Evaluation Metrics:
 F1: 0.6874
 Precision: 0.7437
 Recall: 0.6533
 Accuracy: 0.8261
Final Evaluation Results: {'f1': 0.6873765412859102, 'precision': 0.7437076223768982, 'recall': 0.6532642805819616, 'accuracy': 0.8260869565217391}
145


Epoch,Training Loss,Validation Loss
1,0.0474,No log


Evaluation Metrics:
 F1: 0.6801
 Precision: 0.7358
 Recall: 0.6438
 Accuracy: 0.8261
Final Evaluation Results: {'f1': 0.6800562296918594, 'precision': 0.7358436197548542, 'recall': 0.6438153420995555, 'accuracy': 0.8260869565217391}
146


Epoch,Training Loss,Validation Loss
1,0.0494,No log


Evaluation Metrics:
 F1: 0.7011
 Precision: 0.7519
 Recall: 0.6648
 Accuracy: 0.8348
Final Evaluation Results: {'f1': 0.7011180555830693, 'precision': 0.7518897658376129, 'recall': 0.664812287038014, 'accuracy': 0.8347826086956521}
147


Epoch,Training Loss,Validation Loss
1,0.0501,No log


Evaluation Metrics:
 F1: 0.7148
 Precision: 0.7607
 Recall: 0.6790
 Accuracy: 0.8348
Final Evaluation Results: {'f1': 0.7148205796004345, 'precision': 0.7607181265842483, 'recall': 0.678993317279627, 'accuracy': 0.8347826086956521}
148


Epoch,Training Loss,Validation Loss
1,0.0493,No log


Evaluation Metrics:
 F1: 0.7048
 Precision: 0.7520
 Recall: 0.6698
 Accuracy: 0.8365
Final Evaluation Results: {'f1': 0.7047661400652595, 'precision': 0.7519652767867052, 'recall': 0.6698385128442822, 'accuracy': 0.8365217391304348}
149


Epoch,Training Loss,Validation Loss
1,0.05,No log


Evaluation Metrics:
 F1: 0.6920
 Precision: 0.7534
 Recall: 0.6488
 Accuracy: 0.8174
Final Evaluation Results: {'f1': 0.6919622033663279, 'precision': 0.7534352033450807, 'recall': 0.6487793416538451, 'accuracy': 0.8173913043478261}
150


Epoch,Training Loss,Validation Loss
1,0.0495,No log


Evaluation Metrics:
 F1: 0.6938
 Precision: 0.7518
 Recall: 0.6530
 Accuracy: 0.8226
Final Evaluation Results: {'f1': 0.6938201528835056, 'precision': 0.7518321474226662, 'recall': 0.6530442359425274, 'accuracy': 0.8226086956521739}


In [17]:
# Best dev F1 reached in the loop above and the loop index where it occurred.
print(maxf1)
print(maxidx)

0.7205274824448245
10
