In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
torch.cuda.empty_cache()

In [3]:
from transformers import T5Tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

def preprocess_and_tokenize(file_path):
    """
    Reads the file, preprocesses it, and tokenizes the data.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    input_texts = []
    target_labels = []
    
    current_tokens = []
    current_labels = []
    
    for line in lines:
        line = line.strip()
        if line:  # Non-empty line
            token, label = line.split()
            current_tokens.append(token)
            current_labels.append(label)
        else:  # Empty line signifies end of a sentence
            if current_tokens:
                input_texts.append(' '.join(current_tokens))
                target_labels.append(' '.join(current_labels))
                # Reset for next sentence
                current_tokens = []
                current_labels = []

    # Add the last sentence if the data doesn't end with an empty line
    if current_tokens:
        input_texts.append(' '.join(current_tokens))
        target_labels.append(' '.join(current_labels))
    
    return input_texts, target_labels

In [4]:
# Example usage
file_path = 'NER/Training_Data/AutoLabelledSet.txt'  # Replace with your file path
input_texts, target_labels = preprocess_and_tokenize(file_path)
print(input_texts[:5])
print(target_labels[:5])
print(len(input_texts), len(target_labels))

['the jubilee domain contains a complex sequence of ultramafic mafic rocks and interleaved sedimentary rocks , overlain by polymictic conglomerate .', 'extensive weathered banded quartz magnetite rocks occur throughout the project area .', 'the most common and significant metaliferrous rock types in the area are metamorphosed banded iron formations , ( bif ) and granular iron formations , ( gif ) .', 'these can be either completely oxidised medium to coarse grain hematite or more stable magnetite .', 'a major volcanic centre , defined by abundant felsic volcanics and quartz aluminosilicatechloritoid rocks ( considered to represent metamorphosed alteration assemblages ) , occurs in the central portion of this domain .']
['O B-LOCATION O O O O O O B-ROCK I-ROCK I-ROCK O O B-ROCK I-ROCK O O O B-ROCK I-ROCK O', 'O O O B-MINERAL B-MINERAL O O O O O O O', 'O O O O O B-ROCK I-ROCK O O O O O O B-ROCK I-ROCK I-ROCK O O B-ROCK O O B-ROCK I-ROCK I-ROCK O O B-ROCK O O', 'O O O O O O O O O O B-MINE

In [5]:
test_file = "NER/Training_Data/EvaluationSet.txt"
test_inputs, test_labels = preprocess_and_tokenize(test_file)
print(test_inputs[:5])
print(test_labels[:5])
print(len(test_inputs), len(test_labels))

['The geology of the hole was dominated by felsic schists and granites .', 'The mineralisation was characterised by traces of disseminated pyrite with zones of trace pyrrhotite and chalcopyrite in felsic schist .', 'The best mineralisation was intersected in felsic schists below the interpreted position of the VTEM plate model .', 'The geology of the hole was ultramafic schists overlying amphibolite and amphibolitic schists with two 1 m wide weakly sulphidic quartz veins .', 'These quartz veins were characterised by green colouration with traces of magnetite and disseminated pyrite , however contained no anomalous chemistry .']
['O O O O O O O O B-ROCK I-ROCK O B-ROCK O', 'O O O O O O O O B-MINERAL O O O O B-MINERAL O B-MINERAL O B-ROCK I-ROCK O', 'O O O O O O B-ROCK I-ROCK O O O O O O O O O O', 'O O O O O O B-ROCK I-ROCK O B-ROCK O B-ROCK I-ROCK O O O O O O O B-ROCK I-ROCK O', 'O B-ROCK I-ROCK O O O O O O O O B-MINERAL O O B-MINERAL O O O O O O O']
2001 2001


In [6]:
import json


def preprocess_and_tokenize_json(file_name):
    with open(file_name, 'r') as json_file:
        data = json.load(json_file)

    # Initialize lists for sentences and labels
    sentences = []
    labels = []

    # Process each entry in the JSON data
    for entry in data:
        output = entry['output']
        label = entry['labels']
        
        # Join the output list into a string sentence
        sentence = ' '.join(output).lower()
        
        # Join the labels list into a string
        label_str = ' '.join(label)
        
        # Append to lists
        sentences.append(sentence)
        labels.append(label_str)

    return sentences, labels

In [7]:
# Example usage
file_name = "NER/Training_Data/geotimeLabels.json"  # Replace with your file path
geotime_text, geotime_labels = preprocess_and_tokenize_json(file_name)

print(geotime_text[:5])
print(geotime_labels[:5])
print(len(geotime_text), len(geotime_labels))

['around 1070 ma , the region experienced significant volcanic activity .', 'the formation of the supercontinent pannotia occurred approximately 600 ma .', 'the last glacial maximum occurred roughly 20 ka ago .', 'evidence suggests that the andean orogeny began around 200 ma .', 'the cambrian explosion took place approximately 541 ma .']
['O B-GEO_TIME I-GEO_TIME O O O O O O O O', 'O O O O O O O O B-GEO_TIME I-GEO_TIME O', 'O O O O O O B-GEO_TIME I-GEO_TIME O O', 'O O O O O O O O B-GEO_TIME I-GEO_TIME O', 'O B-TIMESCALE O O O O B-GEO_TIME I-GEO_TIME O']
87 87


In [8]:
# file_name = "NER/Training_Data/geotimeLabels.json"  # Replace with your file path
# with open(file_name, 'r') as json_file:
#         data = json.load(json_file)

# for i in range(len(data)):
#     data[i]['labels'] = ['O' if x == '0' else x for x in data[i]['labels']]

# with open(file_name, 'w') as json_file:
#     json.dump(data, json_file)

In [9]:
# Example usage
file_name = "NER/Training_Data/DomainDictionary.json"  # Replace with your file path
dictionary_words, dictionary_labels = preprocess_and_tokenize_json(file_name)

print(dictionary_words[:5])
print(dictionary_labels[:5])
print(len(dictionary_words), len(dictionary_labels))

['aalenian', 'abereiddian', 'acadian', 'actonian', 'adelaidean']
['B-TIMESCALE', 'B-TIMESCALE', 'B-TIMESCALE', 'B-TIMESCALE', 'B-TIMESCALE']
16920 16920


In [10]:
dd_words = []
dd_labels = []

sentence = ""
labels = ""
for i in range(len(dictionary_words)):
    word = dictionary_words[i]
    label = dictionary_labels[i]
    if len(sentence.split(" ")) + len(word.split(" ")) > 60:
        dd_words.append(sentence)
        dd_labels.append(labels)
        sentence = ""
        labels = ""
        sentence += word
        labels += label
    else:
        if sentence == "":
            sentence += word
            labels += label
        else:
            sentence += " , " + word
            labels += " O " + label

In [11]:
print(dd_words[:5])
print(dd_labels[:5])
print(len(dd_words), len(dd_labels))

print(len(dd_words[0].split(" ")))

['aalenian , abereiddian , acadian , actonian , adelaidean , aegean , aeronian , aksayan , aktastinian , alaunian , albertan , albian , aldingian , alexandrian , alportian , altonian , amgan , animikean , anisian , aphebian , aptian , aquitanian , aratauran , archean , archeozoic , arenig , arenigian , arikareean , aritan , arnsbergian , arowhanan', 'artinskian , arundian , asbian , ashgill , asselian , astian , atdabanian , atokan/derryan , atokan , aurelucian , austinian , auversian , awamoan , ayusokkanian , azoic , baigendzinian , bairnsdalian , baishaean , bajocian , bala , balan , balcombian , bananian , baotan , barremian , barstovian , bartonian , bashkirian , basin , batesfordian , bathonian', 'batyrbayan , begudian , bendigonian , berriasian , bithynian , black , blackriveran , blackriverian , blancan , bolindian , bolsovian , boomerangian , bortonian , botomian , braxtonian , bridgerian , brigantian , bulitian , buntsandstein , burdigalian , burrellian , burzyan , caerfai , 

In [12]:
# from sklearn.model_selection import train_test_split

# # Split data into training and testing sets
# train_texts, test_texts, train_labels, test_labels = train_test_split(
#     input_texts, target_labels, test_size=0.2, random_state=42
# )

# print(f"Number of training samples: {len(train_texts)}")
# print(f"Number of testing samples: {len(test_texts)}")

# print(train_texts[:5])

In [13]:
def tokenize_data(texts, labels, tokenizer):
    """
    Tokenizes the input texts and labels.
    """
    inputs = tokenizer(texts, padding="max_length", truncation=True, max_length=256, return_tensors="pt")
    targets = tokenizer(labels, padding="max_length", truncation=True, max_length=256, return_tensors="pt")
    
    # Ensure labels are the same length as inputs
    inputs['labels'] = targets['input_ids']
    
    return inputs


In [14]:
import torch

class NERDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        return len(self.labels)


In [15]:
train_input = [words for words in input_texts] 
train_input.extend([words for words in geotime_text])
train_input.extend([words for words in dd_words])

train_labels = [labels for labels in target_labels]
train_labels.extend([labels for labels in geotime_labels])
train_labels.extend([labels for labels in dd_labels])

print(train_input[-1])
print(len(train_input))
print(len(train_labels))


yuinmery volcanics member , yule granitic complex , yulleroo formation , yulyupunyu granitic gneiss , yumba formation , yumurrpa granophyre , yundi sandy loam , yungkulungu formation , yunkanjini granite , yunta well leucogranite , yununba granite , yurabi formation , yuruga formation , zamia creek siltstone , zamu dolerite , zebra hill suite , zeepaard formation , ziggy monzogranite
32815
32815


In [16]:
# Initialize tokenizer
# tokenizer = AutoTokenizer.from_pretrained("t5-small")
tokenizer = AutoTokenizer.from_pretrained("dbmdz/t5-base-conll03-english")
# tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")

# Tokenize training and testing data
train_data = tokenize_data(train_input, train_labels, tokenizer)
test_data = tokenize_data(test_inputs, test_labels, tokenizer)


# Print tokenized data for verification
print("Training Input IDs:", train_data['input_ids'][0])
print("Training Labels:", train_data['labels'][0])
print("Testing Input IDs:", test_data['input_ids'])
print("Testing Labels:", test_data['labels'])

# Prepare datasets
train_dataset = NERDataset(train_data, train_data['labels'])
test_dataset = NERDataset(test_data, test_data['labels'])

from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, num_workers=12, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=12)

# Print or return the shape of the inputs and labels
input_shape = train_data['input_ids'].shape
label_shape = train_data['input_ids'].shape

print(f"Input Shape: {input_shape}")
print(f"Label Shape: {label_shape}")

# Print or return the shape of the inputs and labels
test_input_shape = test_data['input_ids'].shape
test_label_shape = test_data['input_ids'].shape

print(f"Input Shape: {test_input_shape}")
print(f"Label Shape: {test_label_shape}")

Training Input IDs: tensor([    8,     3,  2047,  3727,    15,    15,  3303,  2579,     3,     9,
         1561,  5932,    13,  6173,    51,     9,  4638,   954,  4638, 12288,
           11,  1413,   109,     9,   162,    26, 23474,  1208, 12288,     3,
            6,   147,   521,    77,    57,  4251,  3113,  1225,   975, 24422,
          342,     3,     5,     1,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,  

In [17]:
from transformers import Trainer, TrainingArguments, T5ForConditionalGeneration

# Initialize model
model = AutoModelForSeq2SeqLM.from_pretrained("dbmdz/t5-base-conll03-english").to(device)
# model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base").to(device)
# model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    logging_dir='./logs',
    logging_steps=100,
    evaluation_strategy="steps",
    gradient_accumulation_steps=8,
    save_steps=15000,
    eval_steps=15000,
    fp16=True,
    do_train=True,
    do_eval=True,
    do_predict=True,
)


# Define metrics computation
from sklearn.metrics import f1_score
import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Flatten the lists
    labels = labels.flatten()
    predictions = predictions.flatten()
    return {
        'f1': f1_score(labels, predictions, average='weighted')
    }

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)



dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [18]:
# Train the model
trainer.train()

  0%|          | 0/10250 [00:00<?, ?it/s]

{'loss': 0.1756, 'grad_norm': 0.06519773602485657, 'learning_rate': 4.951219512195122e-05, 'epoch': 0.1}
{'loss': 0.0697, 'grad_norm': 0.08146729320287704, 'learning_rate': 4.902439024390244e-05, 'epoch': 0.2}
{'loss': 0.0612, 'grad_norm': 0.14215214550495148, 'learning_rate': 4.853658536585366e-05, 'epoch': 0.29}
{'loss': 0.0545, 'grad_norm': 0.08234860748052597, 'learning_rate': 4.804878048780488e-05, 'epoch': 0.39}
{'loss': 0.0468, 'grad_norm': 0.27755144238471985, 'learning_rate': 4.75609756097561e-05, 'epoch': 0.49}
{'loss': 0.0418, 'grad_norm': 0.16214203834533691, 'learning_rate': 4.707317073170732e-05, 'epoch': 0.59}
{'loss': 0.0383, 'grad_norm': 0.09076617658138275, 'learning_rate': 4.658536585365854e-05, 'epoch': 0.68}
{'loss': 0.0346, 'grad_norm': 0.1473614126443863, 'learning_rate': 4.609756097560976e-05, 'epoch': 0.78}
{'loss': 0.0313, 'grad_norm': 0.13732966780662537, 'learning_rate': 4.560975609756098e-05, 'epoch': 0.88}
{'loss': 0.0295, 'grad_norm': 0.4108411967754364, 

TrainOutput(global_step=10250, training_loss=0.015258814584918139, metrics={'train_runtime': 28490.0689, 'train_samples_per_second': 11.518, 'train_steps_per_second': 0.36, 'train_loss': 0.015258814584918139, 'epoch': 10.0})

In [24]:
def tokenize_input(texts, tokenizer, max_length=256):
    return tokenizer(texts, padding="max_length", truncation=True, max_length=max_length, return_tensors="pt")

def predict_entities(texts, model, tokenizer):
    inputs = tokenize_input(texts, tokenizer)
    inputs = {key: val.to(device) for key, val in inputs.items()}  # Move inputs to GPU
    with torch.no_grad():  # Disable gradient calculation
        outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


# Example geological text
geo_texts = [
    "the jubilee domain contains a complex sequence of ultramafic mafic rocks and more evolved rocks, due to magma mixing and fractional crystallization .",
]

# Predict geological entities
predictions = predict_entities(geo_texts, model, tokenizer)

# Print the results
print(predictions)
for text, prediction in zip(geo_texts, predictions):
    print(f"Text: {text}")
    print(f"Prediction: {prediction}")
    print()

['O B-LOCATION O O O O O O B-ROCK I-ROCK I-ROCK O O O O O O O O O O O O']
Text: the jubilee domain contains a complex sequence of ultramafic mafic rocks and more evolved rocks, due to magma mixing and fractional crystallization .
Prediction: O B-LOCATION O O O O O O B-ROCK I-ROCK I-ROCK O O O O O O O O O O O O



In [20]:
model.save_pretrained('./saved_model')  # Custom directory for saving
tokenizer.save_pretrained('./saved_model')  # Save tokenizer as well

('./saved_model\\tokenizer_config.json',
 './saved_model\\special_tokens_map.json',
 './saved_model\\tokenizer.json')

In [90]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

save_directory = "./NER/first_saved_model"
# Load the tokenizer and model from the saved directory
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForSeq2SeqLM.from_pretrained(save_directory).to(device)

In [111]:
# a = model.get_input_embeddings()

# # Words you want to get embeddings for
# words = ["geology"]

# # Tokenize the words to get token IDs
# inputs = tokenizer(words, return_tensors="pt", padding=True, truncation=True, is_split_into_words=True).to(device)
# print(a(inputs.input_ids))  # (batch_size, sequence_length, hidden_size)
# # for i in a:
# #     print(i)
# # print(model.get_input_embeddings().weight])

tensor([[[-15.1844,  -5.5618,  -1.0842,  ...,   7.9946, -26.2571,  10.1805],
         [ 17.0060,  32.5002, -40.2554,  ...,  -9.2514,   6.2497,   5.2842],
         [ 11.3847,  -4.8842,   9.0804,  ...,   4.8540,  14.3742,  -5.7740]]],
       device='cuda:0', grad_fn=<EmbeddingBackward0>)


In [27]:
test_file_path = 'NER/Training_data/EvaluationSet.txt' 
test_inputs, test_targets = preprocess_and_tokenize(test_file_path)

print(input_texts[:5])
print(target_labels[:5])


['the jubilee domain contains a complex sequence of ultramafic mafic rocks and interleaved sedimentary rocks , overlain by polymictic conglomerate .', 'extensive weathered banded quartz magnetite rocks occur throughout the project area .', 'the most common and significant metaliferrous rock types in the area are metamorphosed banded iron formations , ( bif ) and granular iron formations , ( gif ) .', 'these can be either completely oxidised medium to coarse grain hematite or more stable magnetite .', 'a major volcanic centre , defined by abundant felsic volcanics and quartz aluminosilicatechloritoid rocks ( considered to represent metamorphosed alteration assemblages ) , occurs in the central portion of this domain .']
['O B-LOCATION O O O O O O B-ROCK I-ROCK I-ROCK O O B-ROCK I-ROCK O O O B-ROCK I-ROCK O', 'O O O B-MINERAL B-MINERAL O O O O O O O', 'O O O O O B-ROCK I-ROCK O O O O O O B-ROCK I-ROCK I-ROCK O O B-ROCK O O B-ROCK I-ROCK I-ROCK O O B-ROCK O O', 'O O O O O O O O O O B-MINE

In [15]:
import json

# Remove entries with mismatched lengths
file_name = "NER/Training_Data/geotimeLabels.json"  # Replace with your file path

with open(file_name, 'r') as json_file:
    data = json.load(json_file)


print(len(data))

valid_data = []

# Iterate through the entries and check the lengths
for entry in data:
    output = entry['output']
    label = entry['labels']
    
    # Check if lengths are the same
    if len(output) == len(label):
        valid_data.append(entry)  # Keep only valid entries
    else:
        print(f"Removed entry with mismatched lengths: {len(output)} vs {len(label)}")
        print(f"Output: {output}")
        print(f"Labels: {label}")
        print()

# Print the number of valid entries
print(f"Number of valid entries: {len(valid_data)}")

with open(file_name, 'w') as json_file:
    json.dump(valid_data, json_file)



87
Number of valid entries: 87


In [54]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split

def retrain(model, tokenizer):
    # Preprocess and tokenize the data
    file_name = "NER/Training_Data/geotimeLabels.json"  # Replace with your file path
    input_texts, target_labels = preprocess_and_tokenize_json(file_name)
    
    # Split data into training and testing sets
    train_texts, test_texts, train_labels, test_labels = train_test_split(
        input_texts, target_labels, test_size=0.1, random_state=42
    )
    
    # Tokenize training and testing data
    train_data = tokenize_data(train_texts, train_labels, tokenizer)
    test_data = tokenize_data(test_texts, test_labels, tokenizer)
    
    # Prepare datasets
    train_dataset = NERDataset(train_data, train_data['labels'])
    test_dataset = NERDataset(test_data, test_data['labels'])

        # Define training arguments
    training_args = TrainingArguments(
        output_dir='./results',
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=50,
        logging_dir='./logs',
        logging_steps=100,
        evaluation_strategy="steps",
        gradient_accumulation_steps=8,
        save_steps=10000,
        eval_steps=10000,
        fp16=True,
        do_train=True,
        do_eval=True,
        do_predict=True,
    )

    # Initialize Trainer
    trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    )

    # Train the model
    trainer.train()

    return model, tokenizer

# save_directory = "./NER/saved_model"
# # Load the tokenizer and model from the saved directory
# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSeq2SeqLM.from_pretrained(save_directory).to(device)

# model, tokenizer = retrain(model, tokenizer)

In [25]:
texts = "The formation of the Rocky Mountains occurred around 70 Ma."

predictions = predict_entities(texts, model, tokenizer)

print(texts)
print(predictions)

print(len(predictions))


The formation of the Rocky Mountains occurred around 70 Ma.
['O O O O B-LOCATION I-LOCATION O O O B-GEO_TIME I-GEO_TIME O']
1


In [31]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.metrics import classification_report, accuracy_score, f1_score

# # Load the tokenizer and model from the saved directory
save_directory = "./NER/first_saved_model"
old_tokenizer = AutoTokenizer.from_pretrained(save_directory)
old_model = AutoModelForSeq2SeqLM.from_pretrained(save_directory).to(device)

def tokenize_input(texts, tokenizer, max_length=256):
    return tokenizer(texts, padding="max_length", truncation=True, max_length=max_length, return_tensors="pt")

def predict_entities(texts, model, tokenizer):
    inputs = tokenize_input(texts, tokenizer)
    inputs = {key: val.to(device) for key, val in inputs.items()}  # Move inputs to GPU

    with torch.no_grad():  # Disable gradient calculation
        outputs = model.generate(**inputs, max_new_tokens=256)  # Generate output sequences

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

def evaluate_model(texts, true_labels, model, tokenizer):
    # predicted_texts = predict_entities(texts, model, tokenizer)
    predicted_texts = []
    for text in texts:
        predicted_texts.append(predict_entities(text, model, tokenizer)[0])
    
    true_labels_flat = []
    pred_labels_flat = []

    for i in range(len(texts)):
        pred_tokens = predicted_texts[i].split()
        true_tokens = true_labels[i].split()
        
        for j in range(len(true_tokens)):
            if true_tokens[j] != 'O':  # Ignore 'O' tokens if necessary
                true_labels_flat.append(true_tokens[j])
                pred_labels_flat.append(pred_tokens[j] if j < len(pred_tokens) else 'O')

    # Convert label strings to integers based on model's label mapping
    # label_ids = list(model.config.label2id.values())
    # print(label_ids)
    # true_labels_flat = [model.config.label2id[label] for label in true_labels_flat]
    # pred_labels_flat = [model.config.label2id[label] for label in pred_labels_flat]

    # Calculate metrics
    accuracy = accuracy_score(true_labels_flat, pred_labels_flat)
    f1_micro = f1_score(true_labels_flat, pred_labels_flat, average='micro')
    f1_macro = f1_score(true_labels_flat, pred_labels_flat, average='macro')
    
    report = classification_report(
        true_labels_flat, pred_labels_flat
    )

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1-score (Micro): {f1_micro:.4f}")
    print(f"F1-score (Macro): {f1_macro:.4f}")
    print("Classification Report:")
    print(report)

# Evaluate the model on the parsed test data
evaluate_model(test_inputs, test_targets, model, tokenizer)
evaluate_model(test_inputs, test_targets, old_model, old_tokenizer)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.6018
F1-score (Micro): 0.6018
F1-score (Macro): 0.3750
Classification Report:
               precision    recall  f1-score   support

  B-GEOCATION       0.00      0.00      0.00         0
   B-LOCATION       0.76      0.66      0.71      1006
    B-MINERAL       0.87      0.73      0.79      1313
B-ORE_DEPOSIT       0.82      0.54      0.65       382
       B-ROCK       0.85      0.59      0.70      1880
      B-STRAT       0.86      0.55      0.67       640
  B-TIMESCALE       0.94      0.70      0.81       210
   I-LOCATION       0.63      0.57      0.60       686
    I-MINERAL       0.62      0.16      0.26        91
I-ORE_DEPOSIT       0.78      0.61      0.69       301
       I-ROCK       0.79      0.46      0.58       756
      I-STRAT       0.76      0.60      0.67       780
  I-TIMESCALE       0.00      0.00      0.00         3
            O       0.00      0.00      0.00         0
            [       0.00      0.00      0.00         0
            ]       0.00     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
