In [1]:
!pip install transformers datasets seqeval evaluate torch

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.4-py3-none-any.whl.metadata (9.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downlo

In [2]:
import os
import pandas as pd
import numpy as np

from datasets import load_dataset, ClassLabel, Sequence
from transformers import AutoTokenizer, AutoConfig, AutoModelForTokenClassification
from transformers import DataCollatorForTokenClassification, TrainingArguments, Trainer
import evaluate


# Download and load CoNLL file from GitHub raw URL

In [29]:
# Download raw CoNLL from your task-2 branch
url = "https://raw.githubusercontent.com/lhiwi/ethioMart-ner-project/task-2/data/raw/labeling_template.conll"
r = requests.get(url); r.raise_for_status()
with open("labeling_template.conll","w",encoding="utf8") as f: f.write(r.text)

In [30]:
# Read into Python structures
def read_conll(path):
    docs = []
    tokens, tags = [], []
    with open(path, encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if tokens:
                    docs.append({"tokens": tokens, "ner_tags": tags})
                    tokens, tags = [], []
            else:
                tok, tag = line.split("\t")
                tokens.append(tok); tags.append(tag)
        if tokens:
            docs.append({"tokens": tokens, "ner_tags": tags})
    return docs

examples = read_conll("labeling_template.conll")


Build Label Maps & Create Dataset

In [31]:
# Build label maps
all_tags = sorted({t for ex in examples for t in ex["ner_tags"]})
label2id = {l:i for i,l in enumerate(all_tags)}
id2label = {i:l for l,i in label2id.items()}

In [32]:
# Convert tag strings to IDs
for ex in examples:
    ex["ner_tags"] = [label2id[t] for t in ex["ner_tags"]]


In [33]:
# Construct a Hugging Face Dataset
dataset = Dataset.from_list(examples)

In [38]:
# 6.4 Define and cast features
features = Features({
    "tokens":  Sequence(feature=Value("string")),
    "ner_tags": Sequence(feature=ClassLabel(names=all_tags))
})
dataset = dataset.cast(features)

Casting the dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

In [39]:
# Quick check
print(dataset[0])
print("Labels:", all_tags)


{'tokens': ['አድራሻሜክሲኮ', 'ኮሜርስ', 'ጀርባ', 'መዚድ', 'ፕላዛ', 'የመጀመሪያ', 'ደረጃ', 'እንደወጡ', 'የቢሮ', 'ቁጥር', 'ያገኙናል'], 'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
Labels: ['O']


tokenize and align labels

In [40]:
model_name = "rasyosef/bert-tiny-amharic"  # or "facebook/xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

def tokenize_and_align(ex):
    tokenized = tokenizer(ex["tokens"], is_split_into_words=True, truncation=True)
    word_ids = tokenized.word_ids()
    labels = []
    for idx, wid in enumerate(word_ids):
        if wid is None:
            labels.append(-100)
        else:
            labels.append(ex["ner_tags"][wid])
    tokenized["labels"] = labels
    return tokenized

tokenized_ds = dataset.map(tokenize_and_align, batched=False)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

train/validation split

In [41]:
split = tokenized_ds.train_test_split(test_size=0.2, seed=42)
train_ds, val_ds = split["train"], split["test"]

Data collator and metrics

In [42]:
data_collator = DataCollatorForTokenClassification(tokenizer)
metric = evaluate.load("seqeval")

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=2)
    true = p.label_ids
    def strip_neg100(pred, gold):
        return [(p, g) for p, g in zip(pred, gold) if g != -100]
    cleaned_preds = [[id2label[p] for p,g in strip_neg100(pr, gl)] for pr,gl in zip(preds, true)]
    cleaned_refs  = [[id2label[g] for p,g in strip_neg100(pr, gl)] for pr,gl in zip(preds, true)]
    out = metric.compute(predictions=cleaned_preds, references=cleaned_refs)
    return {"precision": out["overall_precision"], "recall": out["overall_recall"], "f1": out["overall_f1"], "accuracy": out["overall_accuracy"]}


In [46]:
# 10. Configure & Run Trainer (disable WandB integration)

from transformers import TrainingArguments, Trainer, AutoModelForTokenClassification

training_args = TrainingArguments(
    output_dir="models/ner_amharic",
    do_train=True,
    do_eval=True,
    eval_steps=100,
    logging_steps=50,
    save_steps=100,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="logs",
    report_to=["none"]        # disable WandB and other trackers
)

model = AutoModelForTokenClassification.from_pretrained(
    model_name, id2label=id2label, label2id=label2id
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()


Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


TrainOutput(global_step=15, training_loss=0.0, metrics={'train_runtime': 2.3312, 'train_samples_per_second': 85.792, 'train_steps_per_second': 6.434, 'total_flos': 72133113312.0, 'train_loss': 0.0, 'epoch': 5.0})

Evaluate on the validation set and save the final model

In [47]:
# Evaluate
metrics = trainer.evaluate()
print("Evaluation metrics:", metrics)


Evaluation metrics: {'eval_loss': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 1.0, 'eval_runtime': 0.1164, 'eval_samples_per_second': 85.93, 'eval_steps_per_second': 8.593, 'epoch': 5.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


In [49]:
# Save the model and tokenizer
output_dir = "models/final_ner_model"
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)
print(f" Model and tokenizer saved to {output_dir}")


 Model and tokenizer saved to models/final_ner_model


In [60]:
# Cell: Strip out any obsolete widget metadata from all notebooks
import glob
import nbformat

# Find every .ipynb under notebooks/
for path in glob.glob('notebooks/*.ipynb'):
    nb = nbformat.read(path, as_version=4)
    # Remove any 'widgets' metadata entry from each cell
    for cell in nb.cells:
        if 'widgets' in cell.metadata:
            del cell.metadata['widgets']
    # Overwrite the notebook in place
    nbformat.write(nb, path)
    print(f"Cleaned widgets metadata in {path}")

print("✅ All notebooks cleaned.")


✅ All notebooks cleaned.
