In [1]:
!pip install git+https://github.com/omarxadel/camel_tools.git
!camel_data -i all

Collecting git+https://github.com/omarxadel/camel_tools.git
  Cloning https://github.com/omarxadel/camel_tools.git to /tmp/pip-req-build-ilamps7o
  Running command git clone --filter=blob:none --quiet https://github.com/omarxadel/camel_tools.git /tmp/pip-req-build-ilamps7o
  Resolved https://github.com/omarxadel/camel_tools.git to commit e8e831ed781f2f141a2513e33c8a9c7e5b94554d
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting camel-kenlm@ git+https://github.com/omarxadel/camel-kenlm.git (from camel_tools==1.5.5)
  Cloning https://github.com/omarxadel/camel-kenlm.git to /tmp/pip-install-1oq4sonh/camel-kenlm_ad1b3afaeaf544638b5532a1dc725c61
  Running command git clone --filter=blob:none --quiet https://github.com/omarxadel/camel-kenlm.git /tmp/pip-install-1oq4sonh/camel-kenlm_ad1b3afaeaf544638b5532a1dc725c61
  Resolved https://github.com/omarxadel/camel-ken

In [2]:
!pip install unsloth datasets tqdm transformers

Collecting unsloth
  Downloading unsloth-2025.3.19-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting unsloth_zoo>=2025.3.17 (from unsloth)
  Downloading unsloth_zoo-2025.3.17-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.18-py3-none-any.whl.metadata (9.2 kB)
Collecting transformers
  Downloading transformers-4.51.0-py3-none-any.whl.metadata (38 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from un

In [3]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from datasets import DatasetDict
from tqdm import tqdm
import time
from transformers import GenerationConfig
import json

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
from camel_tools.morphology.database import MorphologyDB
from camel_tools.morphology.analyzer import Analyzer
from camel_tools.tokenizers.morphological import MorphologicalTokenizer
from camel_tools.disambig.mle import MLEDisambiguator

mle_msa = MLEDisambiguator.pretrained('calima-msa-r13')
morph_tokenizer = MorphologicalTokenizer(disambiguator=mle_msa, scheme='atbtok')

In [5]:
def load_model(model_name="omarxadel/Arabic-Morph-DeepSeek-R1-Distill-Llama-8B"):
    """Loads the fine-tuned model and tokenizer."""
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)
    return model, tokenizer

In [6]:
model, tokenizer = load_model()



==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.0.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [7]:
# Define a custom tokenizer class
class CustomArabicTokenizer:
    def __init__(self, base_tokenizer, morph_tokenizer):
        self.base_tokenizer = base_tokenizer
        self.morph_tokenizer = morph_tokenizer

    def __call__(self, text, **kwargs):
        # Tokenize using the morphological tokenizer
        morph_tokens = self.camel_morph_tokenize(text)
        # Join tokens back into a string
        morph_text = ' '.join(morph_tokens)
        # Tokenize using the base tokenizer
        return self.base_tokenizer(morph_text, **kwargs)

    def camel_morph_tokenize(self, text):
        words = text.split()  # Split text into words
        tokenized_words = self.morph_tokenizer.tokenize(words)
        return tokenized_words

    def tokenize(self, text, **kwargs):
        # Tokenize using the morphological tokenizer first
        morph_tokens = self.camel_morph_tokenize(text)
        morph_text = ' '.join(morph_tokens)
        # Return token strings from base tokenizer
        return self.base_tokenizer.tokenize(morph_text, **kwargs)

    def decode(self, token_ids, **kwargs):
        return self.base_tokenizer.decode(token_ids, **kwargs)


# Instantiate the custom tokenizer
custom_tokenizer = CustomArabicTokenizer(tokenizer, morph_tokenizer)

In [8]:
def generate_response(model, tokenizer, instruction, options, max_new_tokens=2048):
    chat_template = """Below are some Multiple Choice Questions. Write responses in Arabic language only that appropriately complete each request in a valid, parsable JSON format with two attributes, one will be "reasoning" which is your thought process, 
    the other is the "solution" that has only a letter (a, b, c or d) in English, which represents the option you chose for the solution based on the options provided in the question.

### Question:
{INPUT}

### Options:
{OPTIONS}

### Solution JSON:
"""
    prompt = chat_template.replace("{INPUT}", instruction)
    prompt = prompt.replace("{OPTIONS}", options)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    start = time.time()
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
        )
    end = time.time()
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text, end - start, len(tokenizer.tokenize(output_text))

In [9]:
def evaluate(model, tokenizer, dataset, output_path="generations.jsonl", max_samples=5):
    correct = 0
    total = 0
    total_time = 0
    total_tokens = 0

    with open(output_path, "w", encoding="utf-8") as outfile:
        for example in tqdm(dataset.select(range(max_samples))):
            question = example["Question"]
            options = [
                example["Option 1"],
                example["Option 2"],
                example["Option 3"],
                example["Option 4"],
            ]
            answer = example["Answer Key"]

            instruction = f"{question}\n"
            options_str = ""
            for i, opt in enumerate(options):
                options_str += f"{chr(97+i)}. {opt}\n"  # a, b, c, d

            response, elapsed_time, token_len = generate_response(model, tokenizer, instruction, options_str)
            try:
                response_json = json.loads(response.strip().split("### Solution:")[-1])
                solution = response_json.get("solution", "").lower()
            except Exception:
                response_json = None
                solution = None

            log_entry = {
                "id": example["ID"],
                "question": question,
                "options": {
                    "a": options[0],
                    "b": options[1],
                    "c": options[2],
                    "d": options[3],
                },
                "answer_key": answer.lower(),
                "generated_text": response,
            }

            outfile.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

            if solution == answer.lower():
                correct += 1
            total += 1
            total_time += elapsed_time
            total_tokens += token_len

    print(f"Accuracy: {correct / total * 100:.2f}%")
    print(f"Average token length: {total_tokens / total:.2f} tokens")
    print(f"Average compute time: {total_time / total:.2f} seconds")
    print(f"Output saved to: {output_path}")

In [10]:
ds = load_dataset("MBZUAI/ArabicMMLU", "All")["dev"]

README.md:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/6.69M [00:00<?, ?B/s]

dev.csv:   0%|          | 0.00/49.4k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/14455 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/120 [00:00<?, ? examples/s]

In [11]:
evaluate(model, custom_tokenizer, ds, max_samples=120)  # Set this higher for full benchmark

100%|██████████| 120/120 [1:24:53<00:00, 42.44s/it]

Accuracy: 0.00%
Average token length: 883.77 tokens
Average compute time: 42.27 seconds
Output saved to: generations.jsonl



