# Quran Transformer Training (Colab)
Run the cells below in order to fine-tune the transformer on the prepared Quran dataset.

In [None]:
# Clone the repository (skip if you opened the notebook from within the repo)
from pathlib import Path
import os
repo_path = Path('/content/Eqratech_Arabic_Diana_Project')
if not repo_path.exists():
    !git clone https://github.com/salemqundil/Eqratech_Arabic_Diana_Project.git
os.chdir(repo_path)

In [None]:
# Install dependencies required for training
!pip install -r requirements.txt
!pip install -r requirements-dev.txt
!pip install transformers datasets accelerate evaluate pyyaml

In [None]:
# (Optional) Attach Google Drive when the dataset or the training outputs are kept there
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Ensure the prepared dataset is available under data/quran/
# If the dataset is stored on Drive, extract it now. Adjust the source path as needed.
# Imports are repeated here so the cell does not depend on the clone cell,
# which may have been skipped.
from pathlib import Path
import zipfile

data_dir = Path('data')
data_dir.mkdir(parents=True, exist_ok=True)

dataset_zip = '/content/drive/MyDrive/taqi_dataset/quran_dataset.zip'
if Path(dataset_zip).exists():
    # zipfile avoids interpolating the path into a shell command line;
    # extractall overwrites existing files, matching `unzip -o`.
    with zipfile.ZipFile(dataset_zip) as archive:
        archive.extractall(data_dir)
        print('Extracted', len(archive.namelist()), 'entries from', dataset_zip, 'into', data_dir)
else:
    print('Dataset zip not found at', dataset_zip)

# Surface a missing dataset here rather than partway into the training run.
if not (data_dir / 'quran').is_dir():
    print('Warning: expected dataset directory', data_dir / 'quran', 'does not exist')

In [None]:
# Update config paths to point to the Colab workspace
from pathlib import Path
import yaml

config_path = Path('configs/transformer_quran_colab.yaml')
config = yaml.safe_load(config_path.read_text(encoding='utf-8'))

# Build absolute dataset paths from the current working directory. The config
# file above is addressed relative to the same directory, so the two stay
# consistent even when the repository is not checked out under /content.
base_data_path = (Path.cwd() / 'data' / 'quran').as_posix()
config['dataset']['train_file'] = f'{base_data_path}/train.jsonl'
config['dataset']['validation_file'] = f'{base_data_path}/validation.jsonl'
config['dataset']['test_file'] = f'{base_data_path}/test.jsonl'
config['training']['output_dir'] = '/content/outputs/quran-transformer'

# sort_keys=False keeps the original key order instead of alphabetising the
# file on every run; allow_unicode=True writes non-ASCII text unescaped.
config_path.write_text(
    yaml.safe_dump(config, allow_unicode=True, sort_keys=False),
    encoding='utf-8',
)
config

In [None]:
# Kick off the training run.
# Runs scripts/train_transformer_quran.py in a shell subprocess and hands it the
# Colab config file through --config. Both paths are relative to the current
# working directory.
!python scripts/train_transformer_quran.py --config configs/transformer_quran_colab.yaml

In [None]:
# Package outputs for download or Drive backup
!zip -r /content/outputs_quran.zip /content/outputs/quran-transformer
!cp /content/outputs_quran.zip /content/drive/MyDrive/taqi_artifacts/outputs_quran.zip