<a href="https://colab.research.google.com/github/jslinuxta/Colab/blob/main/%F0%9F%92%A5_LazyORPO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title # 💥 LazyORPO

# @markdown 🔄 Replaces `SFT+DPO/PPO` with a single method: `ORPO`.

# @markdown 🏆 ORPO outperforms `SFT, SFT+DPO` on `Phi-2, Llama 2, and Mistral`

# @markdown 📊 Mistral ORPO achieves `12.20%` on AlpacaEval2.0, `66.19%` on IFEval, and 7.32 on MT-Bench Zephyr Beta

# @markdown 🔮 Created by [@zainulabideen](https://huggingface.co/abideen).

# @markdown 🔬 Based on [ORPO paper](https://huggingface.co/papers/2403.07691)

# @markdown ---

# @markdown ### 🤗 Training Parameters

!pip install -qqq runpod --progress-bar off


import runpod

# Colab renders the form fields from the trailing `# @param` markers, so each
# assignment must stay on one line together with its marker.
MODEL_ID = "microsoft/phi-2" # @param {type:"string"}
NEW_MODEL = "abideen/phi2-pro" # @param {type:"string"}
DATASET = "argilla/dpo-mix-7k" # @param {type:"string"}
EPOCH = 1 # @param {type:"integer"}
LEARNING_RATE = 5e-6 # @param {type:"number"}
USERNAME = "abideen" # @param {type:"string"}
WANDB_PROJECT = "phi2-mix" # @param {type:"string"}
HF_TOKEN = "" # @param {type:"string"}
WANDB_TOKEN = "" # @param {type:"string"}
# Output directory is derived from the dataset id (the text after the last "/").
OUTPUT = "checkpoints/" + DATASET.split("/")[-1]
GPU = "NVIDIA A40" # @param ["NVIDIA A100 80GB PCIe", "NVIDIA A100-SXM4-80GB", "NVIDIA A30", "NVIDIA A40", "NVIDIA GeForce RTX 3070", "NVIDIA GeForce RTX 3080", "NVIDIA GeForce RTX 3080 Ti", "NVIDIA GeForce RTX 3090", "NVIDIA GeForce RTX 3090 Ti", "NVIDIA GeForce RTX 4070 Ti", "NVIDIA GeForce RTX 4080", "NVIDIA GeForce RTX 4090", "NVIDIA H100 80GB HBM3", "NVIDIA H100 PCIe", "NVIDIA L4", "NVIDIA L40", "NVIDIA RTX 4000 Ada Generation", "NVIDIA RTX 4000 SFF Ada Generation", "NVIDIA RTX 5000 Ada Generation", "NVIDIA RTX 6000 Ada Generation", "NVIDIA RTX A2000", "NVIDIA RTX A4000", "NVIDIA RTX A4500", "NVIDIA RTX A5000", "NVIDIA RTX A6000", "Tesla V100-FHHL-16GB", "Tesla V100-PCIE-16GB", "Tesla V100-SXM2-16GB", "Tesla V100-SXM2-32GB"]
NUMBER_OF_GPUS = 1 # @param {type:"slider", min:1, max:8, step:1}
CONTAINER_DISK = 500 # @param {type:"slider", min:50, max:500, step:25}
VOLUME_STORAGE = 500 # @param {type:"slider", min:50, max:500, step:25}
CLOUD_TYPE = "SECURE" # @param ["COMMUNITY", "SECURE"]
REPO = "https://github.com/abideenml/AutoOPRO.git" # @param {type:"string"}
TRUST_REMOTE_CODE = False # @param {type:"boolean"}

RUNPOD_TOKEN = "" # @param {type:"string"}
# A pasted key may carry stray whitespace, and the form default is empty.
# Stop here with a clear message rather than requesting a pod with no usable key.
RUNPOD_TOKEN = RUNPOD_TOKEN.strip()
if not RUNPOD_TOKEN:
    raise ValueError(
        "RUNPOD_TOKEN is empty: paste your RunPod API key into the form before running this cell."
    )
runpod.api_key = RUNPOD_TOKEN

# Request the training pod from RunPod. Hardware and storage settings come from
# the form values defined earlier in this cell; the training settings are handed
# to the pod through the `env` argument.
pod = runpod.create_pod(
    # Pod name uses only the last path segment of the model id.
    name=f"ORPO POD: {MODEL_ID.rsplit('/', 1)[-1]}",
    image_name="runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04",
    template_id="au6nz6emhk",
    cloud_type=CLOUD_TYPE,
    gpu_type_id=GPU,
    gpu_count=NUMBER_OF_GPUS,
    container_disk_in_gb=CONTAINER_DISK,
    volume_in_gb=VOLUME_STORAGE,
    env=dict(
        MODEL_ID=MODEL_ID,
        DATASET=DATASET,
        EPOCH=EPOCH,
        NEW_MODEL=NEW_MODEL,
        LEARNING_RATE=LEARNING_RATE,
        OUTPUT=OUTPUT,
        USERNAME=USERNAME,
        WANDB_TOKEN=WANDB_TOKEN,
        WANDB_PROJECT=WANDB_PROJECT,
        TOKEN=HF_TOKEN,  # the Hugging Face token is exposed under the key "TOKEN"
        REPO=REPO,
        TRUST_REMOTE_CODE=TRUST_REMOTE_CODE,
    ),
)
# The printed link is the general pods console page, not a pod-specific URL.
print("Pod started: https://www.runpod.io/console/pods")