In [2]:
# Download files

import requests, zipfile, io

# Source URL for the notebook's supporting files.
files_url = "https://ideami.com/llm_align"
print("Downloading files using Python")

# The download itself is currently disabled. Uncomment the two lines below to
# fetch the archive and unpack it into the current working directory:
# response = requests.get(files_url)
# zipfile.ZipFile(io.BytesIO(response.content)).extractall(".")

Downloading files using Python


In [3]:
# Import libraries
import os, sys
import math
from tqdm import tqdm
from datetime import datetime
import ipdb
from typing import List, Dict, Union

# PyTorch
import torch
import torch.nn as nn
from torch.nn import functional as F

# Import some HuggingFace Libraries
import transformers
from datasets import load_dataset, load_from_disk

# Performance (only relevant on CUDA): allow TF32 in both cuBLAS matmuls and cuDNN.
for _tf32_backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
    _tf32_backend.allow_tf32 = True

# Release any cached GPU memory left over from earlier runs in this kernel.
torch.cuda.empty_cache()

# Optional debugging aid: tensors are printed in full (not summarized) until
# they exceed 10000 elements.
torch.set_printoptions(threshold=10000)

In [4]:
# Training parameters
batch_size = 1
epochs = 3  # beyond 3 epochs results can possibly degrade or get worse
lr = 6e-5
lr_warmup_steps = 100  # increase learning rate until 100 steps
context = 1024
alpha = 0.5  # weighting for the ORPO odds ratio (sort of loss calculation variable)
prompt_max_size = 512  # limit for the prompt part of the interaction.
# prompt: includes all the interaction except the last answer
# response: includes either the positive chosen answer or the negative rejected one
# NOTE(review): this name shadows the builtin `compile`; kept unchanged because
# later cells may read it.
compile = False
dtype = torch.bfloat16
log_iters = 50

# HYPERPARAMETERS
dropout = 0.
grad_clip = 1.0
weight_decay = 0.0

# DEVICE
# Pick the best available backend: CUDA first, then Apple-silicon MPS, and
# finally plain CPU. The previous unconditional "mps" fallback selected a
# backend that does not exist on CPU-only Windows/Linux machines.
_mps_backend = getattr(torch.backends, "mps", None)  # absent in older torch builds
if torch.cuda.is_available():
    device = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"
print("device: You will be using: ", device)

device: You will be using:  mps


In [5]:
# LOGGING
# Weights & Biases settings; the W&B project reuses the local project name.
project_name = "alignment"
wandb_project = project_name
wandb_log = True

# Recommended: suffix the run name with a timestamp so every run is unique.
# (Use the plain "aligntest-run" string instead if you want a fixed name.)
wandb_run_name = f"aligntest-run{datetime.now():%Y_%m_%d_%H_%M_%S}"

# wandb is imported only when logging is enabled, so it stays an optional dependency.
if wandb_log:
    import wandb
    wandb.init(name=wandb_run_name, project=wandb_project)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mnickyoon89[0m ([33mnickyoon89-miss-to-mrs-box[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Paths and identifiers used by the data / tokenizer / checkpoint steps.
# NOTE(review): ".data" is a hidden directory — confirm "./data" was not intended.
dataset_path = ".data/orpo_dataset"
dataset_name = "mlabonne/orpo-dpo-mix-40k"
tokenizer_path = "tokenizers/tok16384"
checkpoint_dir = "./models/"

# Tokenizing Dataset
# Load tokenizer in HuggingFace Format
tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_path)

# Set our interaction template (Jinja syntax, one fragment per source line).
# Fix: the user tag was written as '<|user|>'\n' — the misplaced quote closed the
# Jinja string early and made the template a syntax error when rendered. The
# newline now sits inside the literal: '<|user|>\n'.
# NOTE(review): the user tag is followed by a newline while the assistant tags
# are not — confirm this asymmetry is intended.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if message['role']=='user' %}\n"
    "{{ '<|user|>\n' + message['content'] + eos_token }}\n"
    "{% elif message['role'] == 'assistant' %}\n"
    "{{ '<|assistant|>' + message['content'] + eos_token }}\n"
    "{% endif %}\n"
    "{% if loop.last and add_generation_prompt %}\n"
    "{{ '<|assistant|>' }}\n"
    "{%endif %}\n"
    "{% endfor %}"
)

# Make padding token equal to the end of sentence token (which has ID of 2 in our case)
tokenizer.pad_token = tokenizer.eos_token

if os.path.exists(dataset_path):
    dataset = load_from_disk(dataset_path)
else:
    dataset = load_dataset(dataset_name, split="all")
    # Now we will tokenize it
    