In [2]:
# use trl to train a model from stack dataset
import os

# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

import sys
import torch
import logging
import argparse
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

model_path = '/data/sonald/ai_models/model_weights/Llama-2-7b-hf'
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_8bit=True, device_map="auto")

model = prepare_model_for_int8_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM',
    target_modules=['gate_proj', 'up_proj', 'down_proj', 'o_proj'],
)
peft_model = get_peft_model(model, lora_config)

# ds = load_dataset('lvwerra/stack-exchange-paired', split='train', data_dir='data/rl', streaming=True)
ds = load_dataset('Anthropic/hh-rlhf', split='train', data_dir='helpful-base', streaming=False)


from typing import Dict


def common_prefix(s, t):
    from itertools import takewhile
    return ''.join(c[0] for c in takewhile(lambda x: x[0] == x[1], zip(s, t)))


def build_dpo_prompt_and_reponses(sample) -> Dict[str, str]:
    s, t = sample['chosen'], sample['rejected']
    prefix = common_prefix(s, t)

    return {
        'prompt': prefix,
        'chosen': s[len(prefix):],
        'rejected': t[len(prefix):],
    }
    pass

ds = ds.map(build_dpo_prompt_and_reponses, batched=False, num_proc=32, remove_columns=ds.column_names)
dss = ds.train_test_split(test_size=0.1)
train_ds = dss['train']
eval_ds = dss['test']


tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token_id = tokenizer.eos_token_id


In [None]:
from trl import DPOTrainer

args = TrainingArguments(
    output_dir='dpo-output',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,

    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=100,
    save_steps=1000,
    eval_steps=1000,
    evaluation_strategy='steps',
    save_total_limit=2,
    # report_to='wandb',
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False,
)
dpo = DPOTrainer(model=model,
                 ref_model=None,
                 tokenizer=tokenizer,
                 args=args, beta=0.1,
                 train_dataset=train_ds,
                 eval_dataset=eval_ds,
                 max_prompt_length=2048,
                 max_length=4096,
                 peft_config=lora_config)
dpo.train()

[2024-02-19 18:08:24,943] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)




RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:3!

In [1]:
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

from datasets import load_dataset
ds = load_dataset('Anthropic/hh-rlhf', split='train', data_dir='helpful-base', streaming=False)


from typing import Dict


def common_prefix(s, t):
    from itertools import takewhile
    return ''.join(c[0] for c in takewhile(lambda x: x[0] == x[1], zip(s, t)))


def build_dpo_prompt_and_reponses(sample) -> Dict[str, str]:
    s, t = sample['chosen'], sample['rejected']
    prefix = common_prefix(s, t)

    return {
        'prompt': prefix,
        'chosen': s[len(prefix):],
        'rejected': t[len(prefix):],
    }
    pass

ds = ds.map(build_dpo_prompt_and_reponses, batched=True, num_proc=32, remove_columns=ds.column_names)


  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset json (/home/uos/.cache/huggingface/datasets/Anthropic___json/Anthropic--hh-rlhf-902494df09f7345c/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
Loading cached processed dataset at /home/uos/.cache/huggingface/datasets/Anthropic___json/Anthropic--hh-rlhf-902494df09f7345c/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e/cache-02f904f635aae199_*_of_00032.arrow


In [2]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_path = '/data/sonald/ai_models/model_weights/deepseek-coder-6.7b-base'
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)
model = AutoModelForCausalLM.from_pretrained(model_path,
    quantization_config=bnb_config)


Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.17s/it]


In [3]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaLinearScalingRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
  

In [5]:
[m for m in model.named_modules()]

[('',
  LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(32256, 4096)
      (layers): ModuleList(
        (0-31): 32 x LlamaDecoderLayer(
          (self_attn): LlamaSdpaAttention(
            (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (rotary_emb): LlamaLinearScalingRotaryEmbedding()
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
            (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
            (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): LlamaRMSNorm()
          (post_attenti