In [None]:
!pip install trl

In [2]:
from IPython.display import Image

In [3]:
import os
os.environ['http_proxy'] = 'http://127.0.0.1:7890'
os.environ['https_proxy'] = 'http://127.0.0.1:7890'
os.environ['NCCL_P2P_DISABLE'] = '1'
os.environ['NCCL_IB_DISABLE'] = '1'
os.environ['WANDB_DISABLED'] = 'true'
os.environ['HF_HOME'] = '/home/samtang/.hf/'

In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM

## collate_fn (dataloader)

In [6]:
Image(url='https://lukesalamone.github.io/img/torch_collate_fn.png', width=400)

In [9]:
dataset = load_dataset('lucasmccabe-lmi/CodeAlpaca-20k', split='train')

In [10]:
dataset

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 20022
})

In [9]:
dataset[0]

{'instruction': 'Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.',
 'input': '',
 'output': 'def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x'}

## format function

In [21]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        text = f"### Question:\n{example['instruction'][i]}\n ### Answer:<|end_header_id|>\n{example['output'][i]}"
        output_texts.append(text)
    return output_texts


In [83]:
output_texts = formatting_prompts_func(dataset[:2])

In [84]:
print(dataset[0], '\n')
print(output_texts[0])

{'instruction': 'Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.', 'input': '', 'output': 'def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x'} 

### Question:
Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.
 ### Answer:<|end_header_id|>
def f(x):
    """
    Takes a specific input and produces a specific output using any mathematical operators
    """
    return x**2 + 3*x


## finetune facebook opt-350m

In [None]:
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

In [15]:
tokenizer

GPT2TokenizerFast(name_or_path='facebook/opt-350m', vocab_size=50265, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '</s>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}

In [85]:
tokenizer.chat_template

In [86]:
response_template = "<|end_header_id|>"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

In [87]:
dataset[:2]

{'instruction': ['Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.',
  'Generate a unique 8 character string that contains a lowercase letter, an uppercase letter, a numerical digit, and a special character. Write corresponding code in Python.'],
 'input': ['', ''],
 'output': ['def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x',
  "import string\nimport random\n\ndef random_password_string():\n    characters = string.ascii_letters + string.digits + string.punctuation\n    password = ''.join(random.sample(characters, 8))\n    return password\n\nif __name__ == '__main__':\n    print(random_password_string())"]}

In [88]:
tokenized_data = [tokenizer.encode(s) for s in formatting_prompts_func(dataset[:2])]

In [89]:
from torch.utils.data import DataLoader
# print(tokenizer.decode(tokenized_data[0]))

dataloader = DataLoader(tokenized_data, collate_fn=collator, batch_size=1)

In [90]:
row = next(iter(dataloader))
row

{'input_ids': tensor([[    2, 48134, 15680,    35, 50118, 44758,    10,  5043,    14,  1239,
             10,  2167,  8135,     8,  9108,    10,  2167,  4195,   634,   143,
          30412,  5990,     4, 21062, 12337,  3260,    11, 31886,     4, 50118,
          22560, 31652,    35, 41552, 15483,  1397,  1215, 24419,  1215,   808,
          15483, 15698, 50118,  9232,   856,  1640,  1178,  3256, 50118,  1437,
           1437,  1437, 49434, 50118,  1437,  1437,  1437, 29072,    10,  2167,
           8135,     8,  9108,    10,  2167,  4195,   634,   143, 30412,  5990,
          50118,  1437,  1437,  1437, 49434, 50118,  1437,  1437,  1437,   671,
           3023, 12606,   176,  2055,   155,  3226,  1178]]),
 'labels': tensor([[ -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
           -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
           -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
           -100,  -100,  -100,  -10

In [71]:
print(tokenizer.decode([2, 48134, 15680,    35, 50118, 44758,    10,  5043,    14,  1239,
             10,  2167,  8135,     8,  9108,    10,  2167,  4195,   634,   143,
          30412,  5990,     4, 21062, 12337,  3260,    11, 31886,     4, 50118,
          22560, 31652,    35, 50118,  9232,   856,  1640,  1178,  3256, 50118,
           1437,  1437,  1437, 49434, 50118,  1437,  1437,  1437, 29072,    10,
           2167,  8135,     8,  9108,    10,  2167,  4195,   634,   143, 30412,
           5990, 50118,  1437,  1437,  1437, 49434, 50118,  1437,  1437,  1437,
            671,  3023, 12606,   176,  2055,   155,  3226,  1178]))

</s>### Question:
Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.
 ### Answer:
def f(x):
    """
    Takes a specific input and produces a specific output using any mathematical operators
    """
    return x**2 + 3*x


In [92]:
print(tokenizer.decode([50118,  9232,   856,  1640,  1178,  3256, 50118,  1437,
           1437,  1437, 49434, 50118,  1437,  1437,  1437, 29072,    10,  2167,
           8135,     8,  9108,    10,  2167,  4195,   634,   143, 30412,  5990,
          50118,  1437,  1437,  1437, 49434, 50118,  1437,  1437,  1437,   671,
           3023, 12606,   176,  2055,   155,  3226,  1178]))


def f(x):
    """
    Takes a specific input and produces a specific output using any mathematical operators
    """
    return x**2 + 3*x


#### 1. SFTTrainer

In [None]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=SFTConfig(output_dir='/tmp', report_to='wandb'),
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Map: 100%|██████████| 20022/20022 [00:00<00:00, 28582.15 examples/s]


#### 2. train()

In [94]:
trainer.train()

 50%|█████     | 3760/7509 [30:09<30:03,  2.08it/s]
  7%|▋         | 500/7509 [01:34<23:41,  4.93it/s]

{'loss': 1.4623, 'grad_norm': 8.518695831298828, 'learning_rate': 1.8668264748967905e-05, 'epoch': 0.2}


 13%|█▎        | 1000/7509 [03:09<20:13,  5.36it/s] 

{'loss': 1.2277, 'grad_norm': 8.79334545135498, 'learning_rate': 1.733652949793581e-05, 'epoch': 0.4}


 20%|█▉        | 1500/7509 [04:41<26:59,  3.71it/s]  

{'loss': 1.1388, 'grad_norm': 6.205975532531738, 'learning_rate': 1.6004794246903717e-05, 'epoch': 0.6}


 27%|██▋       | 2000/7509 [06:14<15:31,  5.92it/s]  

{'loss': 1.0991, 'grad_norm': 8.490642547607422, 'learning_rate': 1.4673058995871624e-05, 'epoch': 0.8}


 33%|███▎      | 2500/7509 [07:44<11:14,  7.42it/s]  

{'loss': 1.0565, 'grad_norm': 8.074155807495117, 'learning_rate': 1.3341323744839527e-05, 'epoch': 1.0}


 40%|███▉      | 3000/7509 [09:15<13:42,  5.48it/s]  

{'loss': 0.8705, 'grad_norm': 5.781159400939941, 'learning_rate': 1.2009588493807431e-05, 'epoch': 1.2}


 47%|████▋     | 3500/7509 [10:51<12:12,  5.47it/s]  

{'loss': 0.8575, 'grad_norm': 8.283281326293945, 'learning_rate': 1.0677853242775338e-05, 'epoch': 1.4}


 53%|█████▎    | 4000/7509 [12:29<08:37,  6.78it/s]  

{'loss': 0.8393, 'grad_norm': 9.199316024780273, 'learning_rate': 9.346117991743243e-06, 'epoch': 1.6}


 60%|█████▉    | 4500/7509 [14:03<08:14,  6.08it/s]

{'loss': 0.8506, 'grad_norm': 7.997333526611328, 'learning_rate': 8.014382740711147e-06, 'epoch': 1.8}


 67%|██████▋   | 5000/7509 [15:37<08:47,  4.75it/s]

{'loss': 0.8212, 'grad_norm': 7.827569007873535, 'learning_rate': 6.682647489679053e-06, 'epoch': 2.0}


 73%|███████▎  | 5500/7509 [17:13<08:13,  4.07it/s]

{'loss': 0.6933, 'grad_norm': 6.574479103088379, 'learning_rate': 5.350912238646957e-06, 'epoch': 2.2}


 80%|███████▉  | 6000/7509 [18:50<05:24,  4.65it/s]

{'loss': 0.6852, 'grad_norm': 4.303717613220215, 'learning_rate': 4.019176987614862e-06, 'epoch': 2.4}


 87%|████████▋ | 6500/7509 [20:25<04:48,  3.50it/s]

{'loss': 0.6826, 'grad_norm': 5.468385696411133, 'learning_rate': 2.6874417365827676e-06, 'epoch': 2.6}


 93%|█████████▎| 7000/7509 [21:58<01:29,  5.67it/s]

{'loss': 0.6828, 'grad_norm': 5.402605056762695, 'learning_rate': 1.3557064855506728e-06, 'epoch': 2.8}


100%|█████████▉| 7500/7509 [23:32<00:01,  6.45it/s]

{'loss': 0.6701, 'grad_norm': 6.4568586349487305, 'learning_rate': 2.3971234518577706e-08, 'epoch': 3.0}


100%|██████████| 7509/7509 [23:38<00:00,  5.29it/s]

{'train_runtime': 1418.8425, 'train_samples_per_second': 42.335, 'train_steps_per_second': 5.292, 'train_loss': 0.9089754720330476, 'epoch': 3.0}





TrainOutput(global_step=7509, training_loss=0.9089754720330476, metrics={'train_runtime': 1418.8425, 'train_samples_per_second': 42.335, 'train_steps_per_second': 5.292, 'total_flos': 3.0378389083324416e+16, 'train_loss': 0.9089754720330476, 'epoch': 3.0})

In [107]:
model

OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 512, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 1024)
      (project_out): Linear(in_features=1024, out_features=512, bias=False)
      (project_in): Linear(in_features=512, out_features=1024, bias=False)
      (layers): ModuleList(
        (0-23): 24 x OPTDecoderLayer(
          (self_attn): OPTSdpaAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (activation_fn): ReLU()
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features

In [108]:
from transformers import pipeline

#### 3. test finetune effect

In [109]:
pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer, device_map='auto')

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [120]:
ans = pipeline('Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.', max_length=512)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [121]:
print(ans[0]['generated_text'])

Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.

def get_output(x):
    return x**2 + x**2

print(get_output(5))

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output:
5

Output

## finetune qwen2.5-7b

#### 1. load model and tokenizer

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
model_name = 'Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm
  def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):
  def backward(ctx, grad_output):
  @custom_fwd(cast_inputs=torch.float16)
CUDA extension not installed.
CUDA extension not installed.
`low_cpu_mem_usage` was None, now default to True since model is quantized.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 14.68it/s]


#### 2. evaluate origin generate text

In [3]:
from datasets import load_dataset
dataset = load_dataset('lucasmccabe-lmi/CodeAlpaca-20k', split='train')
dataset[0]

{'instruction': 'Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.',
 'input': '',
 'output': 'def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x'}

In [4]:
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (rotary_emb): Qwen2RotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): Qwen2MLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbedding()
  )
  (lm_head): Linear(in_features=3584, out_features=152064, bias=False)
)

In [None]:
# pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)
# generator = pipeline('text-generation', model, tokenizer, device_map='auto')
# result = generator({"question":"Hello, how are you?"}, max_length=50)
# result
import torch
# cuda is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
input_text = dataset['instruction'][0]
inputs = tokenizer.encode(input_text, return_tensors='pt').to(device)
# cpu time >2m40s

OutOfMemoryError: CUDA out of memory. Tried to allocate 260.00 MiB. GPU 0 has a total capacity of 23.61 GiB of which 177.44 MiB is free. Including non-PyTorch memory, this process has 22.19 GiB memory in use. Of the allocated memory 21.42 GiB is allocated by PyTorch, and 323.67 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
model.to(device)
# Generate text
outputs = model.generate(inputs["input_ids"], max_length=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [15]:
tokenizer.decode(outputs[0], skip_special_tokens=True)

TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

In [25]:
sftconfig = SFTConfig(
    output_dir='./tmp',
    warmup_ratio=0.1,
    # report_to='wandb',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


#### 3. get peft model

In [26]:
from peft import TaskType, LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [13]:
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    target_modules=[
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

In [14]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(152064, 3584)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (rotary_emb): Qwen2RotaryEmbedding()
              (k_proj): lora.QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=512, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
                (quant_linear_modu

In [27]:
model.print_trainable_parameters()

trainable params: 80,740,352 || all params: 1,170,939,392 || trainable%: 6.8953


In [44]:
print(tokenizer.chat_template)

{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba C

In [70]:
prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": 'You are a useful assistant to generate python function by user instruction!'},
    {"role": "user", "content": prompt},
    {"role": "assistant", "content": "xldjalskdajlsxx"}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=False
)
print(text)

<|im_start|>system
You are a useful assistant to generate python function by user instruction!<|im_end|>
<|im_start|>user
Give me a short introduction to large language model.<|im_end|>
<|im_start|>assistant
xldjalskdajlsxx<|im_end|>



In [61]:
dataset[0]

{'instruction': 'Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.',
 'input': '',
 'output': 'def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x'}

In [71]:
def prepare_dataset(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        messages = [
            {"role": "system", "content": 'You are a useful assistant to generate python function by user instruction!'},
            {"role": "user", "content": example['instruction'][i]},
            {"role": "assistant", "content": example['output'][i]}
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        output_texts.append(text)
    return output_texts

print(prepare_dataset(dataset[:1]))

['<|im_start|>system\nYou are a useful assistant to generate python function by user instruction!<|im_end|>\n<|im_start|>user\nCreate a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.<|im_end|>\n<|im_start|>assistant\ndef f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x<|im_end|>\n']


In [74]:
from torch.utils.data import DataLoader
collator = DataCollatorForCompletionOnlyLM(response_template='<|im_start|>assistant', tokenizer=tokenizer)

tokenized_data = [tokenizer.encode(s) for s in prepare_dataset(dataset[:1])]
dataloader = DataLoader(tokenized_data, collate_fn=collator, batch_size=1)
row = next(iter(dataloader))
row

{'input_ids': tensor([[151644,   8948,    198,   2610,    525,    264,   5390,  17847,    311,
            6923,  10135,    729,    553,   1196,   7600,      0, 151645,    198,
          151644,    872,    198,   4021,    264,    729,    429,   4990,    264,
            3151,   1946,    323,  18644,    264,   3151,   2550,   1667,    894,
           35972,  19624,     13,   9645,  12159,   2038,    304,  13027,     13,
          151645,    198, 151644,  77091,    198,    750,    282,   2075,    982,
             262,   3190,    262,  37607,    264,   3151,   1946,    323,  18644,
             264,   3151,   2550,   1667,    894,  35972,  19624,    198,    262,
            3190,    262,    470,    856,    334,     17,    488,    220,     18,
           18481, 151645,    198]]),
 'labels': tensor([[  -100,   -100,   -100,   -100,   -100,   -100,   -100,   -100,   -100,
            -100,   -100,   -100,   -100,   -100,   -100,   -100,   -100,   -100,
            -100,   -100,   -100,   -1

In [77]:
print(tokenizer.decode([198,    750,    282,   2075,    982,
             262,   3190,    262,  37607,    264,   3151,   1946,    323,  18644,
             264,   3151,   2550,   1667,    894,  35972,  19624,    198,    262,
            3190,    262,    470,    856,    334,     17,    488,    220,     18,
           18481, 151645,    198]))


def f(x):
    """
    Takes a specific input and produces a specific output using any mathematical operators
    """
    return x**2 + 3*x<|im_end|>



In [79]:
trainer = SFTTrainer(
    model=model,
    args=sftconfig,
    train_dataset=dataset,
    formatting_func=prepare_dataset,
    tokenizer=tokenizer,
    data_collator=collator,
)

Map: 100%|██████████| 20022/20022 [00:01<00:00, 14958.20 examples/s]


In [80]:
trainer.train()             

  return fn(*args, **kwargs)
  2%|▏         | 500/30033 [04:23<4:04:48,  2.01it/s]

{'loss': 1.4866, 'grad_norm': 2.6288650035858154, 'learning_rate': 3.3288948069241015e-06, 'epoch': 0.05}


  return fn(*args, **kwargs)
  3%|▎         | 1000/30033 [08:49<4:05:11,  1.97it/s]

{'loss': 0.5673, 'grad_norm': 0.7912633419036865, 'learning_rate': 6.657789613848203e-06, 'epoch': 0.1}


  return fn(*args, **kwargs)
  5%|▍         | 1500/30033 [13:16<4:04:40,  1.94it/s]

{'loss': 0.5572, 'grad_norm': 0.7089880108833313, 'learning_rate': 9.986684420772305e-06, 'epoch': 0.15}


  return fn(*args, **kwargs)
  7%|▋         | 2000/30033 [17:42<4:06:27,  1.90it/s]

{'loss': 0.5324, 'grad_norm': 0.658597469329834, 'learning_rate': 1.3315579227696406e-05, 'epoch': 0.2}


  return fn(*args, **kwargs)
  8%|▊         | 2500/30033 [22:08<4:12:45,  1.82it/s]

{'loss': 0.5511, 'grad_norm': 1.1446051597595215, 'learning_rate': 1.664447403462051e-05, 'epoch': 0.25}


  return fn(*args, **kwargs)
 10%|▉         | 3000/30033 [26:12<3:31:49,  2.13it/s]

{'loss': 0.5342, 'grad_norm': 1.456231951713562, 'learning_rate': 1.997336884154461e-05, 'epoch': 0.3}


  return fn(*args, **kwargs)
 12%|█▏        | 3500/30033 [30:13<3:24:39,  2.16it/s]

{'loss': 0.5662, 'grad_norm': 1.9061211347579956, 'learning_rate': 1.963298679196419e-05, 'epoch': 0.35}


  return fn(*args, **kwargs)
 13%|█▎        | 4000/30033 [34:16<3:31:08,  2.05it/s]

{'loss': 0.5321, 'grad_norm': 1.3233340978622437, 'learning_rate': 1.9263013799992604e-05, 'epoch': 0.4}


  return fn(*args, **kwargs)
 15%|█▍        | 4500/30033 [38:16<3:15:21,  2.18it/s]

{'loss': 0.5505, 'grad_norm': 1.7564656734466553, 'learning_rate': 1.8893040808021018e-05, 'epoch': 0.45}


  return fn(*args, **kwargs)
 17%|█▋        | 5000/30033 [42:16<3:37:24,  1.92it/s]

{'loss': 0.52, 'grad_norm': 2.2533063888549805, 'learning_rate': 1.852306781604943e-05, 'epoch': 0.5}


  return fn(*args, **kwargs)
 18%|█▊        | 5500/30033 [46:18<3:08:52,  2.16it/s]

{'loss': 0.5602, 'grad_norm': 2.0162696838378906, 'learning_rate': 1.8153094824077844e-05, 'epoch': 0.55}


  return fn(*args, **kwargs)
 20%|█▉        | 6000/30033 [50:20<3:18:13,  2.02it/s]

{'loss': 0.5407, 'grad_norm': 2.0752482414245605, 'learning_rate': 1.7783121832106258e-05, 'epoch': 0.6}


  return fn(*args, **kwargs)
 22%|██▏       | 6500/30033 [54:22<3:09:30,  2.07it/s]

{'loss': 0.5509, 'grad_norm': 1.3928115367889404, 'learning_rate': 1.7413148840134673e-05, 'epoch': 0.65}


  return fn(*args, **kwargs)
 23%|██▎       | 7000/30033 [58:22<3:04:28,  2.08it/s]

{'loss': 0.5521, 'grad_norm': 3.818308115005493, 'learning_rate': 1.7043175848163084e-05, 'epoch': 0.7}


  return fn(*args, **kwargs)
 25%|██▍       | 7500/30033 [1:02:26<2:56:38,  2.13it/s]

{'loss': 0.5447, 'grad_norm': 2.0303750038146973, 'learning_rate': 1.6673202856191498e-05, 'epoch': 0.75}


  return fn(*args, **kwargs)
 27%|██▋       | 8000/30033 [1:06:26<3:01:46,  2.02it/s]

{'loss': 0.531, 'grad_norm': 0.8033179044723511, 'learning_rate': 1.6303229864219913e-05, 'epoch': 0.8}


  return fn(*args, **kwargs)
 28%|██▊       | 8500/30033 [1:10:26<2:44:39,  2.18it/s]

{'loss': 0.5256, 'grad_norm': 1.6509581804275513, 'learning_rate': 1.5933256872248327e-05, 'epoch': 0.85}


  return fn(*args, **kwargs)
 30%|██▉       | 9000/30033 [1:14:27<2:41:41,  2.17it/s]

{'loss': 0.5473, 'grad_norm': 1.8211907148361206, 'learning_rate': 1.556328388027674e-05, 'epoch': 0.9}


  return fn(*args, **kwargs)
 32%|███▏      | 9500/30033 [1:18:27<2:53:50,  1.97it/s]

{'loss': 0.5576, 'grad_norm': 0.8410215377807617, 'learning_rate': 1.5193310888305154e-05, 'epoch': 0.95}


  return fn(*args, **kwargs)
 33%|███▎      | 10000/30033 [1:22:28<2:30:57,  2.21it/s]

{'loss': 0.534, 'grad_norm': 2.775012969970703, 'learning_rate': 1.4823337896333569e-05, 'epoch': 1.0}


  return fn(*args, **kwargs)
 35%|███▍      | 10500/30033 [1:26:30<2:31:05,  2.15it/s]

{'loss': 0.5076, 'grad_norm': 1.3038440942764282, 'learning_rate': 1.4453364904361981e-05, 'epoch': 1.05}


  return fn(*args, **kwargs)
 37%|███▋      | 11000/30033 [1:30:30<2:27:48,  2.15it/s]

{'loss': 0.4976, 'grad_norm': 0.6034684181213379, 'learning_rate': 1.4083391912390396e-05, 'epoch': 1.1}


  return fn(*args, **kwargs)
 38%|███▊      | 11500/30033 [1:34:31<2:22:01,  2.17it/s]

{'loss': 0.4984, 'grad_norm': 0.9060324430465698, 'learning_rate': 1.371341892041881e-05, 'epoch': 1.15}


  return fn(*args, **kwargs)
 40%|███▉      | 12000/30033 [1:38:47<2:30:28,  2.00it/s]

{'loss': 0.5115, 'grad_norm': 2.3620588779449463, 'learning_rate': 1.3343445928447225e-05, 'epoch': 1.2}


  return fn(*args, **kwargs)
 42%|████▏     | 12500/30033 [1:43:04<2:36:00,  1.87it/s]

{'loss': 0.4997, 'grad_norm': 1.538560390472412, 'learning_rate': 1.2973472936475637e-05, 'epoch': 1.25}


  return fn(*args, **kwargs)
 43%|████▎     | 13000/30033 [1:47:28<2:26:14,  1.94it/s]

{'loss': 0.4969, 'grad_norm': 1.0940500497817993, 'learning_rate': 1.2603499944504052e-05, 'epoch': 1.3}


  return fn(*args, **kwargs)
 45%|████▍     | 13500/30033 [1:51:45<2:13:29,  2.06it/s]

{'loss': 0.5078, 'grad_norm': 1.4265564680099487, 'learning_rate': 1.2233526952532466e-05, 'epoch': 1.35}


  return fn(*args, **kwargs)
 47%|████▋     | 14000/30033 [1:55:48<2:03:13,  2.17it/s]

{'loss': 0.5101, 'grad_norm': 2.7280235290527344, 'learning_rate': 1.186355396056088e-05, 'epoch': 1.4}


  return fn(*args, **kwargs)
 48%|████▊     | 14500/30033 [1:59:49<2:00:21,  2.15it/s]

{'loss': 0.4933, 'grad_norm': 2.8059093952178955, 'learning_rate': 1.1493580968589293e-05, 'epoch': 1.45}


  return fn(*args, **kwargs)
 50%|████▉     | 15000/30033 [2:03:52<1:57:16,  2.14it/s]

{'loss': 0.5158, 'grad_norm': 1.8480415344238281, 'learning_rate': 1.1123607976617708e-05, 'epoch': 1.5}


  return fn(*args, **kwargs)
 52%|█████▏    | 15500/30033 [2:07:55<1:52:22,  2.16it/s]

{'loss': 0.4899, 'grad_norm': 4.797427654266357, 'learning_rate': 1.0753634984646122e-05, 'epoch': 1.55}


  return fn(*args, **kwargs)
 53%|█████▎    | 16000/30033 [2:11:58<1:55:23,  2.03it/s]

{'loss': 0.4702, 'grad_norm': 0.9823585748672485, 'learning_rate': 1.0383661992674537e-05, 'epoch': 1.6}


  return fn(*args, **kwargs)
 55%|█████▍    | 16500/30033 [2:16:00<1:49:22,  2.06it/s]

{'loss': 0.5044, 'grad_norm': 1.6991246938705444, 'learning_rate': 1.0013689000702948e-05, 'epoch': 1.65}


  return fn(*args, **kwargs)
 57%|█████▋    | 17000/30033 [2:19:59<1:39:17,  2.19it/s]

{'loss': 0.4975, 'grad_norm': 1.0938010215759277, 'learning_rate': 9.643716008731362e-06, 'epoch': 1.7}


  return fn(*args, **kwargs)
 58%|█████▊    | 17500/30033 [2:24:00<1:39:57,  2.09it/s]

{'loss': 0.5151, 'grad_norm': 1.0464065074920654, 'learning_rate': 9.273743016759777e-06, 'epoch': 1.75}


  return fn(*args, **kwargs)
 60%|█████▉    | 18000/30033 [2:28:04<1:44:42,  1.92it/s]

{'loss': 0.4913, 'grad_norm': 1.3680261373519897, 'learning_rate': 8.903770024788191e-06, 'epoch': 1.8}


  return fn(*args, **kwargs)
 62%|██████▏   | 18500/30033 [2:32:38<1:43:23,  1.86it/s]

{'loss': 0.4999, 'grad_norm': 1.0150086879730225, 'learning_rate': 8.533797032816606e-06, 'epoch': 1.85}


  return fn(*args, **kwargs)
 63%|██████▎   | 19000/30033 [2:37:10<1:39:10,  1.85it/s]

{'loss': 0.5044, 'grad_norm': 1.9498538970947266, 'learning_rate': 8.163824040845018e-06, 'epoch': 1.9}


  return fn(*args, **kwargs)
 65%|██████▍   | 19500/30033 [2:41:43<1:35:25,  1.84it/s]

{'loss': 0.5037, 'grad_norm': 1.0249853134155273, 'learning_rate': 7.793851048873433e-06, 'epoch': 1.95}


  return fn(*args, **kwargs)
 67%|██████▋   | 20000/30033 [2:46:19<1:31:40,  1.82it/s]

{'loss': 0.4806, 'grad_norm': 1.302298665046692, 'learning_rate': 7.423878056901846e-06, 'epoch': 2.0}


  return fn(*args, **kwargs)
 68%|██████▊   | 20500/30033 [2:50:50<1:24:29,  1.88it/s]

{'loss': 0.4635, 'grad_norm': 2.38840913772583, 'learning_rate': 7.053905064930261e-06, 'epoch': 2.05}


  return fn(*args, **kwargs)
 70%|██████▉   | 21000/30033 [2:55:25<1:18:26,  1.92it/s]

{'loss': 0.4428, 'grad_norm': 2.568488597869873, 'learning_rate': 6.683932072958674e-06, 'epoch': 2.1}


  return fn(*args, **kwargs)
 72%|███████▏  | 21500/30033 [2:59:57<1:20:04,  1.78it/s]

{'loss': 0.4478, 'grad_norm': 3.125558376312256, 'learning_rate': 6.313959080987089e-06, 'epoch': 2.15}


  return fn(*args, **kwargs)
 73%|███████▎  | 22000/30033 [3:04:32<1:13:31,  1.82it/s]

{'loss': 0.4586, 'grad_norm': 1.1532728672027588, 'learning_rate': 5.943986089015502e-06, 'epoch': 2.2}


  return fn(*args, **kwargs)
 75%|███████▍  | 22500/30033 [3:09:06<1:10:48,  1.77it/s]

{'loss': 0.4348, 'grad_norm': 1.3490359783172607, 'learning_rate': 5.574013097043917e-06, 'epoch': 2.25}


  return fn(*args, **kwargs)
 77%|███████▋  | 23000/30033 [3:13:41<1:03:32,  1.84it/s]

{'loss': 0.4603, 'grad_norm': 1.4750458002090454, 'learning_rate': 5.20404010507233e-06, 'epoch': 2.3}


  return fn(*args, **kwargs)
 78%|███████▊  | 23500/30033 [3:18:17<1:11:24,  1.52it/s]

{'loss': 0.4478, 'grad_norm': 1.1843268871307373, 'learning_rate': 4.834067113100744e-06, 'epoch': 2.35}


  return fn(*args, **kwargs)
 80%|███████▉  | 24000/30033 [3:22:44<51:00,  1.97it/s]  

{'loss': 0.4482, 'grad_norm': 2.1571550369262695, 'learning_rate': 4.464094121129158e-06, 'epoch': 2.4}


  return fn(*args, **kwargs)
 82%|████████▏ | 24500/30033 [3:27:03<47:52,  1.93it/s]  

{'loss': 0.4619, 'grad_norm': 3.71097731590271, 'learning_rate': 4.094121129157572e-06, 'epoch': 2.45}


  return fn(*args, **kwargs)
 83%|████████▎ | 25000/30033 [3:31:24<45:30,  1.84it/s]  

{'loss': 0.4469, 'grad_norm': 1.9620343446731567, 'learning_rate': 3.7241481371859856e-06, 'epoch': 2.5}


  return fn(*args, **kwargs)
 85%|████████▍ | 25500/30033 [3:35:46<37:09,  2.03it/s]  

{'loss': 0.4378, 'grad_norm': 9.119688987731934, 'learning_rate': 3.3541751452143997e-06, 'epoch': 2.55}


  return fn(*args, **kwargs)
 87%|████████▋ | 26000/30033 [3:39:57<36:05,  1.86it/s]  

{'loss': 0.4388, 'grad_norm': 1.8868465423583984, 'learning_rate': 2.9842021532428132e-06, 'epoch': 2.6}


  return fn(*args, **kwargs)
 88%|████████▊ | 26500/30033 [3:43:59<27:29,  2.14it/s]  

{'loss': 0.4397, 'grad_norm': 1.5362844467163086, 'learning_rate': 2.6142291612712273e-06, 'epoch': 2.65}


  return fn(*args, **kwargs)
 90%|████████▉ | 27000/30033 [3:48:01<25:51,  1.95it/s]

{'loss': 0.4605, 'grad_norm': 3.756699323654175, 'learning_rate': 2.2442561692996413e-06, 'epoch': 2.7}


  return fn(*args, **kwargs)
 92%|█████████▏| 27500/30033 [3:52:02<22:03,  1.91it/s]

{'loss': 0.4472, 'grad_norm': 2.3433690071105957, 'learning_rate': 1.874283177328055e-06, 'epoch': 2.75}


  return fn(*args, **kwargs)
 93%|█████████▎| 28000/30033 [3:56:31<18:40,  1.82it/s]

{'loss': 0.4494, 'grad_norm': 2.2721493244171143, 'learning_rate': 1.504310185356469e-06, 'epoch': 2.8}


  return fn(*args, **kwargs)
 95%|█████████▍| 28500/30033 [4:01:06<14:02,  1.82it/s]

{'loss': 0.456, 'grad_norm': 2.552201747894287, 'learning_rate': 1.134337193384883e-06, 'epoch': 2.85}


  return fn(*args, **kwargs)
 97%|█████████▋| 29000/30033 [4:05:38<09:19,  1.85it/s]

{'loss': 0.4502, 'grad_norm': 2.515944004058838, 'learning_rate': 7.643642014132969e-07, 'epoch': 2.9}


  return fn(*args, **kwargs)
 98%|█████████▊| 29500/30033 [4:10:12<04:40,  1.90it/s]

{'loss': 0.4416, 'grad_norm': 2.8876447677612305, 'learning_rate': 3.943912094417108e-07, 'epoch': 2.95}


  return fn(*args, **kwargs)
100%|█████████▉| 30000/30033 [4:14:47<00:17,  1.94it/s]

{'loss': 0.4404, 'grad_norm': 2.6801083087921143, 'learning_rate': 2.4418217470124684e-08, 'epoch': 3.0}


  return fn(*args, **kwargs)
100%|██████████| 30033/30033 [4:15:08<00:00,  1.96it/s]

{'train_runtime': 15308.6572, 'train_samples_per_second': 3.924, 'train_steps_per_second': 1.962, 'train_loss': 0.5135501870717479, 'epoch': 3.0}





TrainOutput(global_step=30033, training_loss=0.5135501870717479, metrics={'train_runtime': 15308.6572, 'train_samples_per_second': 3.924, 'train_steps_per_second': 1.962, 'total_flos': 2.891667335836877e+16, 'train_loss': 0.5135501870717479, 'epoch': 3.0})

In [83]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(152064, 3584)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (rotary_emb): Qwen2RotaryEmbedding()
              (k_proj): lora.QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=512, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
                (quant_linear_modu

In [84]:
dataset[0]

{'instruction': 'Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.',
 'input': '',
 'output': 'def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x'}

In [88]:
trainer.save_model('lora_qwen25_coder_7b')

#### 4. evaluate base model

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained('lora_qwen25_coder_7b')

In [3]:
from peft import PeftModel
model_name = 'Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8'
base_model = AutoModelForCausalLM.from_pretrained(model_name)
base_model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
base_model

  def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):
  def backward(ctx, grad_output):
  @custom_fwd(cast_inputs=torch.float16)
CUDA extension not installed.
CUDA extension not installed.
`low_cpu_mem_usage` was None, now default to True since model is quantized.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 14.73it/s]


Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151672, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (rotary_emb): Qwen2RotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): Qwen2MLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbedding()
  )
  (lm_head): Linear(in_features=3584, out_features=151672, bias=False)
)

In [5]:
messages = [
    {"role": "system", "content": 'You are a useful assistant to generate python function by user instruction!'},
    {"role": "user", "content": 'Create a function implements fibonacci series in Python.'},
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
text

'<|im_start|>system\nYou are a useful assistant to generate python function by user instruction!<|im_end|>\n<|im_start|>user\nCreate a function implements fibonacci series in Python.<|im_end|>\n<|im_start|>assistant\n'

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_model.to(device)
inputs = tokenizer(text, return_tensors='pt').to(device)
outputs = base_model.generate(**inputs, max_length=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

system
You are a useful assistant to generate python function by user instruction!
user
Create a function implements fibonacci series in Python.
assistant
Certainly! Below is a simple Python function that generates the Fibonacci series up to a specified number of terms:

```python
def fibonacci_series(n):
    """
    Generate a list containing the first n numbers of the Fibonacci series.

    :param n: The number of terms in the Fibonacci series to generate.
    :return:


In [9]:
from transformers import pipeline
gen0 = pipeline('text-generation', model=base_model, tokenizer=tokenizer, device='cuda')
outputs = gen0(text, max_length=128)
print(outputs[0]['generated_text'])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


<|im_start|>system
You are a useful assistant to generate python function by user instruction!<|im_end|>
<|im_start|>user
Create a function implements fibonacci series in Python.<|im_end|>
<|im_start|>assistant
Certainly! Below is a simple Python function that generates the Fibonacci series up to a specified number of terms:

```python
def fibonacci(n):
    """
    Generate a list containing the first n numbers of the Fibonacci series.

    Parameters:
    n (int): The number of terms in the Fibonacci series to generate.

    Returns:
    list: A list containing the first n numbers of the Fibonacci series.
    """
    if n <= 0:
        return []



##### 4.1 clear cache

In [10]:
import gc
# del base_model
gc.collect()
torch.cuda.empty_cache()

#### 5. evaluate peft model

In [11]:
new_model = PeftModel.from_pretrained(base_model, 'lora_qwen25_coder_7b')
new_model.print_trainable_parameters()
new_model

trainable params: 0 || all params: 1,168,129,536 || trainable%: 0.0000


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151672, 3584)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (rotary_emb): Qwen2RotaryEmbedding()
              (k_proj): lora.QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=512, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
                (quant_linear_modu

In [None]:
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# new_model.to(device)
# input_ids = tokenizer.encode(text, return_tensors='pt').to(device)
# output = new_model.generate(input_ids, max_length=64)
# print(tokenizer.decode(output[0], skip_special_tokens=False))

In [13]:
from transformers import pipeline
device = 'cuda' if torch.cuda.is_available() else 'cpu'
generator = pipeline('text-generation', model=new_model, tokenizer=tokenizer, device=device)
print(generator(text, max_length=128)[0]['generated_text'])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

<|im_start|>system
You are a useful assistant to generate python function by user instruction!<|im_end|>
<|im_start|>user
Create a function implements fibonacci series in Python.<|im_end|>
<|im_start|>assistant
def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)

for i in range(10):
    print(fibonacci(i))
