In [None]:
!pip install peft trl datasets

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, Conv1D, TrainingArguments
from trl import SFTConfig, SFTTrainer

In [None]:
# Load the pretrained "facebook/incoder-1B" causal LM together with its tokenizer.
model = AutoModelForCausalLM.from_pretrained("facebook/incoder-1B")
tokenizer = AutoTokenizer.from_pretrained("facebook/incoder-1B")

In [None]:
# Reuse the tokenizer's end-of-sequence token as its padding token.
# NOTE(review): once this assignment has run, any later `tokenizer.pad_token is None`
# check is always False on a top-to-bottom run — confirm which padding strategy
# (EOS-as-pad vs. a dedicated pad token) is actually intended.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Fallback for a tokenizer without a padding token: register a dedicated
# '[PAD]' special token and resize the model's token embeddings to the new
# tokenizer length so the added id has an embedding row.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens(dict(pad_token='[PAD]'))
    model.resize_token_embeddings(len(tokenizer))

In [None]:
# Sanity check: evaluates to True only while no padding token is configured.
tokenizer.pad_token is None

False

In [None]:
def get_specific_layer_names(model, layer_types=None):
    """Collect the leaf names of sub-modules that are candidate adapter targets.

    The model is walked with ``named_modules()``; every sub-module that is an
    instance of one of ``layer_types`` contributes the *last* component of its
    dotted name (``"q_proj"`` for ``"model.layers.0.self_attn.q_proj"``).
    Using the last component works at any nesting depth, so shallow modules
    are reported by name instead of collapsing to an empty string.

    Args:
        model: A ``torch.nn.Module`` to inspect.
        layer_types: Optional tuple of module classes to match. When omitted,
            ``(torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d, Conv1D)``
            is used.

    Returns:
        list[str]: One leaf name per matching sub-module, in traversal order.
        Duplicates are kept; wrap the result in ``set()`` to de-duplicate.
    """
    if layer_types is None:
        # Resolved at call time so the default uses the classes imported by the notebook.
        layer_types = (torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d, Conv1D)

    layer_names = []
    for name, module in model.named_modules():
        if not isinstance(module, layer_types):
            continue
        # The root module is registered under the empty name; it has no leaf
        # name that could be used as a target, so it is skipped.
        leaf_name = name.rsplit('.', 1)[-1]
        if leaf_name:
            layer_names.append(leaf_name)

    return layer_names

# Show each collected layer name once (set order is arbitrary).
list({*get_specific_layer_names(model)})

['', 'out_proj', 'v_proj', 'k_proj', 'q_proj']

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank-8 updates on the
# attention query/key projections, with the adapter weights initialised via
# the "pissa_niter_4" scheme (see the PEFT docs for `init_lora_weights`).
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=['q_proj', 'k_proj'],  # names of the sub-modules that receive adapters
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    init_lora_weights="pissa_niter_4",
)

# Wrap the base model with the adapters and report how many parameters train.
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

trainable params: 1,572,864 || all params: 1,313,638,400 || trainable%: 0.1197


In [None]:
# Dump the module hierarchy of `model` for inspection.
print(model)

In [None]:
# Load the "Vezora/Tested-143k-Python-Alpaca" dataset; training below reads its 'train' split.
dataset = load_dataset("Vezora/Tested-143k-Python-Alpaca")

Downloading readme:   0%|          | 0.00/4.16k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/295M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/143327 [00:00<?, ? examples/s]

In [None]:
# Text-column and sequence-length options belong on SFTConfig; passing them
# directly to SFTTrainer is deprecated ("please use the SFTConfig").
# NOTE(review): only the "instruction" column is used as training text. If the
# dataset also carries the expected answer in another column, the model never
# sees it — confirm the dataset schema and the intended prompt format.
sft_config = SFTConfig(
    output_dir="./output",
    num_train_epochs=1,
    dataset_text_field="instruction",
    max_seq_length=128,  # longer examples are cut to this many tokens
)

trainer = SFTTrainer(
    model=peft_model,
    train_dataset=dataset['train'],
    tokenizer=tokenizer,
    args=sft_config,
)
trainer.train()

# Persist only the adapter weights/config (not the full base model).
# NOTE(review): the adapter was created with a PiSSA `init_lora_weights`
# setting; per the PEFT docs that initialisation also changes the base
# weights, so confirm that a plain save is loadable on an unmodified base
# model (PEFT documents `path_initial_model_for_weight_conversion` for this).
peft_model.save_pretrained("fine-tunned-model_python_code")


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Step,Training Loss
500,10.3877
1000,9.3646
1500,9.2179
2000,9.1581
2500,9.1093
3000,9.0722
3500,9.0521
4000,9.0514
4500,9.0186
5000,9.0176




Step,Training Loss
500,10.3877
1000,9.3646
1500,9.2179
2000,9.1581
2500,9.1093
3000,9.0722
3500,9.0521
4000,9.0514
4500,9.0186
5000,9.0176




In [None]:
# Rebuild a fresh base model to attach the saved adapter to.
model = AutoModelForCausalLM.from_pretrained("facebook/incoder-1B")

# If tokens were added to the tokenizer before training, the saved checkpoint
# holds a larger embedding matrix than the freshly loaded base model. Grow the
# embeddings first so those rows are loaded instead of being skipped as a
# size mismatch. This is a no-op when the sizes already agree.
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
    model.resize_token_embeddings(len(tokenizer))

# Load from the same relative directory the adapter was saved to.
# `ignore_mismatched_sizes=True` is kept so that a checkpoint produced with a
# different vocabulary still loads (with a warning) rather than raising.
peft_model = PeftModel.from_pretrained(
    model,
    "fine-tunned-model_python_code",
    ignore_mismatched_sizes=True,
)

- base_model.model.model.embed_tokens.weight: found shape torch.Size([50519, 2048]) in the checkpoint and torch.Size([50518, 2048]) in the model instantiated.


In [None]:
# Dump the module hierarchy of the adapter-wrapped model for inspection.
print(peft_model)

## Testing

In [None]:
import torch

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Move the model to the selected device and switch to evaluation mode so that
# dropout layers (including the adapters' lora_dropout) are inactive.
peft_model.to(device)
peft_model.eval()

# Keep the full tokenizer output: generate() should receive the attention
# mask along with the ids, because the pad token was set to the EOS token
# and the mask therefore cannot be inferred reliably from the ids alone.
inputs = tokenizer("Defime me a function which can take 2 integers and return the addition of both the numbers.", return_tensors="pt").to(device)
input_ids = inputs["input_ids"]

# Generate without tracking gradients (inference only).
with torch.no_grad():
    output = peft_model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,
        pad_token_id=tokenizer.pad_token_id,  # explicit, so generate() does not have to guess
    )

# Bring the generated ids back to the CPU before decoding.
output = output.cpu()

# Decode the first (only) sequence; the prompt is included in the output.
print(tokenizer.decode(output[0], skip_special_tokens=True))

Using device: cuda
Defime me a function which can take 2 integers and return the addition of both the numbers.
<code>
def add(a, b):
    return a + b
</code>
<|/ a tags=python,python-3.x |>
<| c |>
This doesn't answer the question...
<|/ c |>
<| c |>
This doesn't answer the question...
<|/ c |>
This doesn't answer the question....
<|/ c |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a tags=python,python-3.x |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a |>
<|/ a |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a tags=python,python-3.x |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a tags=python,python-3.x |>
<|/ a |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a and b
<|/ a and b
<|/ a |>
<| a dscore=0 |>
<code>
def add(a, b):
    return a + b
</code>
<|/ a |>
</code>
<|/ a |>
<|/ a |>
<|/ a tags