# CodeMind fine tuning
## Methods:
* model: falcon-7b-instruct (`tiiuae/falcon-7b-instruct`)
* PEFT, QLoRA

In [1]:
# Mount Google Drive at /content/drive so the project folder stored on Drive
# is reachable from this Colab session.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Work from the project folder on Drive: the relative paths used later
# (requirements.txt, output_dir='out') resolve against this directory.
%cd /content/drive/MyDrive/CodeMind

/content/drive/MyDrive/CodeMind


In [2]:
# Use %pip rather than !pip: the magic installs into the environment of the
# running kernel, whereas !pip runs whichever pip is first on the shell PATH.
# NOTE(review): versions are unpinned (and --upgrade is used), so a re-run may
# pull different library versions than the ones this notebook was written for.
%pip install -r requirements.txt --upgrade
%pip install rouge_score
%pip install torch-xla



In [None]:
import datasets
import numpy as np
import torch
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
import evaluate

In [None]:
import wandb

# Authenticate with Weights & Biases before opening a run.
wandb.login()

# Open the run that training metrics are reported to (the TrainingArguments
# below use report_to='wandb').
# NOTE(review): the project/run names encode the model ("falcon-7b-instruct")
# and the setup ("4 bit qlora"); keep them in sync with what is actually trained.
wandb.init(
    project='falcon-7b-instruct user submission',
    name = '4 bit qlora',
)

In [None]:
# Cell-local imports for the QLoRA setup (both libraries are already used by
# this notebook; they are imported here so the cell is self-contained).
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig

model_id = 'tiiuae/falcon-7b-instruct'
token = userdata.get('HF_READ')

tokenizer = AutoTokenizer.from_pretrained(model_id,
                                          trust_remote_code=True,
                                          token=token)
# Reuse EOS as the padding token so the tokenizer can pad sequences.
tokenizer.pad_token = tokenizer.eos_token

# QLoRA step 1: load the frozen base model with 4-bit NF4 weights.
# Previously the model was loaded in full precision with no adapter, which
# contradicts the "4 bit qlora" run name and makes this a full 7B fine-tune.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
)

# QLoRA step 2: make the quantized model trainable and attach a LoRA adapter.
# r=8, target 'query_key_value' and zero dropout mirror the adapter structure
# printed when the pushed revision is loaded back at the end of the notebook.
# NOTE(review): lora_alpha is not recoverable from that printout; 16 is a
# conventional choice for r=8 — adjust if the original run used another value.
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['query_key_value'],
    lora_dropout=0.0,
    bias='none',
    task_type='CAUSAL_LM',
)
model = get_peft_model(model, lora_config)

# Sanity check: only the adapter weights should be trainable.
model.print_trainable_parameters()

In [None]:
# Load the 'train' split of the LeetCode user-submission dataset.
# NOTE: despite the name, `df` is a `datasets` object (it is later used with
# .map / .remove_columns / .train_test_split), not a pandas DataFrame.
df = datasets.load_dataset('kreimben/leetcode_user_submissions', split='train')

In [None]:
def formatting_func(d):
    """Render one dataset record as a single training prompt string.

    The record's 'question_content', 'tag', 'level' and 'question_hints'
    fields form the ``<human>:`` part and 'content' forms the
    ``<assistant>:`` part, one section per line. Leading and trailing
    whitespace of the assembled text is stripped.
    """
    sections = (
        f"<human>: {d['question_content']}",
        f"Tag: {d['tag']}",
        f"Level: {d['level']}",
        f"Hints: {d['question_hints']}",
        f"<assistant>: {d['content']}",
    )
    return "\n".join(sections).strip()

def tokenize(example):
    """Turn one dataset record into model inputs for causal-LM fine-tuning.

    The record is rendered with ``formatting_func`` and tokenized with the
    notebook-level ``tokenizer`` (padded to the tokenizer's maximum length and
    truncated). A ``labels`` entry is added so the ``Trainer`` can compute a
    loss: it repeats ``input_ids``, with padded positions set to -100 so the
    loss ignores them.

    Args:
        example: a single dataset row (mapping of column name to value).

    Returns:
        The tokenizer output with ``input_ids``, ``attention_mask`` and
        ``labels``.
    """
    full_prompt = formatting_func(example)
    tokenized = tokenizer(full_prompt, padding='max_length', truncation=True)
    # Without labels the model returns no loss and Trainer.train() fails.
    # The attention mask (not the token id) marks padding, because the pad
    # token is the EOS token and a genuine EOS must not be masked by accident.
    tokenized['labels'] = [
        token_id if is_real else -100
        for token_id, is_real in zip(tokenized['input_ids'], tokenized['attention_mask'])
    ]
    return tokenized

# Tokenize every record, then drop the raw text/metadata columns so that only
# the tokenizer's outputs are left in the dataset.
df = df.map(tokenize).remove_columns([
    'question_content',
    'title_slug',
    'tag',
    'level',
    'question_hints',
    'view_count',
    'vote_count',
    'content',
])

# Reproducible 80/20 split; pick the two parts by name rather than by position.
split = df.train_test_split(test_size=0.2, seed=42)
train_dataset, test_dataset = split['train'], split['test']
del df, split

# Display both splits as the cell output.
train_dataset, test_dataset

In [None]:
# `eval_steps` only takes effect when the evaluation strategy is 'steps'; the
# default strategy is 'no', so evaluation (and compute_metrics) never ran.
# The argument was renamed from `evaluation_strategy` to `eval_strategy` in
# newer transformers releases, so use whichever name this install defines.
eval_strategy_arg = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)

# NOTE(review): when metrics are computed, predictions for the whole eval set
# are gathered first; if evaluation runs out of memory, evaluate on a smaller
# subset or reduce the logits before they are accumulated.
ta = TrainingArguments(
    output_dir='out',
    overwrite_output_dir=True,

    # Optimizer / schedule.
    fp16=False,
    optim="paged_adamw_8bit",
    lr_scheduler_type='cosine',
    warmup_ratio=.05,

    # One sequence per step on each device.
    per_device_train_batch_size=1,
    per_device_eval_batch_size =1,
    # gradient_accumulation_steps=4,

    # Log every step to Weights & Biases.
    logging_steps=1,
    report_to='wandb',

    # Evaluate on the held-out split every 100 optimizer steps.
    # max_steps=1000,
    eval_steps=100,
    **{eval_strategy_arg: 'steps'},
)

In [None]:
# Metric objects used by compute_metrics (BLEU and ROUGE text-overlap scores).
metric_bleu = evaluate.load("bleu")
metric_rouge = evaluate.load("rouge")


def compute_metrics(eval_pred):
    """Score predicted text against the reference text with BLEU and ROUGE.

    Args:
        eval_pred: ``(predictions, labels)`` pair. ``predictions`` may be raw
            logits of shape (batch, seq, vocab), a tuple whose first element
            is such logits, or already-selected token ids of shape
            (batch, seq). ``labels`` are token ids of shape (batch, seq) in
            which -100 marks positions to ignore.

    Returns:
        dict with the keys 'bleu', 'rouge1', 'rouge2' and 'rougeL'.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    # Reduce logits to token ids unless that was already done upstream.
    predictions = np.argmax(logits, axis=-1) if logits.ndim == 3 else logits

    # A causal LM predicts token t+1 at position t, so align the prediction at
    # position t with the label at position t+1 before comparing text.
    predictions = predictions[:, :-1]
    labels = labels[:, 1:]

    # -100 is not a real token id and cannot be decoded. Replace ignored
    # positions with the pad token in both arrays; it is dropped again by
    # skip_special_tokens, so the model's output at padded positions does not
    # leak into the decoded prediction.
    keep = labels != -100
    pad_id = tokenizer.pad_token_id
    predictions = np.where(keep, predictions, pad_id)
    labels = np.where(keep, labels, pad_id)

    # Decode the predictions and labels
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # BLEU takes one prediction string per example and a list of reference
    # strings per example (previously the whole batch was wrapped in one list).
    bleu_score = metric_bleu.compute(
        predictions=decoded_preds,
        references=[[reference] for reference in decoded_labels],
    )

    # ROUGE takes one prediction and one reference string per example.
    rouge_scores = metric_rouge.compute(predictions=decoded_preds, references=decoded_labels)

    return {
        "bleu": bleu_score["bleu"],
        "rouge1": rouge_scores["rouge1"],
        "rouge2": rouge_scores["rouge2"],
        "rougeL": rouge_scores["rougeL"],
    }

In [None]:
# data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Bring together the model, the training configuration, both dataset splits
# and the metric callback.
trainer = Trainer(
    model=model,
    args=ta,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# from trl import SFTTrainer

# trainer = SFTTrainer(
#     model,
#     train_dataset=train_dataset,
#     eval_dataset=test_dataset,
#     args=ta,
#     compute_metrics=compute_metrics,
#     packing=False,
# )

In [None]:
# Run fine-tuning; the training arguments log every step (logging_steps=1) to W&B.
trainer.train()

In [None]:
# Settings for uploading the trained model to Hugging Face: target repo,
# write token (read from the Colab secret store) and target revision.
peft_model_id = 'kreimben/CodeMind'
write_token = userdata.get('HF_WRITE')
# NOTE(review): 'instrcut' looks like a typo for 'instruct'. It is left as-is
# because the same value is used later to load this revision back; renaming it
# would point at a different revision.
revision_id = 'falcon-7b-instrcut-20240406'

In [None]:
# Upload the trained model to the `peft_model_id` repo under `revision_id`,
# authenticating with the write token.
model.push_to_hub(
    repo_id=peft_model_id,
    token=write_token,
    revision=revision_id,
)

# generation_config = GenerationConfig(
#     penalty_alpha=.6,
#     do_sample=True,
#     top_k=5,
#     temperature=.5,
#     repetition_penalty=1.2,
#     max_new_tokens=512,
#     pad_token_id=tokenizer.eos_token_id,
# )

# generation_config.push_to_hub(
#     repo_id=peft_model_id,
#     token=write_token,
#     revision=revision_id,
# )

In [None]:
import gc
import torch

# Drop every training-time object so its memory can be reclaimed before the
# model is reloaded for inference. Names are popped with a default instead of
# using `del`, so the cell no longer raises NameError when it is re-run or
# when one of the earlier cells was skipped.
for _name in (
    'trainer',
    'ta',
    'train_dataset',
    'test_dataset',
    'model',
    'tokenizer',
    'model_id',
    'metric_bleu',
    'metric_rouge',
):
    globals().pop(_name, None)
del _name

# Collect the now-unreferenced objects, then release cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# test the model

from transformers import pipeline
import time

# System text placed in front of every user request (currently a single newline).
system_prompt = """
"""

# NOTE: this redefines formatting_func from the training section; the version
# here takes the raw user text instead of a dataset record.
def formatting_func(user_input):
    """Wrap ``user_input`` in the ``<human>: ... <assistant>: `` prompt layout.

    The module-level ``system_prompt`` goes first, followed by a blank line,
    the user text, and an open ``<assistant>: `` turn for the model to
    continue.
    """
    prompt = f"<human>: {system_prompt}\n\n{user_input}\n<assistant>: "
    return prompt

def generate_response(user_input, generation_config=None):
    """Generate a completion for ``user_input`` and time the generation.

    Args:
        user_input: raw user text; it is wrapped with ``formatting_func``.
        generation_config: optional generation settings passed to
            ``model.generate``. With ``None`` the model's own
            ``generation_config`` is used.

    Returns:
        dict with 'consumed_time' (seconds spent in generation) and
        'response' (the decoded output sequence, which includes the prompt
        text, with special tokens removed).
    """
    prompt = formatting_func(user_input)

    # Tokenize and move the tensors to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    tic = time.time()

    # Use generate(): calling the model directly runs one forward pass and
    # returns logits, which cannot be decoded as a token sequence. No
    # gradients are needed for inference.
    with torch.no_grad():
        outputs = model.generate(**inputs, generation_config=generation_config)

    return {
        'consumed_time': time.time() - tic,
        'response': tokenizer.decode(outputs[0], skip_special_tokens=True)
    }


In [None]:
from transformers import AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Read the adapter config of the pushed revision to find out which base model
# the adapter belongs to.
config = PeftConfig.from_pretrained(peft_model_id, revision=revision_id)
# Load that base model, then attach the adapter weights from the same revision.
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id, revision=revision_id)
# NOTE(review): the tokenizer is loaded from the base model; unlike in the
# training section, pad_token is not set here.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Switch to evaluation mode; as the last expression this also displays the
# model structure.
model.eval()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): FalconForCausalLM(
      (transformer): FalconModel(
        (word_embeddings): Embedding(65024, 4544)
        (h): ModuleList(
          (0-31): 32 x FalconDecoderLayer(
            (self_attention): FalconAttention(
              (rotary_emb): FalconRotaryEmbedding()
              (query_key_value): lora.Linear(
                (base_layer): FalconLinear(in_features=4544, out_features=4672, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4544, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4672, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (dense): FalconLin

In [None]:
# with torch.no_grad():
#     generate_response('leetcode number 1 add sum')

# Display the generation settings attached to the loaded model.
model.generation_config

GenerationConfig {
  "bos_token_id": 11,
  "eos_token_id": 11
}