In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install peft
!pip install --upgrade bitsandbytes



In [3]:
import json
from typing import Optional
from dataclasses import dataclass, field
from pathlib import Path

import torch
import transformers
from peft import PeftModel
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    HfArgumentParser,
    BitsAndBytesConfig,
)
from tqdm import tqdm

In [4]:
# If you need to use a specific GPU, you can set it here
# if torch.cuda.is_available():
#     # Set GPU:1 as the device
#     torch.cuda.set_device(1)
#     print(f"Using GPU: {torch.cuda.current_device()}")
# else:
#     print("CUDA is not available.")

device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",
    torch_dtype=torch.float16,
    # load_in_8bit=True,
    trust_remote_code=True,
    quantization_config=BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_threshold=6.0
    ),
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [108]:
model = PeftModel.from_pretrained(
    model,
    '/content/drive/MyDrive/CSCI_544/ANLP Project/repairllama-lora',
    # '/content/drive/MyDrive/CSCI_544/ANLP Project/finetuning/finetune-checkpoint-2400',
    # '/content/drive/MyDrive/CSCI_544/ANLP Project/finetuning/tokenise_change_checkpoint_400',
    torch_dtype=torch.float16,
)
model.config.pad_token = tokenizer.pad_token = tokenizer.unk_token
model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): PeftModelForCausalLM(
          (base_model): LoraModel(
            (model): LlamaForCausalLM(
              (model): LlamaModel(
                (embed_tokens): Embedding(32016, 4096)
                (layers): ModuleList(
                  (0-31): 32 x LlamaDecoderLayer(
                    (self_attn): LlamaSdpaAttention(
                      (q_proj): lora.Linear8bitLt(
                        (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                        (lora_dropout): ModuleDict(
                          (default): Dropout(p=0.05, inplace=False)
                        )
                        (lora_A): ModuleDict(
                          (default): Linear(in_features=4096, out_features=8, bias=False)
                        )
                        (lora_B): ModuleDict(
                          (default):

In [109]:
# Bug 05

buggy_code5 = """
correct the code below
    #include <stdio.h>
    #include <stdlib.h>
    int main(){
        int i,n,k;
        scanf("%d",&n);
        char *str=(char*)malloc(n*sizeof(char));//dynamic memory allocation
        for(i=0;i<n;i++){
            scanf("%c",*str[i]);
        }
      return 0;
    }
"""

In [110]:
inputs = tokenizer(buggy_code5, return_tensors="pt")
inputs_len = inputs["input_ids"].shape[1]
inputs_ids = inputs["input_ids"].to(device)

In [111]:
generation_config = GenerationConfig(
    num_beams=10,
    early_stopping=True,
    # early_stopping=False,
    # length_penalty=-0.5  # extra line added
)

outputs = model.generate(
    input_ids=inputs_ids,
    max_new_tokens=256,
    # max_new_tokens=2048,
    num_return_sequences=10,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    generation_config=generation_config,
)

In [112]:
output_ids = outputs[:, inputs_len:]
output_patch = tokenizer.batch_decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

In [113]:
for each in output_patch:
    print(each)
    print('-----------------')


#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char*)malloc(n*sizeof(char));//dynamic memory allocation
    for(i=0;i<n;i++){
        scanf("%c",&str[i]);
    }
  return 0;
}

#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char
-----------------

#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char*)malloc(n*sizeof(char));//dynamic memory allocation
    for(i=0;i<n;i++){
        scanf("%c",str[i]);
    }
  return 0;
}

#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char*)malloc
-----------------

#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char*)malloc(n*sizeof(
-----------------

#include <stdio.h>
#include <stdlib.h>
int main(){
    int i,n,k;
    scanf("%d",&n);
    char *str=(char*)malloc(n*sizeof
----------------