In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from dataclasses import dataclass, field
from typing import Optional
import contextlib

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

model = "bigcode/starcoder"
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model,
    quantization_config=None,
    device_map=None,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# model = model.merge_and_unload()
if not hasattr(model, "hf_device_map"):
    model.cuda()

model_id = "smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab"
model = PeftModel.from_pretrained(model, model_id, adapter_name="personal_copilot")
model.add_weighted_adapter(["personal_copilot"], [0.8], "best_personal_copilot")
model.set_adapter("best_personal_copilot")


def get_code_completion(prefix, suffix):
    text = prompt = f"""<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"""
    model.eval()
    outputs = model.generate(
        input_ids=tokenizer(text, return_tensors="pt").input_ids.cuda(),
        max_new_tokens=128,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        repetition_penalty=1.0,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [2]:
prefix = """from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = """

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(
    model, optimizer, training_dataloader, scheduler
)
```

## Distributed Training

To use distributed training, pass the `device_placement=True` argument to [`Accelerator()`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator).

```py
from accelerate import Accelerator

accelerator = Accelerator(device_placement=True)

model, optimizer, training_dataloader, scheduler = accelerator.prepare(
    model, optimizer, training_dataloader, scheduler
)


In [3]:
prefix = """\
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig("""

suffix = ")"
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>
    task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
)
model = AutoModelForCausalLM.from_pretrained("gpt2")
model = get_peft_model(model, peft_config<|endoftext|>


In [4]:
prefix = """
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
"""

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    # create a hash table
    hash_table = {}
    for i, num in enumerate(arr):
        if target_sum - num in hash_table:
            return [hash_table[target_sum - num], i]
        hash_table[num] = i
    return []
<|endoftext|>


In [5]:
prefix = """import math
import re
import warnings
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from ..config import PeftConfig
from ..import_utils import is_bnb_4bit_available, is_bnb_available
from ..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from .tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    \"""
    """

suffix = """
    \""" \
"""

print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>import math
import re
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from..config import PeftConfig
from..import_utils import is_bnb_4bit_available, is_bnb_available
from..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from.tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    """
    <fim_suffix>
    """ <fim_middle>Config for [`BottleneckAdapter`].

    Args:
        in_features (`int`): The number of input features.
        out_features (`int`): The number of output features.
        hidden_features (`int`): The number of hidden features.
 