In [1]:
%%time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from dataclasses import dataclass, field
from typing import Optional
import contextlib

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    PeftModel
)

model = "bigcode/octocoder"
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model, quantization_config=None, 
    device_map=None, 
    trust_remote_code=True, 
    torch_dtype=torch.bfloat16,
)

model_id = "smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab"
model = PeftModel.from_pretrained(model, model_id, adapter_name="copilot")


if not hasattr(model, "hf_device_map"):
    model.cuda()

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

CPU times: user 4min 4s, sys: 30.7 s, total: 4min 35s
Wall time: 2min 17s


In [3]:
def get_code_completion(prefix, suffix, disable=False):
    context = contextlib.nullcontext
    if disable:
        context = model.disable_adapter
    text = prompt = f"""<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"""
    model.eval()
    with context():
        outputs = model.generate(input_ids=tokenizer(text, return_tensors="pt").input_ids.cuda(), 
                                 max_new_tokens=128,
                                 temperature=0.2,
                                 top_k=50,
                                 top_p=0.95,
                                 do_sample=True,
                                 repetition_penalty=1.0,
                                )
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]

In [4]:
model.add_weighted_adapter(["copilot"], [0.8], "code_buddy")
model.set_adapter("code_buddy")

### Test infilling and code completion ability

In [5]:


prefix = """# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import re
import warnings
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from ..config import PeftConfig
from ..import_utils import is_bnb_4bit_available, is_bnb_available
from ..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from .tuners_utils import BaseTuner, BaseTunerLayer


if is_bnb_available():
    import bitsandbytes as bnb


@dataclass
class BottleneckAdapterConfig(PeftConfig):
    \"""
    """

suffix = """
    \""" \
"""

print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix># coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import re
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from..config import PeftConfig
from..import_utils import is_bnb_4bit_available, is_bnb_a

In [6]:
prefix = """\
from peft import LoraConfig, TaskType
from transformers import AutoModelForCausalLM

peft_config = LoraConfig("""

suffix = ")"
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>from peft import LoraConfig, TaskType
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>
    task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
)

model = AutoModelForCausalLM.from_pretrained("gpt2")
model = get_peft_model(model, peft_config<|endoftext|>


In [7]:
prefix = """from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = """

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(
    model, optimizer, training_dataloader, scheduler
)
```

## Saving and loading models

To save a model, use the [`~Accelerator.save_state`] method:

```py
accelerator.save_state(model, "my_model.pt")
```

To load a model, use the [`~Accelerator.load_state`] method:

```py
accelerator.load_state("my_model.pt")
```

## Sharing models

To share a model with other users, use the


In [8]:
prefix = """
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
"""

suffix = """"""
print(get_code_completion(prefix, suffix))#, disable=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    # initialize a dictionary to store the values and their indices
    value_to_index = {}
    for i, value in enumerate(arr):
        if value in value_to_index:
            return [value_to_index[value], i]
        else:
            value_to_index[target_sum - value] = i
    return []
<|endoftext|>


In [9]:
def get_model_pred(query, disable=False):
    context = contextlib.nullcontext
    if disable:
        context = model.disable_adapter
    text = prompt = f"Question: {query}\n\nAnswer:"
    model.eval()
    with context():
        outputs = model.generate(input_ids=tokenizer(text, return_tensors="pt").input_ids.cuda(), 
                                 max_new_tokens=1024,
                                 temperature=0.2,
                                 top_k=50,
                                 top_p=0.95,
                                 do_sample=True,
                                 repetition_penalty=1.0,
                                 eos_token_id = tokenizer.eos_token_id)
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]



In [12]:
# disabled
query = """Write a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` \
for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`.  
"""
print(get_model_pred(query, disable=True))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question: Write a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`.  


Answer: Here is a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`:

```python
from transformers import LoraConfig, Trainer
from transformers.models.bigcode.modeling_bigcode import BigCodeConfig

model_config = BigCodeConfig.from_pretrained("bigcode/starcoder")
model_config.peft_config = LoraConfig(rank=16, alpha=32)

trainer = Trainer(
    model=BigCodeForCausalLM.from_pretrained("bigcode/starcoder", config=model_config),
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
```

Note that the `model_config.peft_config` attribute is used to specify th

### Test assistance and conversation ability

In [13]:
query = """Write a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` \
for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`.  
"""
print(get_model_pred(query))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question: Write a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`.  


Answer: Here is a code snippet for using 🤗 Trainer to finetune `bigcode/starcoder` on dataset `smangrul/hf-stack-v1` for Causal language modeling task using 🤗 PEFT `LoraConfig` with `rank=16` and `alpha=32`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model

model_name_or_path = "bigcode/starcoder"
tokenizer_name_or_path = "bigcode/starcoder"
dataset_name = "smangrul/hf-stack-v1"

model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path)

peft_config = LoraConfig(
    task_type="CAUSAL_LM", inference_mode=False, r=16, lora_alpha=32, bias="none", lora_dropout=0.1
)
model = get_peft_model(model, peft_config)

traini

In [14]:
query = """How to use scrapy? Explain with an example."""
print(get_model_pred(query))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Question: How to use scrapy? Explain with an example.

Answer: Scrapy is a Python library for web scraping. It allows you to extract data from websites and other online sources and save it to a database or a file. Scrapy is a powerful tool for data collection and web scraping, and it can be used in a variety of applications.

Here is an example of how to use scrapy to extract data from a website:

1. Install scrapy:

```
pip install scrapy
```

2. Create a project folder and navigate to it:

```
mkdir scrapy-tutorial
cd scrapy-tutorial
```

3. Create a scrapy project:

```
scrapy startproject tutorial
```

4. Navigate to the scrapy project folder and create a spider:

```
cd tutorial
scrapy genspider example https://www.example.com
```

5. Open the spider file and add a custom pipeline to extract data:

```
vim tutorial/spiders/example.py
```

The pipeline should look like this:

```
class ExamplePipeline(object):
    def process_item(self, item, spider):
        return item
```

6. Ru