In [None]:
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U datasets
!pip install -qqq -U loralib
!pip install -qqq -U einops
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -q -U trl
!pip install -qqq -U bitsandbytes

In [3]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from datasets import load_dataset
from peft import PeftConfig, PeftModel
from trl import SFTTrainer

In [4]:
#Set Path to folder that contains adapter_config.json and the associated .bin files for the Peft model
peft_model_id = '/kaggle/input/lama-2-7b-finetuned-bug-localization/Llama-2-7b-chat-finetune'
base_model_name = 'NousResearch/Llama-2-7b-chat-hf'
#Get PeftConfig from the finetuned Peft Model. This config file contains the path to the base model
config = PeftConfig.from_pretrained(peft_model_id)

In [7]:
# If you quantized the model while finetuning using bits and bytes 
# and want to load the model in 4bit for inference use the following code.
# NOTE: Make sure the quant and compute types match what you did during finetuning
use_nested_quant = False
bnb_4bit_quant_type = "nf4"
bnb_4bit_compute_dtype = "float16"
use_4bit = True
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [None]:
#Load the base model - if you are not using the bnb_config then remove the quantization_config argument
#You may or may not need to set use_auth_token to True depending on your model.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Load the Peft/Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [9]:
# Run text generation pipeline with our next model
prompt = "What is a large language model?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCaus

<s>[INST] What is a large language model? [/INST]  A large language model is a type of artificial intelligence (AI) model that is trained on a large corpus of text data to generate language outputs that are coherent and natural-sounding. The model can be trained on a variety of datasets, including books, articles, and websites, and can be used for a variety of natural language processing tasks, such as language translation, text summarization, and language generation.

Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer networks, and are designed to learn the patterns and structures of language from large amounts of data. These models can be trained on a variety of tasks, such as language translation, text summarization, and language generation, and can be used to generate coherent and natural-sounding text in a variety of styles and formats.

Some examples


In [None]:
java_ds = load_dataset("JetBrains-Research/lca-bug-localization", "java")
py_ds = load_dataset("JetBrains-Research/lca-bug-localization", "py")
kt_ds = load_dataset("JetBrains-Research/lca-bug-localization", "kt")

In [None]:
import re


# Define a function to transform the data
def transform_conversation(example):
    issue_title, issue_body, diff, changed_files = example['issue_title'], example['issue_body'], example['diff'], example['changed_files']
    
    human_text = f'Here is an issue titled "{issue_title}": {issue_body}. Changes made in order to fix this issue are:\n{diff}\nNow tell me \
    what are the buggy files?'
    assistant_text = f'Here is the list of buggy files:\n{changed_files}'
    
    reformatted = f'{human_text}'
    
    return {'text' : reformatted, 'res': {changed_files}}


# Apply the transformation
kt_test = kt_ds['test'].map(transform_conversation)
java_test = java_ds['test'].map(transform_conversation)
py_test = py_ds['test'].map(transform_conversation)

In [23]:
from datasets import concatenate_datasets

test_data = concatenate_datasets([kt_test, java_test, py_test])

In [24]:
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=5000)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCaus

In [15]:
test_data = test_data.shuffle(0)

In [25]:
result = pipe(f"{java_test[0]['text']}")
print(result[0]['generated_text'])

Here is an issue titled "Should okhttp be caching HTTP 307 (or 302) responses according to the w3 document?": I am using retrofit and okhttp in my Android app to download documents, images and music files. The files are hosted on Amazon through a CDN so the URLs change often. My backend server will try to use redirects to decrease the need to have to constantly update my content on my mobile app every time the CDN url changes. The devices should also cache responses in the case that the device is offline. For this reason, I am using 301 redirects, which I don't know is the best idea.

I was reading the description for 307 redirect at http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html

---

10.3.8 307 Temporary Redirect

The requested resource resides temporarily under a different URI. 
Since the redirection MAY be altered on occasion, the client SHOULD 
continue to use the Request-URI for future requests. 
This response is only cacheable if indicated by a Cache-Control or Expires h

In [21]:
print(java_test[0]['res'])

["['okhttp/src/main/java/com/squareup/okhttp/internal/http/CacheStrategy.java', 'okhttp-urlconnection/src/test/java/com/squareup/okhttp/UrlConnectionCacheTest.java', 'okhttp-tests/src/test/java/com/squareup/okhttp/CacheTest.java']"]
