In [1]:
# %pip install -q -U bitsandbytes
# %pip install -q -U git+https://github.com/huggingface/transformers.git 
# %pip install -q -U git+https://github.com/huggingface/peft.git
# %pip install -q -U git+https://github.com/huggingface/accelerate.git
# %pip install -q -U datasets
# %pip install -q -U trl
# %pip install -q -U einops

In [1]:
# Preview the fine-tuning corpus: the bare last expression renders the Dataset summary
# (feature names and row count) for the `train` split.
from datasets import load_dataset
load_dataset('rghosh8/supportGPT_data', split="train")

  from .autonotebook import tqdm as notebook_tqdm


Dataset({
    features: ['text'],
    num_rows: 149
})

In [5]:
%pip install peft

Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch, einops
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    pipeline
)
from peft.tuners.lora import LoraLayer

from trl import SFTTrainer


def create_and_prepare_model(model_name="tiiuae/falcon-7b"):
    """Load a 4-bit quantized causal LM together with its LoRA config and tokenizer.

    Args:
        model_name: Hub id or local path of the base checkpoint. The same value is
            used for both the model weights and the tokenizer so the two cannot
            drift apart. Defaults to the Falcon-7B checkpoint this notebook uses.

    Returns:
        tuple: ``(model, peft_config, tokenizer)`` where ``model`` is the quantized
        base model, ``peft_config`` is the ``LoraConfig`` to hand to the trainer and
        ``tokenizer`` has its pad token set to its EOS token.
    """
    # 4-bit NF4 weights with double quantization; matmuls are computed in float16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    # device_map={"": 0} maps the whole model to device index 0.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map={"": 0},
        trust_remote_code=True,
    )

    # LoRA adapters are attached only to modules named "query_key_value".
    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["query_key_value"],
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # Reuse EOS as the padding token so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token

    return model, peft_config, tokenizer

def train_model(
    dataset_name="rghosh8/supportGPT_data",
    output_dir="./results",
    max_steps=100,
):
    """Fine-tune the quantized base model with LoRA adapters using ``SFTTrainer``.

    Args:
        dataset_name: Dataset id passed to ``load_dataset``; its ``train`` split is
            used and its ``text`` column is the training text
            (``dataset_text_field="text"``).
        output_dir: Directory given to ``TrainingArguments`` for checkpoints.
        max_steps: Total number of training steps to run.

    Returns:
        tuple: ``(model, tokenizer)`` — the model object obtained from
        ``create_and_prepare_model()`` (after ``trainer.train()`` has run) and its
        tokenizer.
    """
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        # 1 sequence per step x 4 accumulation steps = 4 sequences per update per device.
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        save_steps=100,  # a checkpoint is written every 100 steps
        logging_steps=10,
        learning_rate=2e-4,
        fp16=True,
        max_grad_norm=0.3,
        max_steps=max_steps,
        # NOTE(review): the "constant" scheduler does not appear to apply warmup, so
        # warmup_ratio likely has no effect as configured; use "constant_with_warmup"
        # if warmup is intended — confirm against the transformers scheduler docs.
        warmup_ratio=0.03,
        lr_scheduler_type="constant",
    )

    model, peft_config, tokenizer = create_and_prepare_model()
    # Disable the generation KV cache on the model config while training.
    model.config.use_cache = False
    dataset = load_dataset(dataset_name, split="train")

    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=512,
        tokenizer=tokenizer,
        args=training_arguments,
        packing=True,
    )

    trainer.train()

    return model, tokenizer


The following directories listed in your path were found to be non-existent: {PosixPath('vs/workbench/api/node/extensionHostProcess')}
The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}
The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/cuda/lib64')}
DEBUG: Possible options found for libcudart.so: set()
CUDA SETUP: PyTorch settings found: CUDA_VERSION=117, Highest Compute Capability: 8.0.
CUDA SETUP: To manually override the PyTorch CUDA version please see:https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md
CUDA SETUP: Loading binary /home/ubuntu/falcon_finetuning/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...
libcusparse.so.11: cannot open shared object file: No such file or directory
CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.


RuntimeError: 
        CUDA Setup failed despite GPU being available. Please run the following command to get more information:

        python -m bitsandbytes

        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues

In [3]:
# Run the fine-tuning job; train_model() returns the trained model object and its tokenizer.
model, tokenizer = train_model()

Loading checkpoint shards: 100%|██████████| 2/2 [00:15<00:00,  7.73s/it]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrajat-ghosh11[0m ([33mrajat-ghosh-nutanix[0m). Use [1m`wandb login --relogin`[0m to force relogin


Token indices sequence length is longer than the specified maximum sequence length for this model (2138 > 2048). Running this sequence through the model will result in indexing errors
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,2.2738
20,2.3136
30,2.2648
40,2.0313
50,2.1011
60,1.9733
70,2.0159
80,1.9215
90,2.0101
100,1.9794




In [1]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base checkpoint the adapter was trained on (Hub id or local path).
base_model_name = "tiiuae/falcon-7b" #path/to/your/model/or/name/on/hub"
# Adapter checkpoint written by the training cell of this notebook: with
# output_dir="./results", save_steps=100 and max_steps=100 the run produces
# `checkpoint-100` (the previously hard-coded `checkpoint-1000` is never created by
# that configuration). A relative path keeps the notebook independent of one
# machine's home directory; run the notebook from the project directory.
adapter_model_name = "./results/checkpoint-100"

# Load the base weights, then wrap them with the saved adapter weights.
model = AutoModelForCausalLM.from_pretrained(base_model_name, trust_remote_code=True)
model = PeftModel.from_pretrained(model, adapter_model_name)

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.47s/it]


In [4]:
# Import what this cell needs locally so it does not depend on another cell (or a
# previous kernel session) having imported `torch` and `pipeline`.
import torch
from transformers import pipeline as hf_pipeline

# The factory is imported under an alias on purpose: `pipeline = pipeline(...)` rebinds
# the factory's name to the pipeline instance, so running the cell a second time would
# call the instance instead of the factory. The variable keeps the name `pipeline`
# because later cells call it under that name.
pipeline = hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead',

In [5]:
# sequences = pipeline(
#    "Write a poem about Valencia.",
#     max_length=200,
#     do_sample=True,
#     top_k=10,
#     num_return_sequences=1,
#     eos_token_id=tokenizer.eos_token_id,
# )
# for seq in sequences:
#     print(f"Result: {seq['generated_text']}")


In [7]:
sequences

[{'generated_text': "### Human: Can you tell about Prism Central - pc.2021.9.0.5, pc.2022.1, or later: Unable to access PE via Cluster Quick Access after upgrading the AOS of the PE. ### Assistant: Selecting Prism Central and clicking the Manage Availability Zones will open a window with the available clusters on the right.\nUsing the drop-down menu, choose your desired location. Make sure that the selected location's availability zone is not the same as your Latam location's availability zone. You may also use the A/Z concept: an A zone is a single availability zone (AZ), whereas A and Z are multiple AZs.\n\nSetting up a load balancer on the selected cluster should allow you to access your cluster via the named service under the PE."}]

In [10]:
# Deterministic (greedy) decoding. The previous settings (do_sample=True with
# temperature=0.001 and top_k=0) only approximated greedy decoding through sampling,
# which is not guaranteed to be reproducible and scales logits by a factor of 1000.
# pad_token_id is passed explicitly; it is the same value generation otherwise falls
# back to (see the "Setting `pad_token_id` to `eos_token_id`" log message).
sequences = pipeline(
    "### Human: Can you tell about Prism Central - pc.2021.9.0.5, pc.2022.1, or later: Unable to access PE via Cluster Quick Access after upgrading the AOS of the PE. ### Assistant:",
    max_length=200,  # upper bound on total length passed to generation
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)
# for seq in sequences:
#     print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


In [6]:
# Exploration helper: list the names exported by langchain.chains.
# The import is done here because no other cell binds the name `langchain`
# (the other cells only use `from langchain... import ...`), so on a fresh kernel
# the bare `dir(langchain.chains)` raised NameError.
import langchain.chains

dir(langchain.chains)

['APIChain',
 'AnalyzeDocumentChain',
 'ArangoGraphQAChain',
 'ChatVectorDBChain',
 'ConstitutionalChain',
 'ConversationChain',
 'ConversationalRetrievalChain',
 'FlareChain',
 'GraphCypherQAChain',
 'GraphQAChain',
 'GraphSparqlQAChain',
 'HugeGraphQAChain',
 'HypotheticalDocumentEmbedder',
 'KuzuQAChain',
 'LLMBashChain',
 'LLMChain',
 'LLMCheckerChain',
 'LLMMathChain',
 'LLMRequestsChain',
 'LLMRouterChain',
 'LLMSummarizationCheckerChain',
 'MapReduceChain',
 'MapReduceDocumentsChain',
 'MapRerankDocumentsChain',
 'MultiPromptChain',
 'MultiRetrievalQAChain',
 'MultiRouteChain',
 'NatBotChain',
 'NebulaGraphQAChain',
 'NeptuneOpenCypherQAChain',
 'OpenAIModerationChain',
 'OpenAPIEndpointChain',
 'QAGenerationChain',
 'QAWithSourcesChain',
 'ReduceDocumentsChain',
 'RefineDocumentsChain',
 'RetrievalQA',
 'RetrievalQAWithSourcesChain',
 'RouterChain',
 'SequentialChain',
 'SimpleSequentialChain',
 'StuffDocumentsChain',
 'TransformChain',
 'VectorDBQA',
 'VectorDBQAWithSourcesCha

In [12]:
%pwd

'/home/ubuntu/falcon_finetuning'

In [2]:
from langchain.chains import load_chain
from langchain.document_loaders import TextLoader
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Load the document from the local file ./data.md (path is relative to the
# notebook's working directory).
loader = TextLoader('./data.md')
docs = loader.load()
# Prompt template for a question-answering chain. It has two placeholders:
# {context} for the supporting text and {question} for the user query.
# The indentation inside the triple-quoted string is part of the prompt text.
qa_template = """Context information is below.
    ---------------------
    {context}
    ---------------------
    Given the context information and not prior knowledge, 
    answer the question: {question}
    Answer:
"""
PROMPT = PromptTemplate(
    template=qa_template, input_variables=["context", "question"]
)
# Disabled example, kept for reference only. NOTE(review): in the visible cells
# PROMPT and docs are not passed to the RetrievalQA chain built later, and
# load_chain / OpenAI are only referenced by these commented-out lines.
# chain = load_chain(OpenAI(temperature=0), chain_type="stuff", prompt=PROMPT)
# query = "What's a phenotype?"
# chain({"input_documents": docs, "question": query}, return_only_outputs=True)

In [3]:
PROMPT

PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template='Context information is below.\n    ---------------------\n    {context}\n    ---------------------\n    Given the context information and not prior knowledge, \n    answer the question: {question}\n    Answer:\n', template_format='f-string', validate_template=True)

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, named so they can be tuned in one place.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 20

# Read with an explicit encoding so the result does not depend on the platform's
# default locale encoding.
with open("data.md", encoding="utf-8") as f:
    text_file = f.read()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# `chunks` is a list of strings; it is the input of the vector-store cell.
chunks = text_splitter.split_text(text_file)
chunks

["A phenotype refers to the observable physical\nproperties of an organism, including its appearance, development, and behavior.\nIt is determined by both the organism's genotype, which is the set of genes\nit carries, and environmental influences upon these genes."]

In [3]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Embedding model created with the library defaults (no arguments are passed).
embeddings = HuggingFaceEmbeddings()
# Build a FAISS vector store over `chunks` (produced by the text-splitter cell).
# `vectorStore` is what the RetrievalQA cell turns into a retriever.
# NOTE(review): the `%pip install faiss-cpu` cell appears after this one in the
# notebook; the package presumably has to be installed before this cell runs — confirm.
vectorStore = FAISS.from_texts(chunks, embeddings)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


In [3]:
%pip install faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting faiss-cpu
  Using cached faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
from getpass import getpass

from langchain import HuggingFaceHub

# SECURITY: an API token must never be committed in a notebook. The token that was
# previously hard-coded in this cell has to be treated as leaked and revoked in the
# Hugging Face account settings. The token is now taken from the environment; if it
# is not set, it is requested once interactively without being echoed or stored in
# the notebook.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

# Hosted Falcon-7B-Instruct endpoint used as the LLM of the retrieval chain.
llm=HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature":0.1 ,"max_length":512})

In [2]:
from langchain.chains import RetrievalQA
# NOTE(review): `retriever` is imported but not referenced in this cell.
from langchain.schema import retriever

# Build the retrieval QA chain from two objects defined in other cells:
# `llm` (the HuggingFaceHub cell) and `vectorStore` (the FAISS cell). Both cells must
# have been run in the current kernel first; the NameError recorded for this cell is
# what happens when `vectorStore` has not been defined yet.
chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorStore.as_retriever())


NameError: name 'vectorStore' is not defined

In [6]:
# Ask the retrieval chain a question; the bare last expression displays the answer string.
# NOTE(review): the only chunk of data.md shown earlier in this notebook is about
# phenotypes, so this question is probably not answerable from the indexed text —
# confirm which data the recorded answer was produced from.
query="What did Wilson deliver?"
chain.run(query)

' Wilson delivered a football.'