# Colab Notebook

In [1]:
! pip install -Uqqq pip --progress-bar off
! pip install -qqq torch==2.0.1 --progress-bar off
! pip install -qqq transformers==4.31.0 --progress-bar off
! pip install -qqq langchain==0.0.266 --progress-bar off
! pip install -qqq chromadb==0.4.5 --progress-bar off
! pip install -qqq pypdf==3.15.0 --progress-bar off
! pip install -qqq xformers==0.0.20 --progress-bar off
! pip install -qqq sentence_transformers==2.2.2 --progress-bar off
! pip install -qqq InstructorEmbedding==1.0.1 --progress-bar off
! pip install -qqq pdf2image==1.16.3 --progress-bar off

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
xformers 0.0.20 requires torch==2.0.1, but you have torch 2.1.0 which is incompatible.[0m[31m
[0m

In [2]:
! wget -q https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl
! pip install -qqq auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl --progress-bar off

[31mERROR: auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl is not a supported wheel on this platform.[0m[31m
[0m

In [None]:
! sudo apt-get install poppler-utils

In [2]:
! pip install auto-gptq langchain pdf2image

Collecting auto-gptq
  Obtaining dependency information for auto-gptq from https://files.pythonhosted.org/packages/2c/13/65b4bb6157795e1b29ec70a2f4bdc5b949a29aa1e32740f375cdd7d883fb/auto_gptq-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading auto_gptq-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting accelerate>=0.19.0 (from auto-gptq)
  Obtaining dependency information for accelerate>=0.19.0 from https://files.pythonhosted.org/packages/d9/92/2d3aecf9f4a192968035880be3e2fc8b48d541c7128f7c936f430d6f96da/accelerate-0.23.0-py3-none-any.whl.metadata
  Downloading accelerate-0.23.0-py3-none-any.whl.metadata (18 kB)
Collecting datasets (from auto-gptq)
  Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/09/7e/fd4d6441a541dba61d0acb3c1fd5df53214c2e9033854e837a99dd9e0793/datasets-2.14.5-py3-none-any.whl.metadata
  Downloading datasets-2.14.5-py3-none-any.whl.metadata (19 kB)
Co

In [1]:
import torch
from auto_gptq import AutoGPTQForCausalLM
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path
from transformers import AutoTokenizer, TextStreamer, pipeline

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
loader = PyPDFDirectoryLoader("pdfs")
docs = loader.load()
len(docs)

174

In [3]:
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=128)
texts = text_splitter.split_documents(docs)
len(texts)

db = Chroma.from_documents(texts, embeddings, persist_directory="db")

load INSTRUCTOR_Transformer


max_seq_length  512


## Falcon 40B

In [43]:
model_name_or_path = "tiiuae/falcon-40b-instruct"
model_basename = "model"
from transformers import AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            # model_basename=model_basename,
            use_safetensors=False,
            trust_remote_code=False,
            # device="cuda:0",
            # use_triton=use_triton,
            # quantize_config=None
            )

Loading checkpoint shards:   0%|          | 0/9 [00:17<?, ?it/s]


KeyboardInterrupt: 

## LLama2-70B

In [4]:
from transformers import AutoModelForCausalLM
model_name_or_path = "TheBloke/Llama-2-70B-chat-GPTQ"
# To use a different branch, change revision
# For example: revision="main"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

## Continue

In [5]:
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    return f"""
[INST] <>
{system_prompt}
<>

{prompt} [/INST]
""".strip()


In [6]:
SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

template = generate_prompt(
    """
{context}

Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [7]:
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [8]:
text_pipeline = pipeline(
      "text-generation",
      model=model,
      tokenizer=tokenizer,
      max_new_tokens=1024,
      temperature=0,
      top_p=0.95,
      repetition_penalty=1.15,
      streamer=streamer,
  )

In [9]:
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

In [10]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

In [11]:
resultdcc0_0 = qa_chain("Is elevation in aspartate aminotransferase mentioned in text?")
resultdcc0_1 = qa_chain("Is elevation in AST mentioned in text?")

resultdcc0_2 = qa_chain("Is elevation in alanine aminotransferase mentioned in text?")
resultdcc0_3 = qa_chain("Is elevation in ALT mentioned in text?")



 Yes, it is mentioned in the text.
 Yes, the text mentions an elevation in AST. The text states, "For a subject who experiences an elevation in AST or ALT that is ≥ 3 times the ULN."
 No, elevation in alanine aminotransferase is not mentioned in the given text.
 Yes, elevation in ALT is mentioned in the text. The text states, "For a subject who experiences an elevation in AST or ALT that is ≥ 3 times the ULN."


In [12]:
resultdcc0_5 = qa_chain("What is the period of treatment as mentioned in the text?")

 According to section 4.5, "Definition of completed subjects," the treatment period is the time during which subjects are evaluated for primary objectives.


In [14]:
resultdcc0_4 = qa_chain("""Is the section "AESI" or "Adverse Events" or similar sections present in the text?""")

 Yes, the section "Adverse Events" is present in the text.


# GPTQ Models

## Using Transformers Only

In [2]:
# ! pip install --upgrade bitsandbytes accelerate

In [3]:
# ! pip install transformers>=4.32.0 optimum>=1.12.0
# ! pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  # Use cu117 if on CUDA 11.7


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "TheBloke/Llama-2-70B-chat-GPTQ"
# To use a different branch, change revision
# For example: revision="main"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])


  from .autonotebook import tqdm as notebook_tqdm


Downloading (…)lve/main/config.json: 100%|██████████| 840/840 [00:00<00:00, 215kB/s]
Downloading model.safetensors: 100%|██████████| 35.3G/35.3G [01:41<00:00, 349MB/s]
Downloading (…)neration_config.json: 100%|██████████| 137/137 [00:00<00:00, 50.3kB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 745/745 [00:00<00:00, 896kB/s]
Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 267MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 1.97MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 411/411 [00:00<00:00, 501kB/s]




*** Generate:
<s> [INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
Tell me about AI[/INST]

Artificial intelligence (AI) refers to the ability of machines or computer programs to mimic intelligent human behavior. AI systems use algorithms and data to make decisions, classify objects, and generate insights based on large amounts of information.

There are many different types of AI, including:

1. Narrow or weak AI: This type of AI is designed to perform a specific task, such as facial recognition, language tra

In [5]:

prompt = "How many Planets are in our Solar System"

prompt_template=f'''[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
{prompt}[/INST]

'''
print("\n\n*** Generate:")

print(pipe(prompt_template)[0]['generated_text'])




*** Generate:
[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
How many Planets are in our Solar System[/INST]

There are eight planets in our solar system. In order from the sun, they are:

1. Mercury
2. Venus
3. Earth
4. Mars
5. Jupiter
6. Saturn
7. Uranus
8. Neptune

Note: Pluto was previously considered a planet but has since been reclassified as a dwarf planet by the International Astronomical Union (IAU) in 2006.
