### LLAMA2 + RAG + FAISS

In [1]:
import os
import torch
import transformers

from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from huggingface_hub import notebook_login
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
import textwrap
import sys
import os

In [2]:
# Collect your own PDF
loader = UnstructuredFileLoader('dataset/nlp_2024.pdf')
documents = loader.load()

In [3]:
text_splitter=CharacterTextSplitter(separator='\n',
                                    chunk_size=1000,
                                    chunk_overlap=50)
text_chunks=text_splitter.split_documents(documents)


Created a chunk of size 3089, which is longer than the specified 1000
Created a chunk of size 1963, which is longer than the specified 1000
Created a chunk of size 3123, which is longer than the specified 1000
Created a chunk of size 2798, which is longer than the specified 1000
Created a chunk of size 1040, which is longer than the specified 1000
Created a chunk of size 1944, which is longer than the specified 1000
Created a chunk of size 3109, which is longer than the specified 1000


In [4]:
embeddings = HuggingFaceEmbeddings(model_name='/home/jomondal/experiments/mywork/pretrained_models/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})


In [5]:
vectorstore=FAISS.from_documents(text_chunks, embeddings)

In [7]:
model_name = "/home/jomondal/experiments/mywork/pretrained_models/Llama-2-7b-chat-hf"
device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

In [8]:
compute_dtype = getattr(torch, "float16")

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name,padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
pipe = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 1024,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0})

In [10]:
prompt = "Tell me about chatGPT?"

In [11]:
pipe(prompt)

[{'generated_text': 'Tell me about chatGPT?\n everybody is talking about it, but I don\'t know much about it. Can you explain it to me?\n\nSure, I\'d be happy to explain! ChatGPT is a type of artificial intelligence (AI) model that is designed to generate human-like text responses to user input. It was created by the company OpenAI and was first released in 2018.\n\nChatGPT is based on a type of AI called a transformer, which is a type of neural network that is particularly well-suited for natural language processing tasks. The model is trained on a large dataset of text from the internet, and it can generate text that is often indistinguishable from human-written text.\n\nOne of the key features of ChatGPT is its ability to engage in conversation with users. Users can input a prompt or question, and the model will generate a response based on the context of the conversation. For example, if a user asks ChatGPT "What is your favorite hobby?", the model might respond with "I enjoy playi

In [12]:
chain =  RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever())


In [13]:
result=chain({"query": prompt}, return_only_outputs=True)
wrapped_text = textwrap.fill(result['result'], width=500)
wrapped_text

  warn_deprecated(


' ChatGPT is an AI chatbot developed by OpenAI and backed by Microsoft. It uses deep learning to generate human-like responses to natural language inputs provided through a simple chatbot user interface. ChatGPT can understand and respond to a wide range of questions and topics, from simple queries to more complex discussions. It can also be used for a variety of applications, such as customer service, language translation, and content creation. While ChatGPT is impressive, it is important to\nnote that it is not perfect and may sometimes provide biased or incorrect answers. Therefore, it is important to use ChatGPT responsibly and critically evaluate the information it provides.'