### LLAMA2 + RAG + FAISS

In [9]:
import os
import torch
import transformers

from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from huggingface_hub import notebook_login
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
import textwrap
import sys
import os

In [10]:
# Collect your own PDF
loader = UnstructuredFileLoader('dataset/nlp_2024.pdf')
documents = loader.load()

In [11]:
text_splitter=CharacterTextSplitter(separator='\n',
                                    chunk_size=1000,
                                    chunk_overlap=50)
text_chunks=text_splitter.split_documents(documents)


Created a chunk of size 3089, which is longer than the specified 1000
Created a chunk of size 1963, which is longer than the specified 1000
Created a chunk of size 3123, which is longer than the specified 1000
Created a chunk of size 2798, which is longer than the specified 1000
Created a chunk of size 1040, which is longer than the specified 1000
Created a chunk of size 1944, which is longer than the specified 1000
Created a chunk of size 3109, which is longer than the specified 1000


In [12]:
embeddings = HuggingFaceEmbeddings(model_name='/home/jomondal/experiments/mywork/pretrained_models/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})


In [13]:
vectorstore=FAISS.from_documents(text_chunks, embeddings)

In [None]:
model_name = "/home/jomondal/experiments/mywork/pretrained_models/Llama-2-7b-chat-hf"                                             )
device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

In [17]:
compute_dtype = getattr(torch, "float16")

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name,padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
pipe = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 1024,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0})

In [22]:
prompt = "Tell me about chatGPT?"

In [23]:
pipe(prompt)

[{'generated_text': 'Tell me about chatGPT?\n Hinweis: This is a basic version of chatGPT, and it may not be able to answer all of your questions. However, it can provide some general information about chatGPT and its capabilities.\n\nchatGPT is a machine learning model that is designed to generate human-like text based on the input it receives. It was created by the team at Meta AI and is available for use on a variety of platforms, including the web and mobile devices.\n\nOne of the key features of chatGPT is its ability to understand and respond to natural language input. This means that you can have a conversation with chatGPT in the same way that you would with a human, using everyday language and phrases. chatGPT is also capable of generating text in a variety of formats, including articles, stories, and even entire conversations.\n\nIn addition to its language generation capabilities, chatGPT also has a number of other features that make it useful for a wide range of application

In [24]:
chain =  RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever())


In [25]:
result=chain({"query": prompt}, return_only_outputs=True)
wrapped_text = textwrap.fill(result['result'], width=500)
wrapped_text

' ChatGPT is an AI chatbot that has gained significant attention and popularity in recent months due to its advanced natural language processing capabilities. It was developed by OpenAI and is backed by Microsoft. ChatGPT can understand and respond to human language in a way that is increasingly indistinguishable from a human conversation. It has been used for a wide range of applications, including building chatbots, generating creative content, and even conducting makeshift therapy sessions.\nWhile it has been praised for its innovative capabilities, it has also faced criticism for potential biases and limitations. Nonetheless, ChatGPT has become a valuable tool for businesses, individuals, and organizations seeking to automate and improve their communication processes.'