In [None]:
!pip install pypdf
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install -U langchain-community
!pip install llama-index-embeddings-langchain
!pip install transformers --upgrade
!pip install langchain --upgrade

In [2]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt

In [None]:
# Read every file found under ./data into a list of documents.
reader = SimpleDirectoryReader(input_dir='./data')
documents = reader.load_data()

# Echo the loaded documents as the cell output for a quick visual check.
documents

In [None]:
# System instruction handed to the LLM wrapper (see `system_prompt=` below).
# The leading and trailing newlines are part of the prompt text.
system_prompt = (
    "\n"
    "You are a Q&A assistant. Your goal is to answer questions as\n"
    "accurately as possible based on the instructions and context provided.\n"
)

# Template that wraps each user query; `{query_str}` is the placeholder that
# gets replaced with the actual question text.
QUERY_WRAPPER_TEMPLATE = "<|USER|>{query_str}<|ASSISTANT|>"
query_wrapper_prompt = SimpleInputPrompt(QUERY_WRAPPER_TEMPLATE)

In [None]:
import os

from huggingface_hub import login

# Authenticate against the Hugging Face Hub.
# The access token must never be written into the notebook itself: it is read
# from the HF_TOKEN environment variable instead. When the variable is unset
# or empty, `None` is passed so that `login` falls back to its interactive
# prompt rather than attempting to authenticate with an empty string.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Single source of truth for the checkpoint used by both tokenizer and model.
MODEL_NAME = "daryl149/llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# A chat template is a Jinja template rendered over a list of `messages`
# (each with a `role` and `content`). The previous value used single-brace
# placeholders ("{user_message}"), which Jinja emits literally, so every chat
# request collapsed to the same constant string. This template keeps the same
# <|role|> marker convention but actually renders the conversation, and ends
# with the assistant marker when a generation prompt is requested.
# NOTE(review): Llama-2 chat checkpoints were trained with the [INST] ... [/INST]
# format; consider switching both this template and the query wrapper to it.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "<|{{ message['role'] }}|>{{ message['content'] }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>{% endif %}"
)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # Greedy decoding. `temperature` only has an effect when sampling, so it is
    # omitted here to avoid the "unused generation flag" warning.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer=tokenizer,
    model_name=MODEL_NAME,
    device_map="auto",
    model_kwargs={
        "torch_dtype": torch.float16,
        # 8-bit weights via bitsandbytes. Passing `load_in_8bit` directly to
        # `from_pretrained` is deprecated; the quantization settings belong in
        # a BitsAndBytesConfig supplied as `quantization_config`.
        "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
    },
)

In [None]:
# `HuggingFaceEmbeddings` lives in the `langchain-community` package (installed
# above); the legacy `langchain.embeddings.huggingface` path is a deprecated
# shim that is not available in upgraded LangChain releases.
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import Settings
from llama_index.embeddings.langchain import LangchainEmbedding

# Wrap the LangChain sentence-transformers embedder in the LlamaIndex adapter
# so it can be registered as the embedding model for the index.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

In [None]:
# Register the models and chunking configuration on the global Settings
# object so that later index/engine construction picks them up implicitly.
CHUNK_SIZE = 1024

Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = CHUNK_SIZE

In [None]:
# Build the vector index over the documents loaded earlier.
index = VectorStoreIndex.from_documents(documents=documents)

In [None]:
# Create a chat engine on top of the index using the default chat mode.
# Despite its name, `query_engine` therefore holds a chat engine object.
query_engine = index.as_chat_engine()

In [None]:
user_message = "What is Soumith's latest education?"

# `query_engine` is created with `index.as_chat_engine()`, so it is a chat
# engine: `.chat()` is the entry point every chat engine provides, whereas
# `.query()` only exists on some agent-backed implementations.
response = query_engine.chat(user_message)

# The chat response object carries the generated answer text in `.response`.
print(response.response)