In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import RetrievalQA


In [None]:
# Read the PDFs found under ./tf_papers into LangChain documents.
loader = PyPDFDirectoryLoader("./tf_papers")
docs = loader.load()

# Break the loaded documents into overlapping chunks (size 1000, overlap 200)
# so that each piece is small enough to embed and retrieve on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
docs_splitted = text_splitter.split_documents(docs)

In [None]:
# Display the first chunk as a quick sanity check of loading and splitting.
docs_splitted[0]

In [None]:
# Sentence-embedding model used to vectorise the document chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Run the embedding model on the CPU.
model_kwargs = dict(device="cpu")

# Leave the produced vectors un-normalised.
encode_kwargs = dict(normalize_embeddings=False)

hf_embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [None]:
# Build the FAISS index from the embeddings of the chunks.
# NOTE: only the first 100 chunks are indexed (docs_splitted[:100]); anything
# beyond that is not searchable. Presumably this keeps the demo fast on CPU --
# TODO confirm, and drop the slice to index the whole corpus.
vectordb = FAISS.from_documents(docs_splitted[:100] , hf_embeddings)

In [None]:
# Sanity-check retrieval with a plain similarity search against the index.
# The query is spelled correctly so that its embedding matches the papers'
# vocabulary instead of a misspelled token sequence.
query = "What is Transformer"
relevant_documents = vectordb.similarity_search(query)

# Print the text of every retrieved chunk.
for document in relevant_documents:
    print(document.page_content)

In [None]:
# Wrap the vector store in a retriever object (default settings) so it can be
# handed to a chain later on.
retriever = vectordb.as_retriever()
# Print the retriever to inspect how it is configured.
print(retriever)

In [None]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Hugging Face access token; None when HUGGING_FACE_TOKEN is not set.
HF_TOKEN = os.environ.get("HUGGING_FACE_TOKEN")

In [None]:
import os

# Expose the token under HUGGINGFACEHUB_API_TOKEN for the cells that follow.
# os.environ only accepts strings, so a missing token (None) would otherwise
# fail with an opaque "str expected, not NoneType" TypeError; fail early with
# an actionable message instead.
if not HF_TOKEN:
    raise RuntimeError(
        "HUGGING_FACE_TOKEN is not set. Add it to your .env file or export it "
        "in the environment before running this cell."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

In [None]:
# NOTE(review): HuggingFaceHub appears to be deprecated in recent
# langchain_community releases in favour of HuggingFaceEndpoint -- confirm
# against the installed version before relying on it.
from langchain_community.llms import HuggingFaceHub

# Hosted LLM client for the zephyr-7b-beta repository on the Hugging Face Hub.
# NOTE(review): presumably authenticates via the HUGGINGFACEHUB_API_TOKEN
# environment variable set in an earlier cell -- confirm.
hf = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.7, "max_length": 100}
)

# Smoke test: send a short prompt and print the completion.
# `query` is overwritten here and reused by the next cell.
query = "Hey how you doing"
print(hf.invoke(query))


In [None]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# Run the same model locally through a transformers text-generation pipeline.
# Generation settings belong in `pipeline_kwargs` (passed to the pipeline),
# not in `model_kwargs` (passed to the model/tokenizer loaders), so they are
# supplied here. `do_sample=True` is required for `temperature` to have any
# effect; without it decoding is greedy and the temperature is ignored.
hf = HuggingFacePipeline.from_model_id(
    model_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    pipeline_kwargs={"do_sample": True, "temperature": 0.7, "max_length": 100},
)

# `llm` is the handle the rest of the notebook is expected to use.
llm = hf
# Reuses the `query` string defined in the previous cell.
llm.invoke(query)