In [3]:
from langchain_community.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as P1
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone as p1, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import fitz  

  from tqdm.autonotebook import tqdm

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.embeddings import PineconeEmbeddings


In [4]:
llm_text = Ollama(model="llama3.1")

  llm_text = Ollama(model="llama3.1")


In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20
)

In [6]:
def extract_pdf_text(pdf_path):
    document = fitz.open(pdf_path)
    
    text = ""
    
    for page_num in range(document.page_count):
        page = document.load_page(page_num)
        
        text += page.get_text()
    
    document.close()
    
    return text

In [7]:
pdf_path = r"C:\Users\musta\OneDrive\Desktop\Gmail_responder\data\2005.11401v4.pdf"
extracted_text = extract_pdf_text(pdf_path)

In [8]:
extracted_text

'Retrieval-Augmented Generation for\nKnowledge-Intensive NLP Tasks\nPatrick Lewis†‡, Ethan Perez⋆,\nAleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,\nMike Lewis†, Wen-tau Yih†, Tim Rocktäschel†‡, Sebastian Riedel†‡, Douwe Kiela†\n†Facebook AI Research; ‡University College London; ⋆New York University;\nplewis@fb.com\nAbstract\nLarge pre-trained language models have been shown to store factual knowledge\nin their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-\nstream NLP tasks. However, their ability to access and precisely manipulate knowl-\nedge is still limited, and hence on knowledge-intensive tasks, their performance\nlags behind task-speciﬁc architectures. Additionally, providing provenance for their\ndecisions and updating their world knowledge remain open research problems. Pre-\ntrained models with a differentiable access mechanism to explicit non-parametric\nmemory have so far been only investigated for extract

In [9]:
chunks = text_splitter.split_text(extracted_text)

In [10]:
import os
os.environ['PINECONE_API_KEY'] = '39c3b55b-2ae4-44ee-a9cd-83a99876c828'
pc = p1(
    api_key=os.environ.get("PINECONE_API_KEY")
)

In [12]:
embeding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")



In [13]:
embeding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [15]:
index_name="rag"
index = pc.Index("rag")  
for i, t in zip(range(len(chunks)), chunks):
   query_result = embeding.embed_query(t)
   print(i,t)
   index.upsert(
   vectors=[
        {
            "id": str(i),  
            "values": query_result, 
            "metadata": {"text":str(t)} 
        }
    ],
    namespace="real" 
)

0 Retrieval-Augmented Generation for
Knowledge-Intensive NLP Tasks
Patrick Lewis†‡, Ethan Perez⋆,
1 Aleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,
2 Mike Lewis†, Wen-tau Yih†, Tim Rocktäschel†‡, Sebastian Riedel†‡, Douwe Kiela†
3 †Facebook AI Research; ‡University College London; ⋆New York University;
plewis@fb.com
Abstract
4 Abstract
Large pre-trained language models have been shown to store factual knowledge
5 in their parameters, and achieve state-of-the-art results when ﬁne-tuned on down-
6 stream NLP tasks. However, their ability to access and precisely manipulate knowl-
7 edge is still limited, and hence on knowledge-intensive tasks, their performance
8 lags behind task-speciﬁc architectures. Additionally, providing provenance for their
9 decisions and updating their world knowledge remain open research problems. Pre-
10 trained models with a differentiable access mechanism to explicit non-parametric
11 memory have so far been only inves

In [16]:
from langchain_pinecone import PineconeVectorStore

index_name = "rag"

vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeding,namespace="real")

In [22]:
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm_text,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(k=3)
)

In [23]:
retrieval_qa.run("describe RAG-Sequence?")

"Based on the provided context, here's a description of RAG-Sequence:\n\nRAG-Sequence is a model used for sequence classification tasks, where it considers the target as a sequence of length one. This makes it equivalent to RAG-Token in such scenarios. It achieves a score of 44.5, which is higher than other models like hybrid parametric/non-parametric baseline scores (which remain substantially below 44.5)."