# MosesAI – RAG Pipeline (Fixed Imports)
Generated 2025-05-23


This notebook demonstrates a complete Retrieval‑Augmented Generation (RAG) pipeline **without** the
`Pinecone.from_documents` attribute error.  
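Both the official Pinecone client and LangChain’s vector‑store wrapper export a class named `Pinecone`, so importing both under that name lets one shadow the other. The notebook imports each under its own alias to avoid the collision.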

```python
from langchain.vectorstores import Pinecone as PineconeStore
from pinecone import Pinecone as PineconeClient
```


In [1]:
# Uncomment on a fresh environment. `python-dotenv` provides the `dotenv` module
# imported in the next cell; `%pip` installs into the running kernel's environment.
# %pip install --upgrade langchain openai "pinecone-client[grpc]" tiktoken python-dotenv

In [2]:
import os, pathlib

import openai
from dotenv import load_dotenv, find_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Alias imports to avoid collision
from langchain.vectorstores import Pinecone as PineconeStore
from pinecone import Pinecone as PineconeClient
from pinecone import ServerlessSpec

# Populate the process environment from a local .env file before reading keys.
_ = load_dotenv(find_dotenv())

# Read each credential once; every later cell reuses these two constants.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

# The module-level OpenAI client setting mirrors the same key.
openai.api_key = OPENAI_API_KEY

# Fail fast here rather than deep inside an API call further down.
assert all([OPENAI_API_KEY, PINECONE_API_KEY]), 'Add API keys to environment!'


## 1  Load & chunk documents

In [3]:

# Seed a one-file sample corpus on first run so the cell works on an empty checkout.
DATA_DIR = pathlib.Path('sample_docs')
DATA_DIR.mkdir(exist_ok=True)
if not any(DATA_DIR.iterdir()):
    # Explicit UTF-8: the platform default encoding is not UTF-8 everywhere.
    (DATA_DIR / 'shema.txt').write_text(
        'Hear, O Israel: the Lord our God, the Lord is one. Blessed be the name ...',
        encoding='utf-8')

# Read every file in DATA_DIR as UTF-8 text. Without an explicit encoding,
# TextLoader falls back to the locale default and fails on non-ASCII documents
# on systems whose default is not UTF-8.
loader = DirectoryLoader(
    str(DATA_DIR),
    loader_cls=TextLoader,
    loader_kwargs={'encoding': 'utf-8'},
)
docs = loader.load()

# Split into overlapping chunks (512 characters, 64 overlap) for embedding.
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
chunks = splitter.split_documents(docs)
print('Docs:', len(docs), 'Chunks:', len(chunks))

# Stop here with a clear message instead of indexing nothing in the next step.
assert chunks, f'No text chunks were produced from {DATA_DIR}/ - add documents first.'


Docs: 1 Chunks: 1


## 2  Embed & index in Pinecone

In [4]:

# Embed the chunks and make sure a Pinecone index with a matching dimension exists.
embeddings = OpenAIEmbeddings(model='text-embedding-3-small')
# Probe the model once so the index dimension is taken from the actual embedding size.
dim = len(embeddings.embed_query('ping'))

pc = PineconeClient(api_key=PINECONE_API_KEY)
index_name = 'talmud-pages'
if index_name not in pc.list_indexes().names():
    # The v3+ client (the one that exports `Pinecone`) requires a deployment
    # `spec`; omitting it makes create_index fail before any index is created.
    # Cloud/region can be overridden through the environment.
    pc.create_index(
        index_name,
        dimension=dim,
        metric='cosine',
        spec=ServerlessSpec(
            cloud=os.getenv('PINECONE_CLOUD', 'aws'),
            region=os.getenv('PINECONE_REGION', 'us-east-1'),
        ),
    )

# NOTE(review): the saved output of this cell shows the legacy
# `langchain.vectorstores.Pinecone` wrapper rejecting the v3+ client's Index
# object ("client should be an instance of pinecone.Index"). The maintained
# replacement appears to be `PineconeVectorStore` from the separate
# `langchain-pinecone` package; it is not swapped in here because that would
# add a new dependency - TODO confirm and migrate.
vectorstore = PineconeStore.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name
)
print('Vectorstore ready')


  embeddings = OpenAIEmbeddings(model='text-embedding-3-small')


ValueError: client should be an instance of pinecone.Index, got <class 'pinecone.data.index.Index'>

## 3  Query

In [None]:

# Build a retrieval QA chain on top of the vector store and ask a sample question.
retriever = vectorstore.as_retriever(search_kwargs={'k': 4})  # ask for 4 matches per query
llm = ChatOpenAI(model='gpt-4o-mini')

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True
)

# Calling the chain object directly (`qa({...})`) is deprecated in LangChain;
# `invoke` accepts the same input dict and returns the same output dict.
# The full response is kept so its 'source_documents' entry can be inspected.
response = qa.invoke({'query': 'When do you say Shema?'})
print(response['result'])
