# MosesAI – RAG Pipeline (Fixed Imports)
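Generated 2025-05-23

Retrieval-augmented question answering over the text files in `data/talmud-pages`: the files are split into chunks, embedded with OpenAI `text-embedding-3-small`, indexed in Pinecone, and queried through a LangChain `RetrievalQA` chain backed by `gpt-4o-mini`.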

In [1]:
import os, pathlib

import openai
from dotenv import load_dotenv, find_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore

# Alias imports to avoid collision between LangChain's Pinecone wrapper
# and the Pinecone SDK client, which share the class name `Pinecone`.
from langchain.vectorstores import Pinecone as PineconeStore
from pinecone import Pinecone as PineconeClient
from pinecone import ServerlessSpec

# Copy variables from the nearest .env file (if one exists) into os.environ.
# Called as a plain statement: binding the result to `_` would shadow
# IPython's "last output" variable for the rest of the session.
load_dotenv(find_dotenv())

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

# Fail fast, and say exactly which key is absent or empty, so the reader
# does not have to guess which service is misconfigured.
missing_keys = [
    key_name
    for key_name, key_value in (
        ('OPENAI_API_KEY', OPENAI_API_KEY),
        ('PINECONE_API_KEY', PINECONE_API_KEY),
    )
    if not key_value
]
assert not missing_keys, (
    f"Add API keys to environment! Missing: {', '.join(missing_keys)}"
)


## 1  Load & chunk documents

In [2]:

# Directory holding the source text files for the corpus.
DATA_DIR = pathlib.Path('data/talmud-pages')

# Splitter settings, measured in characters.
splitter_settings = dict(chunk_size=512, chunk_overlap=64)

# Read every file found under DATA_DIR with TextLoader.
loader = DirectoryLoader(str(DATA_DIR), loader_cls=TextLoader)
docs = loader.load()

# Break the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(**splitter_settings)
chunks = splitter.split_documents(docs)

# Report the corpus size before and after chunking.
doc_count, chunk_count = len(docs), len(chunks)
print('Docs:', doc_count, 'Chunks:', chunk_count)


Docs: 2297 Chunks: 6899


## 2  Embed & index in Pinecone

In [3]:
embeddings = OpenAIEmbeddings(model='text-embedding-3-small')
# Embed a throwaway string once so the index dimension is derived from the
# embedding model instead of being hard-coded.
dim = len(embeddings.embed_query('ping'))

pc = PineconeClient(api_key=PINECONE_API_KEY)
index_name = 'talmud-pages'
if index_name not in pc.list_indexes().names():
    # The class-based Pinecone client requires a deployment `spec` when
    # creating an index. Cloud and region default to aws / us-east-1 and can
    # be overridden with PINECONE_CLOUD / PINECONE_REGION.
    pc.create_index(
        name=index_name,
        dimension=dim,
        metric='cosine',
        spec=ServerlessSpec(
            cloud=os.getenv('PINECONE_CLOUD', 'aws'),
            region=os.getenv('PINECONE_REGION', 'us-east-1'),
        ),
    )

# Drop any chunk whose text is MAX_LEN characters or longer before upload.
MAX_LEN = 8000  # characters
chunks = [doc for doc in chunks if len(doc.page_content) < MAX_LEN]

# Pinecone rejects upsert requests larger than 2 MB; sending one vector per
# request keeps every request small at the cost of many round trips.
UPSERT_BATCH_SIZE = 1

# NOTE(review): re-running this cell presumably uploads every chunk again
# under freshly generated ids — confirm, and consider passing stable `ids=`.
vectorstore = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name,
    pinecone_api_key=PINECONE_API_KEY,  # reuse the key validated at the top
    namespace=None,     # optional
    batch_size=UPSERT_BATCH_SIZE,
)
print('Vectorstore ready')


  embeddings = OpenAIEmbeddings(model='text-embedding-3-small')


PineconeApiException: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'content-type': 'application/json', 'date': 'Tue, 27 May 2025 00:55:21 GMT', 'x-envoy-upstream-service-time': '60', 'content-length': '135', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Your request is larger than the maximum supported size - 2MB. Please try to reduce your batch size.","details":[]}


## 3  Query

In [None]:

# Number of chunks fetched from the vector store for each question.
TOP_K = 4

retriever = vectorstore.as_retriever(search_kwargs={'k': TOP_K})
llm = ChatOpenAI(model='gpt-4o-mini')

# 'stuff' places all retrieved chunks into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
)

# Use .invoke(): calling the chain object directly (qa({...})) is deprecated.
response = qa.invoke({'query': 'When do you say Shema?'})
print(response['result'])

# Show where the answer came from; the chain was asked to return its
# sources, so surface them instead of discarding them.
print('Sources:')
for source_doc in response['source_documents']:
    print(' -', source_doc.metadata.get('source', '<unknown>'))
