In [13]:
#!docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest

In [14]:
#import time
#from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader,PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_redis import RedisVectorStore,RedisConfig

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [15]:
vector_store = RedisVectorStore(
    index_name="Austin",
    embeddings=embeddings,
    redis_url="redis://localhost:6379",
    indexing_algorithm="hnsw",
)

In [16]:
vector_store.config

RedisConfig(index_name='Austin', from_existing=False, key_prefix='Austin', redis_url='redis://localhost:6379', redis_client=None, connection_args={}, distance_metric='COSINE', indexing_algorithm='hnsw', vector_datatype='FLOAT32', storage_type='hash', id_field='id', content_field='text', embedding_field='embedding', default_tag_separator='|', metadata_schema=[], index_schema=None, schema_path=None, return_keys=False, custom_keys=None, embedding_dimensions=3072)

In [17]:
loader = PyPDFLoader('Austin-COO.pdf')
print("Loading Document")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(documents)
# _ = vector_store.add_documents(documents=all_splits)

Loading Document


In [18]:
# for doc in all_splits:
#     doc.metadata['city'] = 'Austin'

In [19]:
vector_store.add_documents(all_splits)

['Austin:dfbaca4131b5468f93558ee74781e700',
 'Austin:b651d9e7a6bd497492955bbd0830fc3c',
 'Austin:2d41ff1acb71434ba74609fc9901d543',
 'Austin:0dd7e31b74a642d09af21b5e3798ff00',
 'Austin:3dacef396f44455d9dca368a75c714cf',
 'Austin:8cdadc3a84494a67acd734a801603019',
 'Austin:d328d71da1ba49fb8fb229ed35ab7804',
 'Austin:9b306723504a4be1aae3cb961f4f04a2',
 'Austin:326951e1c4da4212bc100f8f8d0046fc',
 'Austin:8b1b939694e2421ebf95711ecf4b1b3b',
 'Austin:d0362c32a3c14685bfe275f4329814f0',
 'Austin:6219dc7700b44159a394827f6ea5f99b',
 'Austin:13a4605b7699426c904dd05998762e85',
 'Austin:3149cf0ba41846b59a1d14e2134ff1a1',
 'Austin:6657f75e28e44e428d47575b8284f270',
 'Austin:5776598a963c400eb47d732137379776',
 'Austin:d2e45f5bd19343b2922a2de7e585622b',
 'Austin:21ee5f4812e54dd3b0e7f27b186bef3c',
 'Austin:b0299033e6814e25a0927319b8d64c31',
 'Austin:1bfc4b810c5a4616aac5932efc9e1270',
 'Austin:dfb23bf69b2f403fad86ddd3d329bf31',
 'Austin:3585af9472e3497fb8eb94e36e5d70f5',
 'Austin:960ce404956847aca8b5d4e

In [20]:
all_splits[0]

Document(metadata={'source': 'Austin-COO.pdf', 'page': 0}, page_content='THE CODE OF THE CITY OF AUSTIN, TEXAS\n____________\nVOLUME I\n____________\nPublished by American Legal Publishing Corporation through September 2013\n____________\nBeginning with Supplement No. 96,\nSupplemented by Municipal Code Corporation\nCURRENT OFFICIALS of the CITY OF AUSTIN, TEXAS\n____________\nKirk Watson\nMayor\nLeslie Pool\nMayor Pro Tem\n____________\nNatasha Harper-Madison, Council Member District 1\nVanessa Fuentes, Council Member District 2\nJosé Velásquez, Council Member District 3\nJosé Vela, Council Member District 4\nRyan Alter, Council Member District 5\nMackenzie Kelly, Council Member District 6\nPaige Ellis, Council Member District 8\nZohaib "Zo" Qadri, Council Member District 9\nAlison Alter, Council Member District 10\n____________\nT.C. Broadnax\nCity Manager\n____________\nDeborah Thomas\nActing City Attorney\n____________\nMyrna Rios\n1/11/25, 11:30 AM Austin, TX Code of Ordinances\na

In [21]:
retrieved = vector_store.similarity_search("Parking",k=10,metadata={'city':'Austin'})

In [22]:
retrieved

[Document(metadata={}, page_content="a public right-of-way, regardless of whether the parking is onsite or offsite.\nEditor's note—Amendments to division (B) of this section made by Ord. 20130523-104 did not take into account amendments previously made by Ord. 20130411-\n061. The amendments enacted by Ord. 20130523-104 have therefore been made only to other parts of the section that do not conflict with Ord. 20130411-061.\nFuture legislation will correct the text if needed.\nIf off-street parking is provided, it must include parking for persons with disabilities as required by the Building Code and may not include fewer\naccessible spaces than would be required under Paragraph (2)(a) of this subsection.\nExcept for a use occupying a designated historic landmark or an existing building in a designated historic district, off-street motor vehicle parking for\npersons with disabilities must be provided for a use that occupies 6,000 square feet or more of floor space under the requirements 

In [23]:
retrieved[0]

Document(metadata={}, page_content="a public right-of-way, regardless of whether the parking is onsite or offsite.\nEditor's note—Amendments to division (B) of this section made by Ord. 20130523-104 did not take into account amendments previously made by Ord. 20130411-\n061. The amendments enacted by Ord. 20130523-104 have therefore been made only to other parts of the section that do not conflict with Ord. 20130411-061.\nFuture legislation will correct the text if needed.\nIf off-street parking is provided, it must include parking for persons with disabilities as required by the Building Code and may not include fewer\naccessible spaces than would be required under Paragraph (2)(a) of this subsection.\nExcept for a use occupying a designated historic landmark or an existing building in a designated historic district, off-street motor vehicle parking for\npersons with disabilities must be provided for a use that occupies 6,000 square feet or more of floor space under the requirements o

In [24]:
vector_store.config

RedisConfig(index_name='Austin', from_existing=False, key_prefix='Austin', redis_url='redis://localhost:6379', redis_client=None, connection_args={}, distance_metric='COSINE', indexing_algorithm='hnsw', vector_datatype='FLOAT32', storage_type='hash', id_field='id', content_field='text', embedding_field='embedding', default_tag_separator='|', metadata_schema=[], index_schema=None, schema_path=None, return_keys=False, custom_keys=None, embedding_dimensions=3072)