In [4]:
import os
import logging
# Location of the PDF to ingest. `setdefault` keeps this path as the fallback
# but lets a SOURCE_FILE value that is already exported in the environment take
# precedence, so the notebook can run elsewhere without editing this cell.
os.environ.setdefault('SOURCE_FILE', 'C:/workspace/experiments/GenAI/Projects/Test Data/aws.pdf')

# Notebook-level logger; INFO level so progress messages show up in cell output.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

## Load data into the vector store

In [6]:
from langchain_community.document_loaders import PyMuPDFLoader
# Resolve the PDF path from the SOURCE_FILE environment variable and log it
# before loading, so the cell output records which file was read.
f = os.environ.get('SOURCE_FILE')
logger.info(f"loading file: {f}")

# Build the PDF loader for that path and read it into `docs`.
loader = PyMuPDFLoader(f)
docs = loader.load()

INFO:__main__:loading file: C:/workspace/experiments/GenAI/Projects/Test Data/aws.pdf


In [7]:
from langchain.text_splitter import CharacterTextSplitter
# Splitter configuration: chunk_size=1000 with no overlap between chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

# Split the loaded documents; the result is kept separately from `docs`.
transformed_docs = text_splitter.split_documents(docs)

In [9]:
from langchain_openai import OpenAIEmbeddings
# Embedding model used for the vector store. The API key is read from the
# OPENAI_API_KEY environment variable rather than being written in the notebook.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=os.environ.get('OPENAI_API_KEY'),
)

In [10]:
import weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore
# Connect to the local Weaviate instance and index the documents under the
# "aws" index.
weaviate_client = weaviate.connect_to_local()
try:
    # Index the splitter output (`transformed_docs`). Passing the raw `docs`
    # here would leave the chunking step above without any effect.
    # NOTE(review): re-running this cell presumably inserts the same documents
    # again into "aws" -- confirm, and clear the index first if that matters.
    WeaviateVectorStore.from_documents(
        transformed_docs,
        embeddings,
        client=weaviate_client,
        index_name="aws",
    )
finally:
    # Always release the connection, even when embedding or indexing fails.
    weaviate_client.close()

INFO:httpx:HTTP Request: GET http://localhost:8080/v1/.well-known/openid-configuration "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/meta "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema/Aws "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema/Aws "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/nodes "HTTP/1.1 200 OK"


## Vector Search

In [17]:
# Open a new connection for querying. `store` holds on to this client and is
# used by the search cell below, so the client is intentionally not closed here.
# NOTE(review): call weaviate_client.close() once querying is finished.
weaviate_client = weaviate.connect_to_local()

# Same embedding configuration as used when the index was populated.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=os.environ.get('OPENAI_API_KEY'),
)

# Wrap the existing "aws" index; document text is stored under the "text" key.
store = WeaviateVectorStore(
    client=weaviate_client,
    index_name="aws",
    text_key="text",
    embedding=embeddings,
)

INFO:httpx:HTTP Request: GET http://localhost:8080/v1/.well-known/openid-configuration "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/meta "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema/Aws "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema/Aws "HTTP/1.1 200 OK"


In [18]:
# Natural-language question sent to the vector store in the next cell.
query: str = "what are the different transformation domains ?"

In [19]:
# Run the search for `query` against the "aws" store and print the raw result.
# NOTE: this rebinds `docs`, which earlier held the documents loaded from the PDF.
# NOTE(review): in Weaviate hybrid search, alpha=0 is documented as weighting
# the keyword side only -- confirm that is intended under "Vector Search".
docs = store.similarity_search(
    query,
    alpha=0,
)
print(docs)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[Document(page_content='An Overview of the AWS Cloud Adoption Framework\nAWS Whitepaper\nAccelerating business outcomes with cloud powered \ndigital transformation\nThe cloud transformation value chain in the following ﬁgure shows that business outcomes are \naccelerated through cloud powered organizational change (transformation) that is enabled by \na set of foundational capabilities. The transformation domains represent a value chain where \ntechnological transformation enables process transformation which enables organizational \ntransformation that enables product transformation. Key business outcomes include reduced \nbusiness risk, improved environmental, social and governance (ESG) performance, as well as \nincreased revenue and operational eﬃciency.\nCloud transformation value chain\n• Technological transformation focuses on using cloud to migrate and modernize legacy \ninfrastructure, applications, and data and analytics platforms. Cloud Value Benchmarking\nshows that migrati