In [47]:
import os
import weaviate

# Get credentials from environment
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]

# Create a Weaviate client (v4)
client = weaviate.Client(
    weaviate_url=WEAVIATE_URL,
    auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
    timeout_config={
        "connect_timeout": 10,  # seconds to connect
        "read_timeout": 300     # seconds to wait for query response
    }
)

# Check connection
print("Is Weaviate ready?", client.is_ready())


Python client v3 `weaviate.Client(...)` has been removed.

Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
    - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
    - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client(


TypeError: Client.__init__() got an unexpected keyword argument 'weaviate_url'

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model_name = "sentence-transformers/all-mpnet-base-v2"

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
)


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("rag.pdf", extract_images=True)
pages = loader.load()
# pages

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap=20)
docs = text_splitter.split_documents(pages)

In [8]:
# fixing unicode error in google colab
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [10]:
def sanitize_properties(properties: dict):
    new_props = {}
    for k, v in properties.items():
        # Replace . and spaces with _
        new_key = k.replace(".", "_").replace(" ", "_")
        new_props[new_key] = v
    return new_props


In [11]:

from langchain_weaviate import WeaviateVectorStore



# Sanitize docs
for doc in docs:
    doc.metadata = sanitize_properties(doc.metadata)

vector_db = WeaviateVectorStore.from_documents(
    documents=docs,
    embedding=embeddings,
    client=client,
    by_text=False,
    batch_size=2
)

In [13]:
from langchain.prompts import ChatPromptTemplate
template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
If you don't know the answer, just say that you don't know.
Use ten sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer: """

In [14]:
prompt = ChatPromptTemplate.from_template(template)

In [15]:
from langchain_huggingface import HuggingFaceEndpoint

In [36]:
from dotenv import load_dotenv
load_dotenv()
huggingfacehub_api_token = os.environ["HUGGINGFACE_API"]

model = HuggingFaceEndpoint(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    huggingfacehub_api_token=huggingfacehub_api_token,
    task="text-generation",  # required for generative models
    temperature=1,
    max_new_tokens=180
)

In [37]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [42]:
output_parser=StrOutputParser()
retriever=vector_db.as_retriever(search_kwargs={"k": 2})

In [43]:
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | model
    | output_parser
)



In [44]:
print(rag_chain.invoke("what is rag system?"))

ValueError: Error during query: Query call with protocol GRPC search failed with message Deadline Exceeded.