# RAG Application using Typesense

In [None]:
import json
import os

import typesense
from dotenv import load_dotenv
load_dotenv()

In [None]:
# Connection settings for the single Typesense node this notebook talks to.
# The host comes from the environment (populated from .env by load_dotenv()).
typesense_node = {
    'host': os.getenv("TYPESENSE_HOST"),
    # 443 is the standard HTTPS port.
    # NOTE(review): a self-hosted Typesense server normally listens on 8108,
    # so 443/https presumably targets a hosted endpoint - confirm.
    'port': '443',
    'protocol': 'https',
}

# Build the client; the API key is read from the environment rather than
# being hard-coded in the notebook.
client = typesense.Client({
    'nodes': [typesense_node],
    'api_key': os.getenv("TYPESENSE_API_KEY"),
    'connection_timeout_seconds': 2,
})

In [None]:
# Echo the client object as the cell output to confirm it was constructed.
client

In [None]:
# Schema for the `books` collection that is loaded from books.jsonl.
# `authors` and `publication_year` are marked as facets so searches can
# facet on them; `ratings_count` is the default sort field.
books_schema = {
    'name': 'books',
    'fields': [
        {'name': 'title', 'type':'string'},
        {'name': 'authors', 'type':'string[]', 'facet':True},
        {'name': 'publication_year', 'type':'int32', 'facet': True},
        {'name': 'ratings_count', 'type':'int32'},
        {'name': 'average_rating', 'type':'float'},
    ],
    'default_sorting_field':'ratings_count'
}

# Create the collection only when it does not exist yet. This keeps the cell
# safe to re-run (no "already exists" error) while still letting a fresh
# kernel on a fresh cluster reach the import step without manual edits.
try:
    client.collections[books_schema['name']].retrieve()
except typesense.exceptions.ObjectNotFound:
    client.collections.create(books_schema)

In [None]:
# Read the whole JSONL payload first so the file handle is closed before the
# (potentially slow) network call is made.
with open('books.jsonl', 'r', encoding='utf-8') as jsonl_file:
    data = jsonl_file.read()

# When given a raw JSONL string the client returns the response body as text,
# one JSON result per input line. Individual document failures are reported
# there instead of being raised, so inspect the results explicitly.
import_response = client.collections['books'].documents.import_(data)
import_results = [
    json.loads(line) for line in import_response.splitlines() if line.strip()
]
import_failures = [result for result in import_results if not result.get('success')]

print(f"Imported {len(import_results) - len(import_failures)} of {len(import_results)} documents")
if import_failures:
    # Show only a few entries so a fully failed import does not flood the output.
    print("First failures:", import_failures[:3])

In [None]:
# Basic full-text search: match the query against the title and authors
# fields and order the hits by ratings_count, highest first.
search_parameters = dict(
    q='harry potter',
    query_by='title,authors',
    sort_by='ratings_count:desc',
)

# Kept as the last expression so the notebook renders the search response.
client.collections['books'].documents.search(search_parameters)

In [None]:
# Same query as the basic search, narrowed with filter_by to books whose
# publication_year is below 1998; hits are still ordered by ratings_count,
# highest first.
search_parameters = dict(
    q='harry potter',
    query_by='title,authors',
    filter_by='publication_year:<1998',
    sort_by='ratings_count:desc',
)

# Kept as the last expression so the notebook renders the search response.
client.collections['books'].documents.search(search_parameters)

In [None]:
# Title-only search that also requests facet counts on authors and orders
# hits by average_rating, highest first.
# NOTE(review): the query string looks deliberately misspelled, presumably to
# exercise typo tolerance - confirm before "fixing" it.
search_parameters = dict(
    q='experinmet',
    query_by='title',
    facet_by='authors',
    sort_by='average_rating:desc',
)

# Kept as the last expression so the notebook renders the search response.
client.collections['books'].documents.search(search_parameters)

## LangChain + Typesense + OpenAI GPT LLM + RAG Application

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Typesense
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import ChatOpenAI

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
import os
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Step 1 - load: read test.txt into LangChain document objects.
print("Loading the test.txt file")
loader = TextLoader(file_path="test.txt")
documents = loader.load()

print("Loaded the test.txt file")

# Step 2 - chunk: split the loaded documents using a target chunk size of
# 1000 and an overlap of 100 between neighbouring chunks.
print("Chunking the document")
text_splitter = CharacterTextSplitter(chunk_overlap=100, chunk_size=1000)
docs = text_splitter.split_documents(documents)
print("Done!")

# Step 3 - embedding model: only the embeddings object is constructed here
# (with its default settings); nothing in this cell passes the chunks to it.
print("Embedding the doc")
embeddings = HuggingFaceEmbeddings()
print("Done with embedding")

Loading the test.txt file
Loaded the test.txt file
Chunking the document
Done!
Embedding the doc
Done with embedding


In [7]:
# Connection and collection settings handed to the LangChain Typesense
# vector store; host and API key are read from the environment.
typesense_params = {
    "host": os.getenv("TYPESENSE_HOST"),
    "port": "443",
    "protocol": "https",
    "typesense_api_key": os.getenv("TYPESENSE_API_KEY"),
    "typesense_collection_name": "lang-chain",
}

# Build the vector store from the chunked documents and the embedding model.
# The returned object is kept in doc_search for later use.
doc_search = Typesense.from_documents(
    docs,
    embeddings,
    typesense_client_params=typesense_params,
)