# SurrealDB Vector Store for LangChain

## Install python dependencies

In [1]:
# Optional: uncomment to install the notebook's dependencies. `%pip` (rather
# than `!pip`) installs into the environment of the running kernel.
# %pip install -r requirements.txt

## Start SurrealDB in a container

In [2]:
# Optional: uncomment to start the containers in the background (-d = detached).
# Assumes Docker is installed and a compose file sits next to this notebook.
#!docker compose up -d

## Import packages

In [1]:
from surrealdb import Surreal
from surreal_langchain import SurrealDBStore
from langchain.embeddings import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


## Some sample sentences to test with

In [30]:
# Sample corpus: six short statements about LangChain that get embedded and
# stored. Each entry is one string; adjacent literals are joined by Python's
# implicit concatenation purely to keep the source lines short.
sentences = [
    (
        "LangChain is a framework designed to simplify the creation of "
        "applications using large language models (LLMs)."
    ),
    (
        "As a language model integration framework, LangChain's use-cases "
        "largely overlap with those of language models in general, including "
        "document analysis and summarization, chatbots, and code analysis."
    ),
    (
        "LangChain was launched in October 2022 as an open source project by "
        "Harrison Chase, while working at machine learning startup Robust "
        "Intelligence."
    ),
    (
        "The project quickly garnered popularity, with improvements from "
        "hundreds of contributors on GitHub, trending discussions on Twitter, "
        "lively activity on the project's Discord server, many YouTube "
        "tutorials, and meetups in San Francisco and London."
    ),
    (
        "In April 2023, LangChain had incorporated and the new startup raised "
        "over $20 million in funding at a valuation of at least $200 million "
        "from venture firm Sequoia Capital, a week after announcing a $10 "
        "million seed investment from Benchmark."
    ),
    (
        "In October 2023 LangChain introduced LangServe, a deployment tool "
        "designed to facilitate the transition from LCEL (LangChain Expression "
        "Language) prototypes to production-ready applications."
    ),
]

## Helper function to delete existing documents and start fresh

In [21]:
async def delete_docs(
    dburl="ws://localhost:8000/rpc",
    db_user="root",
    db_pass="root",
    namespace="langchain",
    database="database",
    collection="documents",
):
    """Remove previously stored documents so the demo can start from a clean slate.

    Opens a connection to SurrealDB, signs in, selects the namespace and
    database, and issues a delete on ``collection``. The connection is closed
    when the ``async with`` block exits.

    Every argument defaults to the value this notebook uses for its local
    container, so ``await delete_docs()`` keeps working unchanged. Pass
    different values to target another server or table.

    Args:
        dburl: URL of the SurrealDB RPC endpoint.
        db_user: User name used to sign in.
        db_pass: Password used to sign in. The default is the local demo
            credential; do not reuse it outside a throwaway container.
        namespace: SurrealDB namespace to select.
        database: SurrealDB database to select inside ``namespace``.
        collection: Name passed to ``delete`` (the table holding the documents).
    """
    async with Surreal(dburl) as conn:
        await conn.signin({"user": db_user, "pass": db_pass})
        await conn.use(namespace, database)
        # Passing a bare table name (no record id) targets the table as a whole.
        await conn.delete(collection)

In [27]:
# Optional: uncomment to empty `documents` before loading the sample sentences.
# await delete_docs()

## Initialize SurrealDBStore

There are two ways to initialize the SurrealDBStore:
1. Create the SurrealDBStore object and call its `initialize` method

In [26]:
sdb = SurrealDBStore(dburl="http://localhost:8000/rpc",k=10,db_user="root",db_pass="root")
await sdb.initialize()

2. Load documents with `afrom_texts`, which returns a pre-initialized SurrealDBStore object

In [31]:
sdb = await SurrealDBStore.afrom_texts(dburl="http://localhost:8000/rpc",texts=sentences,db_user="root",db_pass="root")

## Similarity Search with query

In [32]:
# Look up the stored sentences most similar to the question. No `k` is passed,
# so the store's default result count applies.
await sdb.asimilarity_search("What is Langchain?")

[Document(page_content='LangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence.', metadata={'id': 'documents:7on84nix6uv2n2nfbkne'}),
 Document(page_content='In October 2023 LangChain introduced LangServe, a deployment tool designed to facilitate the transition from LCEL (LangChain Expression Language) prototypes to production-ready applications.', metadata={'id': 'documents:2pfz07l2awlutdvpa7bp'}),
 Document(page_content='LangChain is a framework designed to simplify the creation of applications using large language models (LLMs).', metadata={'id': 'documents:p7u7s5305g3jli009g73'}),
 Document(page_content="As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.", metadata={'id': 'documents:riv3aeiemtthl3cohgk3'})]

## Similarity search with embeddings of the query 

You can limit the number of results by providing a value for `k` (default: `4`).

In [5]:
# Embed the query text ourselves, then search with the resulting vector.
# NOTE(review): despite the plural name, `embeddings` holds the embedding of a
# single query. Presumably the model used here must match the one the store
# used when indexing the documents — confirm.
embeddings = HuggingFaceEmbeddings().embed_query("What is Langchain?")
await sdb.asimilarity_search_by_vector(embeddings,k=4)

[Document(page_content='LangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence.', metadata={'id': 'documents:l4oza7p9rvv9v68o0ap4'}),
 Document(page_content='In October 2023 LangChain introduced LangServe, a deployment tool designed to facilitate the transition from LCEL (LangChain Expression Language) prototypes to production-ready applications.', metadata={'id': 'documents:6bnpp6db3hlq98j21sbi'}),
 Document(page_content='LangChain is a framework designed to simplify the creation of applications using large language models (LLMs).', metadata={'id': 'documents:6yeintfk2zn7rk7dnk3w'}),
 Document(page_content="As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.", metadata={'id': 'documents:70kd6k5pemouv9w6uj5s'})]

## Similarity search that returns distances along with the documents

You can specify a `score_threshold` to return only the documents whose scores are equal to or higher than the threshold.

In [8]:
await sdb.asimilarity_search_with_score("What is Langchain?",k=10,score_threshold=0.6)

[(Document(page_content='LangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence.', metadata={'id': 'documents:l4oza7p9rvv9v68o0ap4'}),
  0.707384991228603),
 (Document(page_content='In October 2023 LangChain introduced LangServe, a deployment tool designed to facilitate the transition from LCEL (LangChain Expression Language) prototypes to production-ready applications.', metadata={'id': 'documents:6bnpp6db3hlq98j21sbi'}),
  0.680691705615241),
 (Document(page_content='LangChain is a framework designed to simplify the creation of applications using large language models (LLMs).', metadata={'id': 'documents:6yeintfk2zn7rk7dnk3w'}),
  0.6542964797390185),
 (Document(page_content="As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.", metadata={'id': 'docu

## Similarity search that returns relevance scores with the documents (currently same as distance)

In [9]:
await sdb.asimilarity_search_with_relevance_scores("What is Langchain?",score_threshold=0.5)

[(Document(page_content='LangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence.', metadata={'id': 'documents:l4oza7p9rvv9v68o0ap4'}),
  0.707384991228603),
 (Document(page_content='In October 2023 LangChain introduced LangServe, a deployment tool designed to facilitate the transition from LCEL (LangChain Expression Language) prototypes to production-ready applications.', metadata={'id': 'documents:6bnpp6db3hlq98j21sbi'}),
  0.680691705615241),
 (Document(page_content='LangChain is a framework designed to simplify the creation of applications using large language models (LLMs).', metadata={'id': 'documents:6yeintfk2zn7rk7dnk3w'}),
  0.6542964797390185),
 (Document(page_content="As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.", metadata={'id': 'docu