In [37]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import TiDBVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_text_splitters import MarkdownHeaderTextSplitter

import getpass
import os

# Secrets are read interactively (input is not echoed) so that neither the
# TiDB connection string nor the OpenAI key is stored in the notebook itself.
tidb_connection_string = getpass.getpass(prompt="TiDB Connection String:")

# The key is exported through the process environment; presumably this is
# where OpenAIEmbeddings looks it up, since no key is passed to it explicitly.
os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="OpenAI API Key:")

# Creates the vectors stored in TiDB from the text files
# docs = None

# with open ("country_data_markdown.txt", "r", encoding="UTF-8") as file:
#     document = file.read()

#     headers_to_split_on = [
#         ('#', "Country"),
#     ]

#     markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
#     docs = markdown_splitter.split_text(document)

# with open ("country_metadata.txt", 'r', encoding="UTF-8") as file:
#     docsIndex = 0
#     for line in file:
#         line = line.strip()
#         if not line:
#             docsIndex += 1
#             continue
#         if ":" in line:
#             key, value = line.split(":", 1)
#             key = key.strip()
#             value = value.strip()

#             if key == "Crime_Index" or key == "Download_Speed" or key == "Tap_Water_Index":
#                 docs[docsIndex].metadata[key] = float(value)
#             else:
#                 docs[docsIndex].metadata[key] = value

# Embedding model shared by the (commented-out) ingestion step and the
# lookup against the existing table below.
embeddings = OpenAIEmbeddings()

# One-time ingestion: creates the Vector Store and saves it to TiDB.
# Left commented out; the live code below attaches to the existing table.
# vector_store = TiDBVectorStore.from_documents(
#     documents=docs,
#     embedding=embeddings,
#     table_name="vectors",
#     connection_string=tidb_connection_string,
#     distance_strategy="cosine", 
# )

# Attach to the existing "vectors" table (no documents are inserted here).
# The table name and distance strategy match the values used by the
# ingestion call above.
vector_store = TiDBVectorStore.from_existing_vector_table(
    table_name="vectors",
    connection_string=tidb_connection_string,
    embedding=embeddings,
    distance_strategy="cosine",
)

# Find the documents most similar to the query.
# The query is written as adjacent string literals, one requirement per line;
# Python concatenates them into a single string at compile time.
query = (
    "Find a country with people that speak English and Spanish, "
    "with warm weather, "
    "with extra hot spicy food, "
    "with people that follow Christianity, "
    "with a crime index of under 6, "
    "with landmarks, "
    "with many places for hiking, "
    "with broadband download speed of over 50 Mbps, "
    "with a tap water index of over 60, "
    "with no ongoing conflicts or regional tensions, "
    "with political stability and no political tensions, "
    "with a government that has a voting system, "
    "not in the continents of North America, "
    "and specifically not Bangladesh, Libya, Lebanon, Afghanistan, Somalia, "
    "Iran, Yemen, Syria, Russia, Myanmar, Venezuela, Iraq, South Sudan, Mali, "
    "Central African Republic, Burkina Faso, Haiti, Belarus, North Korea, "
    "Ukraine, Sudan, Mexico, Israel, or Palestine State."
)

# Hard numeric constraints applied to document metadata.
# NOTE(review): these thresholds are stricter than the numbers quoted in the
# query text (crime < 4.5 vs "under 6", download > 100 vs "over 50",
# tap water > 80 vs "over 60") - confirm which set is intended.
# NOTE(review): the negated clauses of the query (continent / country
# exclusions) are only part of the embedded text; nothing in `filters`
# enforces them.
filters = {
    "Crime_Index": {"$lt": 4.5},
    "Download_Speed": {"$gt": 100},
    "Tap_Water_Index": {"$gt": 80},
}

# Request up to 20 matches together with their relevance scores.
docs_with_score = vector_store.similarity_search_with_relevance_scores(
    query,
    k=20,
    filter=filters,
)
# Unfiltered variant, kept for comparison:
# docs_with_score = vector_store.similarity_search_with_relevance_scores(query, k=20)

# Print every hit framed by an 80-character rule.
for doc, score in docs_with_score:
    print("-" * 80)
    print("Score: ", score)
    print(doc.page_content)
    # print(doc.metadata)
    print("-" * 80)

--------------------------------------------------------------------------------
Score:  0.8236742124751811
Canada is located in North America. Speaks English, French officially. Canada, the second-largest country in the world by land area, is known for its vast natural beauty and multicultural cities. Stretching from the Atlantic to the Pacific and into the Arctic Ocean, it offers an incredible diversity of landscapes, including rugged mountains, serene lakes, and expansive forests. Canada's cities, such as Toronto, Vancouver, and Montreal, are celebrated for their cultural vibrancy and diversity. Canada's climate varies significantly across its vast territory. The country experiences a range of climatic regions, from Arctic weather in the north to temperate conditions in the south. Winters can be harsh in many parts, particularly in the interior and Prairie provinces, where daily average temperatures are near −15°C (5°F) but can drop below −40°C (−40°F) with severe wind chills. In co