In [5]:
from langchain.chat_models import ChatOpenAI

from langchain.document_loaders import UnstructuredFileLoader, CSVLoader
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

cache_dir = LocalFileStore("./.cache/")

recursiveSplitter = RecursiveCharacterTextSplitter()

loader = CSVLoader("./files/unique_spaces.csv")

docs = loader.load_and_split(text_splitter=recursiveSplitter)

len(docs)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

In [7]:
results = vectorstore.similarity_search("서초구")

results

[Document(page_content='ID: 660\nTitle: 치유요가\nCategory: 요가,명상\nLocation: 서울 서초구 서초동\nDistance from center: 0.73\nCenter Longitude: 127.00955700000002\nCenter Latitude: 37.481628\nTile ID: 151', metadata={'row': 419, 'source': './files/unique_spaces.csv'}),
 Document(page_content='ID: 660\nTitle: 치유요가\nCategory: 요가,명상\nLocation: 서울 서초구 서초동\nDistance from center: 0.73\nCenter Longitude: 127.00955700000002\nCenter Latitude: 37.481628\nTile ID: 151', metadata={'row': 419, 'source': './files/unique_spaces.csv'}),
 Document(page_content='ID: 2698\nTitle: 캄요가\nCategory: 요가,명상\nLocation: 남양주 다산동\nDistance from center: 0.27\nCenter Longitude: 127.15355700000002\nCenter Latitude: 37.625628000000006\nTile ID: 373', metadata={'row': 1521, 'source': './files/unique_spaces.csv'}),
 Document(page_content='ID: 2698\nTitle: 캄요가\nCategory: 요가,명상\nLocation: 남양주 다산동\nDistance from center: 0.27\nCenter Longitude: 127.15355700000002\nCenter Latitude: 37.625628000000006\nTile ID: 373', metadata={'row': 1521,