# Tutorial on creating a vector database with OpenML objects
- This tutorial shows how to use the API to create a vector database from OpenML objects (datasets, flows, etc.).

In [None]:
from __future__ import annotations
from langchain.globals import set_llm_cache
from langchain_community.cache import SQLiteCache
import os
import sys
import chromadb

# Make the backend package importable from this notebook.
# Change the relative parts below if the backend directory lives elsewhere.
# The path is resolved to an absolute one up front so the import keeps working
# even if the working directory changes later, and it is only appended once so
# re-running this cell does not pile up duplicate sys.path entries.
backend_dir = os.path.abspath(os.path.join("..", "..", "backend"))
if backend_dir not in sys.path:
    sys.path.append(backend_dir)

In [None]:
from modules.utils import (
    get_all_metadata_from_openml,
    create_metadata_dataframe,
    load_config_and_device,
)
from modules.llm import load_document_and_create_vector_store, setup_vector_db_and_qa

In [None]:
# Start from the configuration returned by the backend helper for config.json.
config = load_config_and_device("../../backend/config.json")

# Tutorial-specific settings, written into the config in exactly this order.
tutorial_overrides = (
    ("persist_dir", "../../data/doc_examples/chroma_db/"),
    ("data_dir", "../../data/doc_examples/"),
    ("type_of_data", "dataset"),
    ("training", True),
)
for option_name, option_value in tutorial_overrides:
    config[option_name] = option_value

# Open the persistent ChromaDB client at the configured directory, then show
# the final configuration for reference.
client = chromadb.PersistentClient(path=config["persist_dir"])
print(config)


## Manually

In [None]:
# Step-by-step route: every stage of the pipeline is called explicitly, and
# each stage receives the same `config` object.
# Download the data if it does not exist
openml_data_object, data_id, all_metadata = get_all_metadata_from_openml(config=config)
# Create the combined metadata dataframe
# (`all_metadata` is rebound here to the second value the helper returns)
metadata_df, all_metadata = create_metadata_dataframe(
    openml_data_object, data_id, all_metadata, config=config
)
# Create the vector store
# The ChromaDB client created earlier in this notebook is passed as `chroma_client`.
vectordb = load_document_and_create_vector_store(
    metadata_df, config=config, chroma_client=client
)

## API

In [None]:
# One-call route: pass the config, the configured object type and the ChromaDB
# client to setup_vector_db_and_qa and keep its result as `qa`.
# NOTE(review): presumably this wraps the manual steps and returns a
# question-answering/retrieval object — confirm against modules.llm.
qa = setup_vector_db_and_qa(
    config=config, data_type=config["type_of_data"], client=client
)