# Load the Chroma DB and get retrieval results for a given query
- How do you load the persisted Chroma DB and get retrieval results for a given query?

In [6]:
from __future__ import annotations
from langchain.globals import set_llm_cache
from langchain_community.cache import SQLiteCache
import os
import sys
import chromadb

# Make the backend directory importable from this notebook.
# The location is taken relative to the current working directory and then
# made absolute, so a later change of working directory cannot break imports.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
backend_dir = os.path.abspath(os.path.join("..", "..", "backend"))
if backend_dir not in sys.path:
    sys.path.append(backend_dir)

In [7]:
from modules.utils import load_config_and_device
from modules.llm import setup_vector_db_and_qa
from modules.results_gen import get_result_from_query

In [3]:
# Configuration and database locations

# Read the backend configuration (the loader logs the device it finds),
# then override the storage locations with paths relative to this notebook.
config = load_config_and_device("../../backend/config.json")
config.update(
    {
        "persist_dir": "../../backend/data/chroma_db/",
        "data_dir": "../../backend/data/",
    }
)

[INFO] Finding device.
[INFO] Device found: cpu


In [8]:
# Open the on-disk Chroma store found at the configured persistence directory
persist_dir = config["persist_dir"]
client = chromadb.PersistentClient(path=persist_dir)

In [9]:
# Build the dataset QA object from the configuration and the Chroma client
# (sets up the LLM chain, the retriever and the Retrieval QA)
qa_dataset = setup_vector_db_and_qa(
    client=client,
    data_type="dataset",
    config=config,
)

[INFO] Loading metadata from file.
[INFO] Loading model...




[INFO] Model loaded.


## Get only the retrieved documents

In [10]:
# Natural-language search request used for the retrieval examples in this notebook
query: str = "give me datasets about mushrooms"

In [13]:
# Run the query through qa_dataset and keep at most the first ten documents.
# NOTE(review): top_k=5 is requested but the result is sliced to 10 - confirm
# which limit is intended and whether `invoke` actually honours `top_k`.
retrieved_docs = qa_dataset.invoke(input=query, top_k=5)
res = retrieved_docs[:10]
res

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[Document(page_content='### **Dataset Details**\n![](https://meta-album.github.io/assets/img/samples/FNG.png)\n\n**Meta Album ID**: PLT.FNG  \n**Meta Album URL**: [https://meta-album.github.io/datasets/FNG.html](https://meta-album.github.io/datasets/FNG.html)  \n**Domain ID**: PLT  \n**Domain Name**: Plants  \n**Dataset ID**: FNG  \n**Dataset Name**: Fungi  \n**Short Description**: Fungi dataset from Denmark  \n**\\# Classes**: 25  \n**\\# Images**: 15122  \n**Keywords**: fungi, ecology, plants  \n**Data Format**: images  \n**Image size**: 128x128  \n\n**License (original data release)**: BSD-3-Clause License  \n**License URL(original data release)**: https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE\n \n**License (Meta-Album data release)**: BSD-3-Clause License  \n**License URL (Meta-Album data release)**: [https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE](https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE)', metadata={'did': 44335, 'name': 'Met

In [None]:
# Metadata attached to the first returned document
first_document = res[0]
first_document.metadata

{'did': 44335, 'name': 'Meta_Album_FNG_Extended'}

In [16]:
# print() so the text's line breaks are rendered instead of shown as "\n" escapes
first_page_text = res[0].page_content
print(first_page_text)

### **Dataset Details**
![](https://meta-album.github.io/assets/img/samples/FNG.png)

**Meta Album ID**: PLT.FNG  
**Meta Album URL**: [https://meta-album.github.io/datasets/FNG.html](https://meta-album.github.io/datasets/FNG.html)  
**Domain ID**: PLT  
**Domain Name**: Plants  
**Dataset ID**: FNG  
**Dataset Name**: Fungi  
**Short Description**: Fungi dataset from Denmark  
**\# Classes**: 25  
**\# Images**: 15122  
**Keywords**: fungi, ecology, plants  
**Data Format**: images  
**Image size**: 128x128  

**License (original data release)**: BSD-3-Clause License  
**License URL(original data release)**: https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE
 
**License (Meta-Album data release)**: BSD-3-Clause License  
**License URL (Meta-Album data release)**: [https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE](https://github.com/picekl/DanishFungiDataset/blob/main/LICENSE)


## Process the results and return a dataframe instead

In [21]:
# Run the same query through the result-processing helper, which hands back
# a data frame together with the result documents
query_output = get_result_from_query(
    qa=qa_dataset,
    query=query,
    config=config,
    type_of_query="dataset",
)
result_data_frame, result_documents = query_output

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [22]:
# Preview the first five rows of the processed results
result_data_frame.head(n=5)

Unnamed: 0,id,name,command,OpenML URL,Description
0,44335,Meta_Album_FNG_Extended,dataset = openml.datasets.get_dataset(44335),"<a href=""https://www.openml.org/search?type=da...","did - 44335, name - Meta_Album_FNG_Extended, v..."
1,44302,Meta_Album_FNG_Mini,dataset = openml.datasets.get_dataset(44302),"<a href=""https://www.openml.org/search?type=da...",### **Dataset Details**\n![](https://meta-albu...
2,24,mushroom,dataset = openml.datasets.get_dataset(24),"<a href=""https://www.openml.org/search?type=da...","did - 24, name - mushroom, version - 1, upload..."
3,44272,Meta_Album_FNG_Micro,dataset = openml.datasets.get_dataset(44272),"<a href=""https://www.openml.org/search?type=da...","did - 44272, name - Meta_Album_FNG_Micro, vers..."
10,1558,bank-marketing,dataset = openml.datasets.get_dataset(1558),"<a href=""https://www.openml.org/search?type=da...",* Dataset:
