# Example of basic querying based on filters 

## Import

In [10]:
import shelve
import os
import pandas as pd
from qdrant_client import QdrantClient

In [11]:
# Initialize Qdrant client
client = QdrantClient("localhost", port=6333)

# Define the query filter
query_filter = {
    "must": [
        {
            "key": "TissueName",
            "match": {
                "text": "blood"
            }
        }
    ],
    "must_not": [
        {
            "key": "Age",
            "match": {
                "text": "None"  # Assuming NaN values for 'Age' are represented by an empty string
            }
        },
        {
            "key": "Sex",
            "match": {
                "text": "None"  # Assuming NaN values for 'Sex' are represented by an empty string
            }
        }
    ]
}

# Retrieve data from Qdrant
result = client.scroll(
    collection_name="GPL570",
    scroll_filter=query_filter,
    with_payload=True,
    with_vectors=True,
    limit=100  # Adjust as needed
)[0]

# Function to create a DataFrame for metadata
def create_metadata_dataframe(hits):
    return pd.DataFrame([hit.payload for hit in hits])

# Function to create a DataFrame for experimental data
def create_data_dataframe(hits):
    cache_path = "~/.local/share/biovdb"
    cnames_path = os.path.join(os.path.expanduser(cache_path), "cnames")
    with shelve.open(cnames_path) as db:
        gene_names = db.get("GPL570", [])
    
    data_rows = []
    for hit in hits:
        data_row = {'vector_' + str(i): v for i, v in enumerate(hit.vector)}
        data_row['GSM'] = hit.payload['GSM']
        data_rows.append(data_row)
    
    data_df = pd.DataFrame(data_rows)
    data_df.set_index('GSM', inplace=True)

    vector_columns = ['vector_' + str(i) for i in range(len(gene_names)) if 'vector_' + str(i) in data_df.columns]
    rename_dict = dict(zip(vector_columns, gene_names))
    data_df.rename(columns=rename_dict, inplace=True)

    return data_df

# Create separate DataFrames for metadata and experimental data
blood_metadata_df = create_metadata_dataframe(result)
blood_data_df = create_data_dataframe(result)


In [12]:
blood_data_df

Unnamed: 0_level_0,A1BG,A1BG-AS1,A1CF,A2M,A2M-AS1,A2ML1,A2MP1,A4GALT,A4GNT,AA06,...,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3,abParts,av27s1,hsa-let-7a-3,mir-223
GSM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM50698,7.8,4.3,14.00,8.30,83.0,21.20,27.3,5.6,7.7,16.3,...,26.183332,1.8,37.933334,53.20,21.866667,20.75,4.9,4.1,15.50,209.7
GSM50699,17.0,1.8,8.45,14.30,5.9,5.65,21.5,14.9,2.3,25.2,...,10.500000,7.5,18.033333,31.65,12.400000,14.20,2.1,23.0,2.70,60.3
GSM50700,134.5,24.6,249.75,124.70,372.3,96.05,38.5,224.6,12.4,161.3,...,113.916664,20.5,274.166660,482.85,84.866670,89.50,14.9,24.9,116.20,288.2
GSM50701,9.1,1.5,6.95,4.75,0.5,4.15,2.5,2.7,6.2,12.0,...,1.750000,1.5,11.066667,31.20,8.500000,9.35,4.6,2.7,3.40,17.8
GSM50702,9.1,4.2,5.25,9.80,1.4,20.45,7.9,6.0,1.2,15.8,...,10.033334,14.0,20.666666,33.15,5.266667,14.05,3.2,5.4,5.80,10.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSM100892,51.0,23.1,33.80,18.20,215.3,10.45,19.1,43.4,26.3,61.5,...,87.433334,4.0,237.466660,831.55,109.700000,86.35,215.7,25.7,33.70,427.8
GSM100893,25.4,9.7,42.80,41.55,149.5,8.80,16.8,20.5,28.2,18.5,...,85.733330,3.0,215.733340,1122.85,89.966670,58.10,50.2,7.0,50.85,775.9
GSM100894,6.6,24.3,25.85,30.40,153.3,32.75,70.3,28.4,3.9,66.5,...,94.450000,11.0,207.166670,922.80,145.433330,41.80,255.4,46.7,65.10,498.2
GSM100895,34.0,5.5,39.55,17.85,391.0,12.20,16.1,18.5,7.4,33.7,...,69.600000,1.9,326.900000,1335.75,114.766670,41.75,244.2,47.0,34.10,682.3


In [13]:
blood_metadata_df

Unnamed: 0,Age,ExperimentID,GSM,PlatformID,Sex,Species,TissueID,TissueName
0,,2634.0,GSM50698,570.0,,Homo sapiens,89.0,blood
1,,2634.0,GSM50699,570.0,,Homo sapiens,89.0,blood
2,,2634.0,GSM50700,570.0,,Homo sapiens,89.0,blood
3,,2634.0,GSM50701,570.0,,Homo sapiens,89.0,blood
4,,2634.0,GSM50702,570.0,,Homo sapiens,89.0,blood
...,...,...,...,...,...,...,...,...
95,,4488.0,GSM100892,570.0,,Homo sapiens,89.0,blood
96,,4488.0,GSM100893,570.0,,Homo sapiens,89.0,blood
97,,4488.0,GSM100894,570.0,,Homo sapiens,89.0,blood
98,,4488.0,GSM100895,570.0,,Homo sapiens,89.0,blood
