### Chroma multimodal: https://docs.trychroma.com/guides/multimodal 

In [1]:
# %pip install --upgrade chromadb
# %pip install pillow
# %pip install open-clip-torch
# %pip install tqdm
# %pip install matplotlib
# %pip install pandas
# %pip install langchain_openai


In [2]:
import os
import json
import chromadb 
import pandas as pd
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction 
from chromadb.utils.data_loaders import ImageLoader
from matplotlib import pyplot as plt 
from langchain_openai import ChatOpenAI

In [3]:
os.environ["OPENAI_API_KEY"] = 'sk-proj-YsE5dZkWvxqa3dJpGRhZT3BlbkFJ2zQSyvln9eq6cKEf2dau'  # getpass.getpass()
model = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [4]:
chroma_client = chromadb.PersistentClient(path='chroma_vectordb')

image_loader = ImageLoader()
multimodal_ef = OpenCLIPEmbeddingFunction()

kitchen_collection = chroma_client.get_or_create_collection(name="kitchen_view", embedding_function=multimodal_ef, data_loader=image_loader)
bathroom_collection = chroma_client.get_or_create_collection(name="bathroom_view", embedding_function=multimodal_ef, data_loader=image_loader)
bedroom_collection = chroma_client.get_or_create_collection(name="bedroom_view", embedding_function=multimodal_ef, data_loader=image_loader)
frontal_collection = chroma_client.get_or_create_collection(name="frontal_view", embedding_function=multimodal_ef, data_loader=image_loader)


  from .autonotebook import tqdm as notebook_tqdm


In [18]:

column_names = ['num_bedrooms', 'num_bathrooms', 'area', 'zipcode', 'price']
df = pd.read_csv('./houses_dataset/Houses Dataset/HousesInfo.txt', delimiter=' ', header=None, names=column_names)
print(len(df))

535


In [6]:
print(df.head(5))

   num_bedrooms  num_bathrooms  area  zipcode   price
0             4            4.0  4053    85255  869500
1             4            3.0  3343    36372  865200
2             3            4.0  3923    85266  889000
3             5            5.0  4022    85262  910000
4             3            4.0  4116    85266  971226


In [7]:
total_num_houses = len(df)
ids = []
kitchen_uris = []
bathroom_uris = []
bedroom_uris = []
frontal_uris = []
metadatas = []

for i in range(total_num_houses):
    ids.append(str(i))

    path = f'houses_dataset/Houses Dataset/{i+1}'
    kitchen_uris.append(f'{path}_kitchen.jpg')    
    bathroom_uris.append(f'{path}_bathroom.jpg')
    bedroom_uris.append(f'{path}_bedroom.jpg')
    frontal_uris.append(f'{path}_frontal.jpg')

    metadatas.append(
            {
                'number_of_bedrooms': int(df.iloc[i, 0]), 
                'number_of_bathrooms': int(df.iloc[i, 1]), 
                'area': int(df.iloc[i, 2]),
                'zipcode': int(df.iloc[i, 3]),
                'price': int(df.iloc[i, 4])
            }        
    )

In [8]:
print(ids[:10])
print(frontal_uris[:10])
print(metadatas[:10])

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
['houses_dataset/Houses Dataset/1_frontal.jpg', 'houses_dataset/Houses Dataset/2_frontal.jpg', 'houses_dataset/Houses Dataset/3_frontal.jpg', 'houses_dataset/Houses Dataset/4_frontal.jpg', 'houses_dataset/Houses Dataset/5_frontal.jpg', 'houses_dataset/Houses Dataset/6_frontal.jpg', 'houses_dataset/Houses Dataset/7_frontal.jpg', 'houses_dataset/Houses Dataset/8_frontal.jpg', 'houses_dataset/Houses Dataset/9_frontal.jpg', 'houses_dataset/Houses Dataset/10_frontal.jpg']
[{'number_of_bedrooms': 4, 'number_of_bathrooms': 4, 'area': 4053, 'zipcode': 85255, 'price': 869500}, {'number_of_bedrooms': 4, 'number_of_bathrooms': 3, 'area': 3343, 'zipcode': 36372, 'price': 865200}, {'number_of_bedrooms': 3, 'number_of_bathrooms': 4, 'area': 3923, 'zipcode': 85266, 'price': 889000}, {'number_of_bedrooms': 5, 'number_of_bathrooms': 5, 'area': 4022, 'zipcode': 85262, 'price': 910000}, {'number_of_bedrooms': 3, 'number_of_bathrooms': 4, 'area': 4116, 'z

In [9]:
def add_or_update_collection(collection, ids, uris, metadatas):      # add: to add first time, update: to update
    num_houses = 100
    # collection.add(                                         
    #     ids=ids[:num_houses],
    #     uris=uris[:num_houses],
    #     metadatas=metadatas[:num_houses]
    # )

    collection.update(
        ids=ids[:num_houses],
        uris=uris[:num_houses],
        metadatas=metadatas[:num_houses]
    )


In [None]:
add_or_update_collection(frontal_collection, ids, frontal_uris, metadatas)
add_or_update_collection(kitchen_collection, ids, kitchen_uris, metadatas)
add_or_update_collection(bathroom_collection, ids, bathroom_uris, metadatas)
add_or_update_collection(bedroom_collection, ids, bedroom_uris, metadatas)

In [11]:
frontal_collection.count()

100

In [12]:
def print_query_results(query_list: list, query_results: dict)->None:
    result_count = len(query_results['ids'][0])

    for i in range(len(query_list)):
        print(f'Results for query: {query_list[i]}')

        for j in range(result_count):
            id       = query_results["ids"][i][j]
            distance = query_results['distances'][i][j]
            data     = query_results['data'][i][j]
            document = query_results['documents'][i][j]
            metadata = query_results['metadatas'][i][j]
            uri      = query_results['uris'][i][j]

            print(f'id: {id}, distance: {distance}, metadata: {metadata}, document: {document}') 
            print(f'data: {uri}')
            plt.imshow(data)
            plt.axis("off")
            plt.show()

In [13]:
def multimodal_query(multimodal_db, query_texts):
    query_results = multimodal_db.query(
        query_texts=query_texts,
        n_results=2, 
        include=['documents', 'distances', 'metadatas', 'data', 'uris'],
        # where={'$and': [
        #     {'price': {'$gt': price - bound_range}},
        #     {'price': {'$lt': price + bound_range}},
        #     {'number_of_bedrooms': {'$eq': num_bedroom}}
        # ]
    # }                                                                  # Options: $eq, $ne, $gt, $gte, $lt, $lte
    )
    return query_results



In [14]:
def get_weights(embedder, query):
    template = (
            "I have four different pictures of a house: the internal picture of the kitchen, internal picture of a bathroom, internal picture of a bedroom, and frontal view of house. "
            "Which pictures are more relevant for this query: " + query + 
            " Please provide the result only in json format with the weight of each picture type. The summation of the weights should be 1. Use keywords only: kitchen, bathroom, bedroom, frontal"
        )

    res = embedder.invoke(template)
    res_str = res.content if hasattr(res, 'content') else str(res)
    weights = json.loads(res_str)
    return weights

In [15]:
all_queries = [
    'Find houses in the hilly area',
    'Find houses with wooden floor bedroom',
    'Find houses with tiles floor kitchen',
    'Find houses with flowers in the bathroom'
]

In [None]:
for query in all_queries:
    weights = get_weights(model, query)
    print("query:", query)
    print("res:", weights)
    queries = [query]

    max_key = max(weights, key=weights.get)
    print("using:", max_key)

    if max_key == 'kitchen':
        result = multimodal_query(kitchen_collection, queries)
    elif max_key == 'bathroom':
        result = multimodal_query(bathroom_collection, queries)
    elif max_key == 'bedroom':
        result = multimodal_query(bedroom_collection, queries)
    else:
        result = multimodal_query(frontal_collection, queries)

    print(result['ids'])
    print_query_results(queries, result)