In [None]:
from IPython.display import display, clear_output, Javascript, Markdown
import ipywidgets as ipw
import utils
import json
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import google.generativeai as genai
import numpy as np

In [None]:
# ---------------------------------------------------------------------------
# Settings and openBIS connection
# ---------------------------------------------------------------------------
CONFIG = utils.read_json("config.json")
CONFIG_ELN = utils.get_aiidalab_eln_config()
# Alternative source for the ELN settings, kept for reference:
# CONFIG_ELN = utils.read_json("eln_config.json")
OPENBIS_SESSION, SESSION_DATA = utils.connect_openbis(
    CONFIG_ELN["url"],
    CONFIG_ELN["token"],
)

# JSON files holding the object descriptions and their embedding vectors.
OPENBIS_OBJECTS_DETAILS = utils.read_json("/home/jovyan/openBIS_data.json")
OPENBIS_OBJECTS_EMBEDDINGS = utils.read_json("/home/jovyan/openBIS_embeddings.json")


# ---------------------------------------------------------------------------
# Widgets
# ---------------------------------------------------------------------------
def _make_textarea(label, height):
    """Build a 980px-wide text area with the given label and height."""
    return utils.Textarea(
        description=label,
        layout=ipw.Layout(width='980px', height=height),
        style={"description_width": "110px"},
    )


def _make_icon_button(tooltip, icon):
    """Build a 100x50px icon-only button with the given tooltip and icon."""
    return utils.Button(
        description='',
        disabled=False,
        button_style='',
        tooltip=tooltip,
        icon=icon,
        layout=ipw.Layout(width='100px', height='50px'),
    )


prompt_textarea = _make_textarea("Prompt", '300px')
model_answer_textarea = _make_textarea("Answer", '600px')

enter_button = _make_icon_button('Enter', 'arrow-right')
quit_button = _make_icon_button('Main menu', 'home')

# HTML snippet assembled from the fragments stored in the notebook config.
increase_buttons_size = utils.HTML(
    data=''.join(CONFIG["save_home_buttons_settings"])
)

# ---------------------------------------------------------------------------
# Google Gemini 2.0 Flash
# ---------------------------------------------------------------------------
google_api_key = utils.read_json("/home/jovyan/gemini_api.json")
genai.configure(api_key=google_api_key["api_key"])

model_name = "gemini-2.0-flash"
system_instruction = "You are a materials science expert working with nanotech materials."
model = genai.GenerativeModel(
    model_name=model_name,
    system_instruction=system_instruction,
)

# Conversation history sent to the model; seeded with one greeting exchange.
MESSAGES = [
    {"role": "user", "parts": [{"text": "Hi"}]},
    {"role": "model", "parts": [{"text": "Hi, my name is Bot."}]},
]

In [None]:
def close_notebook(b):
    """Send the browser to ``home.ipynb``.

    Args:
        b: Argument supplied by the button click handler; not used.
    """
    redirect_script = utils.Javascript(data = 'window.location.replace("home.ipynb")')
    display(redirect_script)

def ask_chatbot(change):
    """Send the prompt from the prompt box to the model and show its answer.

    The answer box is cleared first. The prompt is passed to ``load_chatbot``
    (which appends the retrieved context to ``MESSAGES``), then appended to
    ``MESSAGES`` as a user turn and sent to the model. On success the reply is
    shown in the answer box and stored in ``MESSAGES`` as a model turn.

    A blank prompt is ignored. If anything fails while building the context or
    querying the model, ``MESSAGES`` is restored to its previous length before
    the exception is re-raised, so a failed request does not leave unanswered
    turns in the history that would be re-sent with every later request.

    Args:
        change: Argument supplied by the button click handler; not used.
    """
    model_answer_textarea.value = ""
    prompt = prompt_textarea.value

    # Nothing to ask: avoid a pointless embedding/generation round trip.
    if not prompt or not prompt.strip():
        return

    history_length = len(MESSAGES)
    try:
        load_chatbot(prompt)
        MESSAGES.append({"role": "user", "parts": [{"text": prompt}]})
        response_text = model.generate_content(MESSAGES).text
    except Exception:
        # Drop the context/user turns added for this failed request.
        del MESSAGES[history_length:]
        raise

    model_answer_textarea.value = response_text
    MESSAGES.append({"role": "model", "parts": [{"text": response_text}]})

def get_embeddings(openbis_objects, embeddings_filepath):
    """Embed every object that is not yet in the embeddings cache file.

    The existing cache is read from ``embeddings_filepath`` (a missing file is
    treated as an empty cache). Each object whose permId is not already cached
    has its ``"details"`` text embedded with ``models/text-embedding-004``.
    The cache is written back in a ``finally`` clause, so embeddings computed
    before an API failure or interruption are kept and skipped on the next run.

    Args:
        openbis_objects: Mapping of permId to a dict with a ``"details"`` entry.
        embeddings_filepath: Path of the JSON cache to read and rewrite.
    """
    try:
        openbis_objects_embeddings = utils.read_json(embeddings_filepath)
    except FileNotFoundError:
        openbis_objects_embeddings = {}

    try:
        for obj_permid, obj in tqdm(openbis_objects.items()):
            if obj_permid in openbis_objects_embeddings:
                continue
            response = genai.embed_content(model="models/text-embedding-004", content = obj["details"])
            openbis_objects_embeddings[obj_permid] = response["embedding"]
    finally:
        # Persist whatever was computed, even if the loop did not finish.
        utils.create_json(openbis_objects_embeddings, embeddings_filepath)

def retrieve_openbis_objects(details_filepath):
    """Describe every openBIS object in text and save the result as JSON.

    All objects are fetched through ``OPENBIS_SESSION``. For each one a
    sentence is built from its type, ``$name``, permId and registration date,
    followed by its non-empty properties and the permIds of its parents. The
    output maps each permId to ``{"details": <text>, "parents": [<permId>, ...]}``
    and is written to ``details_filepath``.

    Args:
        details_filepath: Path of the JSON file to write.
    """
    # The connection is stored in OPENBIS_SESSION; no name `session` exists.
    objects = OPENBIS_SESSION.get_objects(attrs = ["parents"])
    dict_objects = {}
    for obj in tqdm(objects):
        obj_props = obj.props.all()
        obj_permid = obj.permId
        obj_type = obj.attrs.type
        # NOTE(review): with `or`, GENERAL_ELN_SETTINGS objects are always
        # processed. If the intent was to skip them, this should probably read
        # `and obj_type != "GENERAL_ELN_SETTINGS"` -- confirm before changing.
        if obj_permid not in dict_objects or obj_type == "GENERAL_ELN_SETTINGS":
            obj_name = obj_props.get("$name", "")
            obj_regist_date = obj.registrationDate
            obj_string = f"- Object of type {obj_type} is named {obj_name}, is identified by {obj_permid} and it was registered in {obj_regist_date}."

            # The name is already in the sentence; empty properties are skipped.
            props_string = ""
            for key, value in obj_props.items():
                if key != "$name" and value:
                    props_string += f"\n\t{key}: {value}"

            if props_string:
                obj_string += f" It contains the following properties:{props_string}"

            obj_parents_permids = []
            if obj.parents:
                obj_string += " It is connected to the objects with the following identifiers:"
                for parent_obj in obj.parents:
                    # Look the parent up to obtain its permId.
                    parent_obj = OPENBIS_SESSION.get_objects(parent_obj)[0]
                    parent_permid = parent_obj.permId
                    obj_string += f" {parent_permid}"
                    obj_parents_permids.append(parent_permid)

                obj_string += "."

            dict_objects[obj_permid] = {"details": obj_string, "parents": obj_parents_permids}

    utils.create_json(dict_objects, details_filepath)

def get_parent_objects(obj, list_of_objects):
    """Add text descriptions of ``obj`` and all its ancestors to a dict.

    An object whose permId is already a key of ``list_of_objects`` is skipped.
    Otherwise a sentence is built from its type, ``$name`` (empty string when
    the property is missing), permId and registration date, followed by its
    non-empty properties and the permIds of its parents. Each parent is
    fetched through ``OPENBIS_SESSION`` and processed recursively, so parents
    are stored before the object itself.

    Args:
        obj: openBIS object exposing ``props.all()``, ``permId``,
            ``attrs.type``, ``registrationDate`` and ``parents``.
        list_of_objects: Dict mapping permId to description; updated in place.

    Returns:
        The same ``list_of_objects`` dict that was passed in.
    """
    obj_permid = obj.permId

    # Already described: nothing to do (and no need to load its properties).
    if obj_permid in list_of_objects:
        return list_of_objects

    obj_props = obj.props.all()
    # Not every object has a $name; fall back to an empty name instead of failing.
    obj_name = obj_props.get("$name", "")
    obj_type = obj.attrs.type
    obj_regist_date = obj.registrationDate
    obj_string = f"- Object of type {obj_type} is named {obj_name}, is identified by {obj_permid} and it was registered in {obj_regist_date}."

    # The name is already in the sentence; empty properties are skipped.
    props_string = ""
    for key, value in obj_props.items():
        if key != "$name" and value:
            props_string += f"\n\t{key}: {value}"

    if props_string:
        obj_string += f" It contains the following properties:{props_string}"

    if obj.parents:
        obj_string += " It is connected to the objects with the following identifiers:"
        for parent_obj in obj.parents:
            parent_obj = OPENBIS_SESSION.get_object(parent_obj, attrs = ['parents'])
            obj_string += f" {parent_obj.permId}"
            list_of_objects = get_parent_objects(parent_obj, list_of_objects)

        obj_string += "."

    list_of_objects[obj_permid] = obj_string
    return list_of_objects

def load_chatbot(prompt, threshold = 0.5):
    """Append the stored object descriptions relevant to ``prompt`` to the chat.

    The prompt is embedded with ``models/text-embedding-004`` and compared by
    cosine similarity with the stored object embeddings. Descriptions scoring
    above ``threshold`` are joined with newlines and appended to ``MESSAGES``
    as a user turn, followed by a fixed model acknowledgement.

    Descriptions and embeddings are paired by permId, not by position, so the
    two stores may differ in order or content; objects without an embedding
    are left out. The context turns are appended even when nothing is
    relevant or the stores are empty.

    Args:
        prompt: Text of the user's question.
        threshold: Minimum cosine similarity (exclusive) for a description to
            be included. Defaults to 0.5.
    """
    prompt_embedding = genai.embed_content(model="models/text-embedding-004", content=prompt)
    prompt_embedding = np.asarray(prompt_embedding["embedding"], dtype=float).reshape(1, -1)

    # Keep the order of the details store; keep only objects that have an embedding.
    permids = [permid for permid in OPENBIS_OBJECTS_DETAILS if permid in OPENBIS_OBJECTS_EMBEDDINGS]

    relevant_documents = []
    if permids:
        embeddings = np.asarray([OPENBIS_OBJECTS_EMBEDDINGS[permid] for permid in permids], dtype=float)
        # Force one row per object. This drops extra singleton axes like
        # squeeze did, but stays 2-D when there is a single object.
        embeddings = embeddings.reshape(len(permids), -1)

        similarity_scores = cosine_similarity(prompt_embedding, embeddings)[0]
        relevant_documents = [
            OPENBIS_OBJECTS_DETAILS[permid]["details"]
            for permid, score in zip(permids, similarity_scores)
            if score > threshold
        ]

    # Load chatbot
    prompt_data = "\n".join(relevant_documents)
    MESSAGES.append({"role": "user", "parts": [{"text": f"This is the relevant data: {prompt_data}"}]})
    MESSAGES.append({"role": "model", "parts": [{"text": "Ok feel free to ask questions."}]})

In [None]:
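# Maintenance calls, left disabled on purpose: uncomment and run them manually
# to rewrite the JSON files that the configuration cell loads.
# get_embeddings(OPENBIS_OBJECTS_DETAILS, "/home/jovyan/openBIS_embeddings.json")
# retrieve_openbis_objects("/home/jovyan/openBIS_data.json")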

# openBIS chatbot

In [None]:
# Attach the click handlers, then render the interface from top to bottom.
enter_button.on_click(ask_chatbot)
quit_button.on_click(close_notebook)

for widget in (
    increase_buttons_size,
    prompt_textarea,
    enter_button,
    model_answer_textarea,
    quit_button,
):
    display(widget)