In [0]:
import dataiku
from langchain.chains.question_answering import load_qa_chain
from dataiku.langchain.dku_llm import DKUChatLLM
import json
# Knowledge Bank ids, keyed by the short name each bank is referred to by
# in the rest of this notebook.
KB_IDs = dict(
    tech_docs="zQ92IhQ9",
    non_conformities="WnKb6p17",
)

# DSS API client and a handle on its default project.
client = dataiku.api_client()
project = client.get_default_project()

In [0]:
# List the LLMs available to this project so that an id can be chosen for LLM_ID.
# The loop variable is deliberately not named `llm`: that name is bound to the
# LLM handle in a later cell, and re-running this cell afterwards would
# otherwise overwrite the handle with a list item.
llm_list = project.list_llms()

for llm_item in llm_list:
    print(f"- {llm_item.description} (id: {llm_item.id})")

In [0]:
# Fill with your LLM id.
# This id is passed both to the LangChain chat wrapper (DKUChatLLM) and to
# project.get_llm() for the direct completions further down.
LLM_ID = "openai:OpenAI-FA:gpt-4o-mini"

In [0]:
# Wrap each configured Knowledge Bank id in a Dataiku KnowledgeBank handle.
KBs = {
    kb_name: dataiku.KnowledgeBank(id=kb_id, project_key=project.project_key)
    for kb_name, kb_id in KB_IDs.items()
}

# Expose every Knowledge Bank as a LangChain vector store, under the same key.
vector_stores = {kb_name: kb.as_langchain_vectorstore() for kb_name, kb in KBs.items()}

# LangChain chat model for the selected LLM id, with temperature set to 0.
langchain_llm = DKUChatLLM(temperature=0, llm_id=LLM_ID)

In [0]:
# Raw user input that is interpolated into the prompts of the following cells.
user_message = "Fuel Voltage Levels Quality"

# Question answering chain of type "stuff" built on the LangChain chat model.
# NOTE(review): `chain` is not referenced by any other cell visible here.
chain = load_qa_chain(langchain_llm, chain_type="stuff")


In [0]:
# Ask the LLM to expand the short user input into a retrieval query for the
# knowledge banks. The instruction text is assembled from fixed fragments;
# only one fragment interpolates the user input.
prompt = "".join([
    "You're supporting Quality Controller for A220 and rely on the knowledge from the A220 technical ",
    "doc and non conformity knowledge base (vector databases). You must provide an optimized expanded prompt towards ",
    "those vector databases to enable the best retrieval given the user input. ",
    "The expansion should only concern specificity around the user query and avoid retrieval of non specific vocabulary, ",
    "as knowledge databses will contain any past non conformity. Avoid generic vocabulary like 'non-conformity', 'issue', ",
    "'specification', 'standard', 'operations', 'maintainance'. But expand domain vocabulary.\n ",
    "Format of the output: Please just provide the query without any comment to be reused as is. ",
    "Optimal request should be between 20 and 50 words \n\n",
    f"The user is the following:\n {user_message}\n\n\n",
    "Remember to only provide the requested query for the knowledge database without any comment.",
])

# Single-message completion against the LLM selected by LLM_ID.
llm = project.get_llm(LLM_ID)
completion = llm.new_completion()
completion.with_message(prompt)
resp = completion.execute()

# The completion text is kept as the query used by the retrieval cell.
query = resp.text
print(query)

In [0]:
# Run the similarity search once per knowledge bank and keep the raw LangChain
# documents, grouped by knowledge-bank name. (Previously each search was
# executed twice and `search_results` changed type half-way through the cell.)
search_results = {
    kb_name: store.similarity_search(query)
    for kb_name, store in vector_stores.items()
}

# Compact overview (source document + chunk id) of every hit, all banks together.
search_summary = [
    {
        "doc": hit.metadata['doc'],
        "chunk_id": hit.metadata['chunk_id'],
    }
    for hits in search_results.values()
    for hit in hits
]
print(search_summary)

# Full text of each hit. The source document name is stored in the metadata
# dict, not as an attribute of the document object, so `hit.doc` would raise
# AttributeError.
for kb_name, hits in search_results.items():
    for hit in hits:
        print(f"# {hit.metadata['doc']} \n{hit.page_content}\n")

In [0]:
# 3rd step: give the best advice given the documents
        
# Step of the non-conformity process the assistant writes for; the step codes
# are listed in the prompt text below.
role = "000"

# `search_results` maps each knowledge-bank name to a list of LangChain
# documents, which json.dumps cannot serialise directly. Reduce every hit to
# plain fields first. default=str is a safety net for metadata values that are
# not JSON-native; ensure_ascii=False keeps accented characters readable.
context_json = json.dumps(
    {
        kb_name: [
            {
                "doc": hit.metadata["doc"],
                "chunk_id": hit.metadata["chunk_id"],
                "chunk": hit.page_content,
            }
            for hit in hits
        ]
        for kb_name, hits in search_results.items()
    },
    ensure_ascii=False,
    default=str,
)

# The template must be an f-string: as a plain string, the {…} placeholders
# were sent to the LLM verbatim instead of being replaced by the retrieved
# documents, the user message and the role.
prompt = f"""
    #Processus
    Une non conformité de l'A220 doit être traitée selon le processus suivant :

    000 - rapport de non-conformité par le Quality Controler
    100 - analyse et recommandation / plan d'action par le Design Office
    200 - validation de l'analyse / plan d'action par le Design Manager
    300 - calcul de structure lié au plan d'action et recommandation / selon le Stress Office
    400 - du calcul / plan d'action amendé par le Stress Manager
    500 - plan d'action final validé par le Quality Manager

    Vous supportez le role de l'étape {role} et devez rédiger de la facon la plus explicite en prenant
    les exemples fournis et la documentation technique.

    #Exemples et documentation technique:
    {context_json}

    #La requête utilisateur est la suivante:
    {user_message}

    #Réponse
    ## Instructions de réponse    
    Veuillez répondre pour l'étape {role}, en fournissant le meilleur 'label' et la meilleure 'description' possible selon les exemples, n'hésitant pas à illustrer selon les
    documentation technique le cas échéant. La description fournie doit être complètement rédigée.
    
    ##Format de réponse
    Répondez en anglais sauf si l'utilisateur utilise une autre langue ou précise des instructions de langue.
    Format de réponse attendu en json sans autre mise en forme : '##(label)\ndescription'
"""

# Single-message completion; `resp` is read by the following cell.
completion = llm.new_completion()
completion.with_message(prompt)
resp = completion.execute()

In [0]:
# Bundle the LLM answer with the retrieval results it was built from, tagged
# with the "ai" role, and show the resulting payload.
deep_chat_response = dict(
    text=resp.text,
    sources=search_results,
    role="ai",
)
print(deep_chat_response)