In [1]:
from langchain_ollama import ChatOllama
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [2]:
# Name of the Ollama model shared by both chat clients below.
local_llm = "llama3.1"

# Plain chat client: temperature 0 and no output-format constraint.
llm = ChatOllama(
    model=local_llm,
    temperature=0,
)

# Same model and temperature, but created with Ollama's `format="json"` option.
llm_json_mode = ChatOllama(
    model=local_llm,
    temperature=0,
    format="json",
)

In [3]:
# Schema entry "name": lecturer names mentioned in the user message.
full_name_field = ResponseSchema(
    name="name",
    description=(
        "Based on the latest user message, extract the names presented in the message. "
        "Here are some possible labels: 'full_name', 'short_name'. "
        "All the names are in Vietnamese and they can be in capital form or not. "
        "If the name is missing first name or last name, it is a short name. "
        "For example, a person named 'Đinh Viết Sang', in the message it can be 'đinh viết sang' (not capital form) or 'Sang' (short name)."
    ),
)

# Schema entry "courses": subjects/courses mentioned in the user message.
courses_name_field = ResponseSchema(
    name="courses",
    description=(
        "Based on the user message about lecturer information, extract the subjects/courses that a teacher can teach in a university. "
        "Here are some possible labels: 'course_name', 'course_code'. "
        "The course name is the name of the course, for example 'Ứng dụng học sâu', while the code is in the format 'IT1234E'. "
        "Everything will be in Vietnamese. "
    ),
)

# Parser built from the two fields above, in this order: names first, then courses.
conversation_metadata_output_schema_parser = StructuredOutputParser.from_response_schemas(
    [full_name_field, courses_name_field]
)

# Format instructions text that is later injected into the prompt's
# {format_instructions} slot.
conversation_metadata_output_schema = (
    conversation_metadata_output_schema_parser.get_format_instructions()
)

In [11]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {format_instructions} and {chat_history}.
# Written as adjacent literals so every newline and the single-space separator
# lines are explicit.
conversation_metadata_prompt_template_str = (
    "\n"
    "Given in input a question about lecturer, courses information between a user and a Hanoi University of Science and Technology assistant, "
    "extract the following metadata according to the format instructions below. If there are no answer, extract the dictionary with no values.\n"
    " \n"
    "<< FORMATTING >>\n"
    "{format_instructions}\n"
    " \n"
    "<< INPUT >>\n"
    "{chat_history}\n"
)
 
# Wrap the raw prompt text in a PromptTemplate; it is later filled through
# .format(chat_history=..., format_instructions=...).
conversation_metadata_prompt_template = PromptTemplate.from_template(
    conversation_metadata_prompt_template_str
)

In [49]:
# Sample user question (Vietnamese) that names two lecturers and two courses.
question = "Cho thông tin về thầy Đức Anh dạy môn Machine Learning và cô Huỳnh Thị Thanh Bình môn Tối ưu lập kế hoạch"

# Fill both template slots: the parser's format instructions and the question.
conversation_metadata_recognition_prompt = conversation_metadata_prompt_template.format(
    format_instructions=conversation_metadata_output_schema,
    chat_history=question,
)

# Despite the `_str` suffix, this holds the reply object returned by
# `llm.invoke`; its text is read from `.content` below.
conversation_metadata_detected_str = llm.invoke(conversation_metadata_recognition_prompt)

# Parse the reply text into a Python dict using the schema parser.
conversation_metadata_detected = conversation_metadata_output_schema_parser.parse(
    conversation_metadata_detected_str.content
)
print(conversation_metadata_detected)

{'name': {'full_name': 'Đức Anh', 'short_name': None}, 'courses': ['Machine Learning', 'Tối ưu lập kế hoạch']}


In [55]:
# System prompt for the JSON-mode NER call.
# The example output is written as strict JSON (double-quoted keys and strings)
# because this prompt is sent to the `format="json"` client; a Python-style
# single-quoted dict is not valid JSON. Spelling and grammar of the
# instructions are also corrected so the label descriptions are unambiguous.
prompt = """
You are a Named Entity Recognition expert that detects entities based on a question. The question will be about lecturers or a course in Hanoi University of Science and Technology. Here are the labels and their description:

- names: the name of the lecturer. For example, one's name can be 'Đinh Viết Sang', but can also be 'Sang'.
- courses: the course/subject name, referring to the course/subject name ('Ứng dụng học sâu', 'Machine Learning').

The lecturer names will be in Vietnamese, but the course name can be in English or Vietnamese.
Return JSON with 2 keys, 'names' and 'courses'. Their values are lists of entity names. If there is no answer, return an empty JSON with the 2 main keys only. ALWAYS use what is presented in the question for the answer and copy them into the output.

Here is an example output:
{"names": ["Đinh Viết Sang", "Tạ Hải Tùng", "Hiếu", "Đức Anh"], "courses": ["Machine Learning", "Tối ưu hóa"]}
"""

In [56]:
from langchain_core.messages import HumanMessage, SystemMessage

# Sample user question (Vietnamese) that names two lecturers and two courses.
question = "Cho thông tin về thầy Đức Anh dạy môn Machine Learning và cô Huỳnh Thị Thanh Bình môn Tối ưu lập kế hoạch"

# The NER instructions go in as the system message and the question as the
# human message, sent to the JSON-mode client.
answer = llm_json_mode.invoke(
    [
        SystemMessage(content=prompt),
        HumanMessage(content=question),
    ]
)

In [57]:
# Show the raw text of the JSON-mode reply (the `.content` of `answer`).
print(answer.content)

{ 
  "names": ["Đức Anh", "Huỳnh Thị Thanh Bình"], 
  "courses": ["Machine Learning", "Tối ưu lập kế hoạch"] 
}


{'names': ['Đức Anh', 'Huỳnh Thị Thanh Bình'], 'courses': ['Machine Learning', 'Tối ưu lập kế hoạch']}


In [None]:
import os
from src.database.db_utils import setup_database
from src.utils.utils import get_database
from src.database.sql_dataloader import initialize_database

# Single source of truth for the lecturer data file, so the existence check and
# the loader call can never point at different paths. The path is relative and
# therefore resolved against the kernel's current working directory.
LECTURERS_JSON_PATH = "data/lecturers/soict_lecturers.json"

# NOTE(review): the saved run of this cell ended with
# `ModuleNotFoundError: No module named 'database'`. Presumably one of the
# `src.*` modules imported above imports `database` as a top-level package -
# confirm, then fix the import path before relying on this cell.
if os.path.exists(LECTURERS_JSON_PATH):
    # Data file is present: initialize the database with it.
    engine, db = initialize_database(LECTURERS_JSON_PATH)
else:
    # No data file: just connect to the existing database.
    engine = setup_database()
    db = get_database(engine)

ModuleNotFoundError: No module named 'database'