# MySQL 데이터 준비

In [None]:
!python -m ensurepip --upgrade
!pip install -U boto3 --quiet
!pip install -U botocore --quiet
!pip install langchain --quiet
!pip install sqlalchemy --quiet
!pip install mysql-connector-python --quiet
!pip install pymysql --quiet

In [None]:
import boto3
import json
import time
import datetime
import os

import sqlalchemy
from sqlalchemy import create_engine
import mysql.connector

아래 셀의 `db_info`에 실제 배포된 환경의 host / user / password 정보를 입력합니다.

CloudFormation으로 리소스를 배포했다면 스택의 Outputs 탭에서 이 값들을 확인할 수 있습니다.

In [None]:
# Connection settings for the MySQL server. Replace the placeholders with the
# values from the output of your CloudFormation stack.
db_info = {
    "host": "{database_host}",
    "user": "{database_username}",
    "password": "{database_password}",
    "database": "DEMO_DB",
}

# Connect at server level: only host / user / password are handed to the
# driver. The "database" entry is not passed here; DEMO_DB is (re)created by
# the statements that follow in this notebook.
server_credentials = {
    field: db_info[field] for field in ("host", "user", "password")
}
mydb = mysql.connector.connect(**server_credentials)
mycursor = mydb.cursor()

In [None]:
# Start from a clean slate: drop any existing DEMO_DB, then create it again.
for database_statement in (
    "DROP DATABASE IF EXISTS DEMO_DB",
    "CREATE DATABASE DEMO_DB",
):
    mycursor.execute(database_statement)

SQL Generation에 사용할 세 개의 테이블을 생성합니다.

In [None]:
# Ordered DDL script for the three demo tables. Each table is dropped (if it
# exists) immediately before it is created. The `--` remarks inside the CREATE
# statements are part of the SQL text that is sent to the server.
table_ddl = (
    "DROP TABLE IF EXISTS DEMO_DB.EMPLOYEE_TABLE",
    """
CREATE TABLE DEMO_DB.EMPLOYEE_TABLE -- Table name
(
    EMPID INT(10), -- employee id of the employee
    NAME VARCHAR(20), -- name of the employee
    SALARY INT(10), -- salary that the employee gets or makes (USD)
    BONUS INT(10),-- bonus that the employee gets or makes (USD)
    CITY VARCHAR(20), -- city where employees work from or belongs to
    JOINING_DATE TIMESTAMP,-- date of joining for the employee
    ACTIVE_EMPLOYEE INT(2), -- whether the employee is active(1) or in active(0)
    DEPARTMENT VARCHAR(20), -- the deparment name where employee works or belongs to
    TITLE VARCHAR(20) -- the title in office which employees has or holds
)
""",
    "DROP TABLE IF EXISTS DEMO_DB.DEPARTMENT_TABLE",
    """
CREATE TABLE DEMO_DB.DEPARTMENT_TABLE
(
    DEPT_ID INT(10) PRIMARY KEY, -- Department ID (Primary Key)
    DEPT_NAME VARCHAR(20) NOT NULL, -- Department Name
    LOCATION VARCHAR(20), -- Department Location
    HEAD_COUNT INT(10), -- Number of Employees in the Department
    BUDGET_ALLOCATION INT(15), -- Budget Allocated to the Department
    MANAGER_ID INT(10) -- Manager ID of the Department
)
""",
    "DROP TABLE IF EXISTS DEMO_DB.LEAVE_TABLE",
    """
CREATE TABLE DEMO_DB.LEAVE_TABLE
(
  LEAVE_ID INT(10) PRIMARY KEY, -- Leave ID (Primary Key)
  EMPLOYEE_ID INT(10) NOT NULL, -- Employee ID (Foreign Key referencing EMPLOYEE_TABLE)
  LEAVE_TYPE VARCHAR(20) NOT NULL, -- Leave Type (e.g., Annual, Sick, Maternity)
  START_DATE DATE NOT NULL, -- Start Date of the Leave
  END_DATE DATE NOT NULL, -- End Date of the Leave
  REASON VARCHAR(50) -- Reason for the Leave
)
""",
)

# Run the script top to bottom on the shared cursor.
for ddl_statement in table_ddl:
    mycursor.execute(ddl_statement)

In [None]:
def insert_data_from_json(cursor, db_info, json_directory='libs/mysql-samples/'):
    """Load the sample JSON files into the three DEMO_DB tables.

    Each JSON file is expected to hold a list of objects keyed by the column
    names listed in ``json_files`` below.

    Args:
        cursor: DB-API cursor used to run the INSERT statements.
        db_info: Despite its name, this must be the open database
            *connection* (any object exposing ``commit()``), not the
            connection-settings dict. It is used to commit each table's rows.
        json_directory: Directory containing ``employee_table.json``,
            ``department_table.json`` and ``leave_table.json``. A trailing
            slash is optional.

    Returns:
        None. Problems are reported with ``print`` instead of being raised,
        so a failed load does not abort the surrounding notebook run.
    """
    json_files = {
        'employee_table.json': ('DEMO_DB.EMPLOYEE_TABLE', ['EMPID', 'NAME', 'SALARY', 'BONUS', 'CITY', 'JOINING_DATE', 'ACTIVE_EMPLOYEE', 'DEPARTMENT', 'TITLE']),
        'department_table.json': ('DEMO_DB.DEPARTMENT_TABLE', ['DEPT_ID', 'DEPT_NAME', 'LOCATION', 'HEAD_COUNT', 'BUDGET_ALLOCATION', 'MANAGER_ID']),
        'leave_table.json': ('DEMO_DB.LEAVE_TABLE', ['LEAVE_ID', 'EMPLOYEE_ID', 'LEAVE_TYPE', 'START_DATE', 'END_DATE', 'REASON'])
    }

    # Check up front that we can commit. Otherwise the first INSERT would run
    # and the missing commit() would only surface afterwards, leaving a stray
    # uncommitted row behind.
    commit = getattr(db_info, "commit", None)
    if not callable(commit):
        print(
            "Error occurred: db_info must be a database connection with "
            f"commit(), got {type(db_info).__name__}"
        )
        return

    try:
        for json_file, (table_name, columns) in json_files.items():
            # os.path.join works with or without a trailing slash.
            full_path = os.path.join(json_directory, json_file)
            with open(full_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            # The statement text is identical for every row of a table, so
            # build it once and let the driver bind the values.
            placeholders = ', '.join(['%s'] * len(columns))
            sql = "INSERT INTO {} ({}) VALUES ({})".format(table_name, ', '.join(columns), placeholders)
            rows = [tuple(record[col] for col in columns) for record in data]

            if rows:
                cursor.executemany(sql, rows)
            commit()  # one commit per table
    except Exception as e:
        # Discard the partially inserted table so a later commit on the same
        # connection cannot persist half of a file.
        rollback = getattr(db_info, "rollback", None)
        if callable(rollback):
            rollback()
        print(f"Error occurred: {e}")


libs/mysql-samples에 사전 저장된 샘플 데이터를 테이블에 삽입합니다.

In [None]:
# Pass the open connection (mydb), not the db_info settings dict: the loader
# calls .commit() on its second argument, which a dict does not provide.
insert_data_from_json(mycursor, mydb)

아래에 데이터가 조회되면 정상 처리된 것입니다.

In [None]:
# Dump each demo table so the inserted rows can be checked by eye.
# Every entry pairs the heading to print with the query that feeds it.
table_dumps = (
    ("Employees:", "SELECT * FROM DEMO_DB.EMPLOYEE_TABLE"),
    ("\nDepartments:", "SELECT * FROM DEMO_DB.DEPARTMENT_TABLE"),
    ("\nLeaves:", "SELECT * FROM DEMO_DB.LEAVE_TABLE"),
)

fetched_tables = []
for heading, select_sql in table_dumps:
    mycursor.execute(select_sql)
    table_rows = mycursor.fetchall()
    fetched_tables.append(table_rows)

    print(heading)
    for table_row in table_rows:
        print(table_row)

# Keep the per-table result names available to later cells.
employee_result, department_result, leave_result = fetched_tables


# SQL 쿼리 생성 - LangChain SQLDatabase 활용

In [None]:
from langchain.sql_database import SQLDatabase
import pymysql

In [None]:
from urllib.parse import quote_plus

# Percent-encode the credentials: a user name or password containing
# characters such as '@', ':', '/' or '#' would otherwise be parsed as part of
# the URL structure and the connection would fail. The database name comes
# from db_info so it is defined in one place only.
url = (
    f"mysql+pymysql://{quote_plus(db_info['user'])}:{quote_plus(db_info['password'])}"
    f"@{db_info['host']}/{db_info['database']}"
)
db = SQLDatabase.from_uri(url)

In [None]:
from langchain_community.chat_models import BedrockChat
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [None]:
# Bedrock runtime client; no region or credentials are passed explicitly here.
bedrock_client = boto3.client(service_name='bedrock-runtime')

# Claude 3 Haiku chat model with temperature 0, streaming enabled and a
# stdout callback handler attached.
llm = BedrockChat(
    client=bedrock_client,
    model_id='anthropic.claude-3-haiku-20240307-v1:0',
    model_kwargs={"temperature": 0},
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

`create_sql_query_chain`은 데이터베이스의 스키마를 조회하고, 질문에 맞는 쿼리를 생성하는 작업을 자동으로 처리하는 Chain입니다.

이 방법은 주로 소규모 데이터베이스를 대상으로 복잡도가 낮은 쿼리를 수행할 때 잘 동작합니다.

- Chains: 사전에 결정되어 있는 연속된 작업 수행
- Agents: 어떤 작업을 할 것인지 LLM이 판단해서 작업 수행

SQL Chain에서 쿼리 생성에 사용하는 프롬프트 내용을 아래와 같이 조회할 수 있습니다.

In [None]:
from langchain.chains import create_sql_query_chain

In [None]:
# Build the SQL-generation chain from the model and the database wrapper,
# then show the first prompt it uses.
chain = create_sql_query_chain(llm, db)
sql_generation_prompt = chain.get_prompts()[0]
sql_generation_prompt.pretty_print()

이제 사용자 질문을 SQL Chain에 전달하여 SQL 쿼리를 생성합니다.

In [None]:
# Alternative sample questions; uncomment one to use it instead.
#   (number of active employees in each department)
#question = "각 부서에 있는 active employee의 수를 알려주세요"
#   (salary + bonus per employee as 'Total Compensation', and the department with the highest total)
#question = "각 직원의 급여와 보너스를 합산한 'Total Compensation'을 계산하고, 총 급여가 가장 높은 부서를 알려주세요"
# Active question: Total Compensation (USD) of the employees who took the
# longest leave in each department.
question = "각 부서에서 가장 오랜 기간 휴가를 사용한 직원들의 Total Compensation (USD) 을 계산해주세요"

# The chain is invoked with a dict carrying the question under "question".
chain_input = {"question": question}
response = chain.invoke(chain_input)

# 생성된 SQL 쿼리 실행 및 답변 생성

답변에서 SQL 쿼리 부분을 파싱한 다음, 쿼리를 수행합니다.

In [None]:
# Take the text that follows the "SQLQuery:" marker when it is present. If the
# model returned the bare statement with no marker, use the whole response
# instead of failing with IndexError.
response_parts = response.split("SQLQuery:")
SQL = (response_parts[1] if len(response_parts) > 1 else response_parts[0]).strip()

# NOTE(review): SQL is model-generated text and is executed verbatim on the
# same cursor that dropped and created DEMO_DB earlier. Consider a read-only
# database user for this step.
mycursor.execute("USE DEMO_DB")
mycursor.execute(SQL)
myresult = mycursor.fetchall()
print(myresult)

쿼리 결과를 바탕으로 답변을 생성합니다.

In [None]:
from langchain import PromptTemplate
from langchain_core.messages import HumanMessage

In [None]:
def answer_with_data(prompt, question, reference_data):
    """Ask the chat model to answer ``question`` using ``reference_data``.

    Args:
        prompt: Template text containing ``{question}`` and ``{reference}``
            placeholders.
        question: The user's question, substituted for ``{question}``.
        reference_data: Data to ground the answer (here, the fetched query
            rows), substituted for ``{reference}``.

    Returns:
        Whatever the module-level ``llm`` returns for the single human message.
    """
    prompt_for_answer = PromptTemplate.from_template(prompt)
    messages = [
        HumanMessage(
            content=prompt_for_answer.format(question=question, reference=reference_data)
        )
    ]
    # Use .invoke() rather than calling the model object directly: the
    # __call__ form is deprecated in LangChain, and .invoke() matches how the
    # SQL chain is run elsewhere in this notebook.
    final_answer = llm.invoke(messages)
    return final_answer

In [None]:
# Template for the final answer. {question} and {reference} are filled in by
# answer_with_data(); the model is told to reply in simple Korean and to leave
# out table and schema names.
prompt = """
Human: Based on the question below

{question}

the reference data were given below. 

{reference}

Provide answer in simple Korean statement and don't include table or schema names.
Assistant: 
"""

In [None]:
# Produce the natural-language answer from the question and the fetched rows.
final_answer = answer_with_data(
    prompt=prompt,
    question=question,
    reference_data=myresult,
)