# 필수 패키지 설정 및 OpenSearch 클러스터 생성 (약 40분 소요)
>이 노트북은 SageMaker Studio의 **`Python 3 (ipykernel)`** kernel 및 ml.t3.medium 인스턴스에서 테스트되었습니다.

---
### 중요
- 이 노트북은 Anthropic 의 Claude-v3 Model Access가 허용된 계정에서 실행할 수 있습니다.
- Model Access가 없는 분은 노트북의 코드와 결과만을 확인해 주세요.
- 실행 시 **"과금"** 이 발생하는 점을 유념해 주시기 바랍니다.

---

In [2]:
!pip install -r requirements.txt



In [3]:
%load_ext autoreload
%autoreload 2

import sys, os
# Put the absolute path of the current working directory on sys.path so that
# project-local packages (e.g. `utils.ssm`, imported below) can be resolved.
module_path = "."
sys.path.append(os.path.abspath(module_path))

In [4]:
import boto3
import uuid
import botocore
import time
from utils.ssm import parameter_store

# OpenSearch 클러스터 생성
- 테스트를 위해 DEV=True로 설정해서 진행합니다. 이후 실제 사용 시에는 DEV=False로 해야 합니다.

### 선수 조건
- 아래의 링크를 참조해서 OpenSearch Service 를 생성하고, opensearch_domain_endpoint, http_auth 를 복사해서, 아래 셀의 내용을 대체 하세요.
    - [OpenSearch 생성 가이드](https://github.com/gonsoomoon-ml/Kor-LLM-On-SageMaker/blob/main/2-Lab02-QA-with-RAG/4.rag-fsi-data-workshop/TASK-4_OpenSearch_Creation_and_Vector_Insertion.ipynb)
- 랭체인 오픈서치 참고 자료
    - [Langchain Opensearch](https://python.langchain.com/docs/integrations/vectorstores/opensearch)

In [5]:
# Workshop settings consumed by the domain-creation cell below.
# Setting DEV to True is recommended for workshops to avoid excessive cost and
# resource usage.
DEV = True # True: create 1-AZ without standby; False: 3-AZ with standby
VERSION = "2.11" # OpenSearch version (e.g. 2.7 / 2.9 / 2.11)

# Master user for the domain's internal user database.
# NOTE(review): the password is hard-coded in the notebook; consider replacing
# it with your own value before running outside a throwaway workshop account.
opensearch_user_id = "raguser"
opensearch_user_password = "MarsEarth1!"

In [6]:
# 0. Create the OpenSearch domain
#
# Builds the cluster / EBS / security configuration and submits a
# create_domain request. The following cell polls until the domain reports an
# endpoint.

import json

region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]
opensearch = boto3.client('opensearch', region)

# Random 8-hex-character suffix so that repeated runs get distinct domain names.
rand_str = uuid.uuid4().hex[:8]
domain_name = f'rag-hol-{rand_str}'

# Used when DEV is False: 3 data nodes, zone awareness, Multi-AZ with standby
# and 3 dedicated master nodes.
cluster_config_prod = {
    'InstanceCount': 3,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': True,
    'DedicatedMasterEnabled': True,
    'MultiAZWithStandbyEnabled': True,
    'DedicatedMasterType': 'r6g.large.search',
    'DedicatedMasterCount': 3,
}

# Used when DEV is True: a single data node, no zone awareness and no dedicated
# master nodes.
cluster_config_dev = {
    'InstanceCount': 1,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': False,
    'DedicatedMasterEnabled': False,
}

ebs_options = {
    'EBSEnabled': True,
    'VolumeType': 'gp3',
    'VolumeSize': 100,
}

# Fine-grained access control backed by the internal user database; the master
# user is the id/password pair defined in the settings cell.
advanced_security_options = {
    'Enabled': True,
    'InternalUserDatabaseEnabled': True,
    'MasterUserOptions': {
        'MasterUserName': opensearch_user_id,
        'MasterUserPassword': opensearch_user_password,
    },
}

# Resource-based access policy: allows any AWS principal to call es:* on this
# domain's resources. Built as a dict and serialized with json.dumps so that
# quoting/escaping is always valid JSON (the hand-written f-string relied on
# the invalid Python escape sequence "\/").
access_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"AWS": "*"},
            "Action": "es:*",
            "Resource": f"arn:aws:es:{region}:{account_id}:domain/{domain_name}/*",
        }
    ],
}
ap = json.dumps(access_policy)

cluster_config = cluster_config_dev if DEV else cluster_config_prod

response = opensearch.create_domain(
    DomainName=domain_name,
    EngineVersion=f'OpenSearch_{VERSION}',
    ClusterConfig=cluster_config,
    AccessPolicies=ap,
    EBSOptions=ebs_options,
    AdvancedSecurityOptions=advanced_security_options,
    NodeToNodeEncryptionOptions={'Enabled': True},
    EncryptionAtRestOptions={'Enabled': True},
    DomainEndpointOptions={'EnforceHTTPS': True},
)


In [7]:
%%time

# 1. OpenSearch 설치

def wait_for_domain_creation(domain_name, poll_interval=60):
    """Block until the OpenSearch domain reports an endpoint.

    Repeatedly calls ``describe_domain`` on the module-level ``opensearch``
    client until the returned ``DomainStatus`` contains an ``Endpoint`` key,
    printing a progress line before each sleep.

    Args:
        domain_name: Name of the domain to wait for.
        poll_interval: Seconds to sleep between polls. Defaults to 60.

    Returns:
        The value of ``DomainStatus['Endpoint']`` once it is present, or
        ``None`` if the service reports that the domain does not exist.

    Raises:
        botocore.exceptions.ClientError: For any API error other than
            ``ResourceNotFoundException``.
    """
    try:
        response = opensearch.describe_domain(DomainName=domain_name)
        # Keep polling while the domain status has no endpoint yet.
        while 'Endpoint' not in response['DomainStatus']:
            print('Creating Opensearch domain...')
            time.sleep(poll_interval)
            response = opensearch.describe_domain(DomainName=domain_name)
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ResourceNotFoundException':
            print('Domain not found.')
            return None
        # Bare raise re-raises the active exception unchanged.
        raise

    # Once we exit the loop, the endpoint is present in the domain status.
    endpoint = response['DomainStatus']['Endpoint']
    print('Domain endpoint ready to receive data: ' + endpoint)
    return endpoint

# Block until the domain created in the previous cell exposes an endpoint.
wait_for_domain_creation(domain_name)

response = opensearch.describe_domain(DomainName=domain_name)
domain_host = response['DomainStatus']['Endpoint']
opensearch_domain_endpoint = f"https://{domain_host}"

# 2. Store the OpenSearch connection info in SSM via the parameter_store helper

region = boto3.Session().region_name
pm = parameter_store(region)

# Each entry is (parameter key, value, enc flag). enc=True is passed only for
# the password (presumably stored encrypted; confirm in utils.ssm).
for param_key, param_value, encrypted in (
    ("opensearch_domain_endpoint", opensearch_domain_endpoint, False),
    ("opensearch_user_id", opensearch_user_id, False),
    ("opensearch_user_password", opensearch_user_password, True),
):
    pm.put_params(
        key=param_key,
        value=f'{param_value}',
        overwrite=True,
        enc=encrypted,
    )

# 3. Associate the Nori plugin (Korean text analysis) with the domain

# Package ids looked up as nori_pkg_id[region][OpenSearch version]; only the
# regions and versions listed here are covered.
nori_pkg_id = {
    'us-east-1': {
        '2.3': 'G196105221',
        '2.5': 'G240285063',
        '2.7': 'G16029449',
        '2.9': 'G60209291',
        '2.11': 'G181660338',
    },
    'us-west-2': {
        '2.3': 'G94047474',
        '2.5': 'G138227316',
        '2.7': 'G182407158',
        '2.9': 'G226587000',
        '2.11': 'G79602591',
    },
}

pkg_response = opensearch.associate_package(
    DomainName=domain_name,
    PackageID=nori_pkg_id[region][VERSION],  # nori plugin
)

def wait_for_associate_package(domain_name, max_results=1):
    """Block until package association on the domain is no longer in progress.

    Polls ``list_packages_for_domain`` on the module-level ``opensearch``
    client every 60 seconds while any returned package has the status
    ``ASSOCIATING``, printing a progress line before each sleep.

    Args:
        domain_name: Name of the domain whose packages are checked.
        max_results: Passed as ``MaxResults`` to ``list_packages_for_domain``;
            every package in the returned page is checked. Defaults to 1.

    Raises:
        RuntimeError: If the domain has no packages listed, or if any listed
            package ended in the ``ASSOCIATION_FAILED`` status.
    """

    def _package_statuses():
        # One status string per package in the (first) page of results.
        response = opensearch.list_packages_for_domain(
            DomainName=domain_name,
            MaxResults=max_results,
        )
        return [
            detail['DomainPackageStatus']
            for detail in response['DomainPackageDetailsList']
        ]

    statuses = _package_statuses()
    # Every 60 seconds, check whether any package is still being associated.
    while 'ASSOCIATING' in statuses:
        print('Associating packages...')
        time.sleep(60)
        statuses = _package_statuses()

    if not statuses:
        raise RuntimeError(
            f'No packages are listed for domain {domain_name!r}.'
        )
    # Do not report success when the service says the association failed.
    if 'ASSOCIATION_FAILED' in statuses:
        raise RuntimeError(
            f'Package association failed for domain {domain_name!r}.'
        )

    print('Nori Plugin Associated!')

# Wait for the package association requested above to leave the ASSOCIATING state.
wait_for_associate_package(domain_name)

Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Creating Opensearch domain...
Domain endpoint ready to receive data: search-rag-hol-b1c12dcc-6e5y7l5iniphi5mmhbh3w5gni4.us-east-1.es.amazonaws.com
Parameter stored successfully.
Parameter stored successfully.
Parameter stored successfully.
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating packages...
Associating pack

In [8]:
# Read the three stored parameters back and print them as a sanity check.
# NOTE(review): this prints the OpenSearch password in clear text into the
# notebook output; clear the cell output before sharing the notebook.
for param_key, encrypted in (
    ("opensearch_domain_endpoint", False),
    ("opensearch_user_id", False),
    ("opensearch_user_password", True),
):
    print (pm.get_params(key=param_key, enc=encrypted))

https://search-rag-hol-b1c12dcc-6e5y7l5iniphi5mmhbh3w5gni4.us-east-1.es.amazonaws.com
raguser
MarsEarth1!
