## > Setup for all Labs

In [None]:
# Install the packages listed in requirements.txt (path is relative to this notebook's working directory).
%pip install -r requirements.txt

## > Initial Setup Lab01

In [None]:
from utils import (
    upload_file_to_s3,
)
import sagemaker
import os
import time

# SageMaker session and the execution role used throughout the labs.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()  # execution role for the endpoint
# Read the region through the public `boto_region_name` property instead of the
# private `_region_name` attribute, which is an implementation detail of Session.
region = sagemaker_session.boto_region_name

# S3 destination for lab data: the session's default bucket plus a key prefix.
bucket = sagemaker_session.default_bucket()
prefix = "swagger_codegen"

# Local folders that hold the Lab01 input files.
data_dir = "../data/lab01"
yml_dir = f"{data_dir}/yml_files"
uml_dir = f"{data_dir}/uml_diagrams"

### > Upload data to S3 to set up the lab

In [None]:
# Local folders whose files are uploaded to S3 under `prefix`.
data_dirs = [
    yml_dir,
    uml_dir,
    f"{data_dir}/yml_questions",
    f"{data_dir}/uml_questions",
]

for ddir in data_dirs:
    for filename in os.listdir(ddir):
        filepath = os.path.join(ddir, filename)

        # os.listdir also returns sub-directories (for example a
        # .ipynb_checkpoints folder created by Jupyter). Only regular files are
        # handed to the upload helper.
        # NOTE(review): assumes upload_file_to_s3 handles a single file and
        # would fail on a directory -- confirm against utils.py.
        if not os.path.isfile(filepath):
            continue

        # Object key = prefix + the file's path relative to data_dir.
        key = f"{prefix}/{filepath.replace(data_dir+'/', '')}"
        s3_path = upload_file_to_s3(filepath, bucket, key)

    print(f"Sync data to S3 from {ddir} =========")

### > Store the parameters for future labs

In [None]:
# Persist the Lab01 settings with IPython's %store magic so they can be
# restored later with `%store -r`.
%store bucket
%store prefix
%store yml_dir
%store uml_dir
%store data_dir
# Show the name of the bucket that received the uploaded lab data.
print(bucket)

## > Initial Setup Lab02

In [None]:
import boto3
import pprint as pp


from os_utils import (
    create_bedrock_execution_role,
    create_policies_in_oss,
    create_oss_policy_attach_bedrock_execution_role,
    account_number,
    bedrock_execution_role_name
)

# boto3 session, its configured region, and an OpenSearch Serverless client.
boto3_session = boto3.Session()
region_name = boto3_session.region_name
aoss_client = boto3_session.client('opensearchserverless')

vector_store_name = 'swagger-api-vector'

# Look the collection up by name first so that re-running this cell reuses an
# existing collection instead of trying to create it a second time.
exists = aoss_client.list_collections(
    collectionFilters={'name': vector_store_name},
    maxResults=10
)

if exists['collectionSummaries']:
    # Reuse path: take the identifiers from the first matching summary.
    print(f"vector collection {vector_store_name} exists")
    existing_summary = exists['collectionSummaries'][0]
    vector_collection_arn = existing_summary['arn']
    vector_collection_id = existing_summary['id']
    # The role ARN is rebuilt from the account number and role name exported by
    # os_utils; nothing here checks that the role actually exists.
    bedrock_kb_execution_role_arn = f"arn:aws:iam::{account_number}:role/{bedrock_execution_role_name}"

else:
    # Create path: execution role -> OSS policies -> collection -> policy attach.
    bedrock_kb_execution_role = create_bedrock_execution_role(bucket_name=bucket)
    bedrock_kb_execution_role_arn = bedrock_kb_execution_role['Role']['Arn']

    # Encryption, network and data-access policies within OSS.
    encryption_policy, network_policy, access_policy = create_policies_in_oss(
        vector_store_name=vector_store_name,
        aoss_client=aoss_client,
        bedrock_kb_execution_role_arn=bedrock_kb_execution_role_arn,
    )

    print(f"creating vector collection {vector_store_name}")
    vector_collection = aoss_client.create_collection(
        name=vector_store_name,
        type='VECTORSEARCH',
    )
    create_detail = vector_collection['createCollectionDetail']
    vector_collection_arn = create_detail['arn']
    vector_collection_id = create_detail['id']

    pp.pprint(vector_collection)
    # Fixed pause before the access policy is attached to the role.
    time.sleep(10)

    # Create the OSS policy and attach it to the Bedrock execution role.
    create_oss_policy_attach_bedrock_execution_role(
        collection_id=vector_collection_id,
        bedrock_kb_execution_role=bedrock_kb_execution_role,
    )

# Collection endpoint host: <collection id>.<region>.aoss.amazonaws.com
vector_host = '.'.join([vector_collection_id, region_name, 'aoss.amazonaws.com'])

# One value per line, same order as the %store cell that follows.
print(
    vector_host,
    vector_collection_arn,
    vector_collection_id,
    bedrock_kb_execution_role_arn or "NOT FOUND",
    sep="\n",
)

In [None]:
# Persist the Lab02 vector-collection identifiers and the Bedrock execution
# role ARN with IPython's %store magic so they can be restored with `%store -r`.
%store vector_store_name
%store vector_collection_arn
%store vector_collection_id
%store vector_host
%store bedrock_kb_execution_role_arn

## > Initial Setup Lab03

In [None]:
from os_utils import(
    create_lambda_role,
    create_lambda,
    suffix
)

# Resource names, both derived from the `suffix` imported from os_utils.
agent_name = f'swagger-api-agent-{suffix}'
lambda_function_name = f'{agent_name}-lambda'

# IAM role for the function; it is passed to create_lambda below.
lambda_iam_role = create_lambda_role(agent_name)

# Create the function from lambda_function.py and report its ARN.
lambda_function = create_lambda(
    "lambda_function.py",
    lambda_function_name,
    lambda_iam_role,
)
lambda_arn = lambda_function['FunctionArn']
print(f"Lab 03 Lambda ARN: {lambda_arn}")

In [None]:
# Persist the Lab03 Lambda ARN and function name with IPython's %store magic
# so they can be restored with `%store -r`.
%store lambda_arn
%store lambda_function_name

## > Initial Setup Lab04

In [None]:
# Identifier of the base embedding model for Lab04.
model_id = "sentence-transformers/msmarco-bert-base-dot-v5"

# S3 key prefix under which the Lab04 datasets are placed.
lab04_prefix = "finetune-embedding"

In [None]:
train_data = "train_dataset.json"
train_local_path = f"../data/lab04/{train_data}"

train_s3_path = f"s3://{bucket}/{lab04_prefix}/{train_data}"

!aws s3 cp {train_local_path} {train_s3_path}

In [None]:
valid_data = "val_dataset.json"
valid_local_path = f"../data/lab04/{valid_data}"

valid_s3_path = f"s3://{bucket}/{lab04_prefix}/{valid_data}"

!aws s3 cp {valid_local_path} {valid_s3_path}

In [None]:
# Persist the Lab04 prefix, dataset locations (S3 and local) and model id with
# IPython's %store magic so they can be restored with `%store -r`.
%store lab04_prefix
%store train_s3_path
%store valid_s3_path
%store train_local_path
%store valid_local_path
%store model_id