# Test Trainer

In [1]:
from sagemaker.session import Session, get_execution_role
from sagemaker.estimator import Estimator
import os

In [2]:
def get_execution_role_in_local(
    sagemaker_session,
    role_name="AmazonSageMaker-ExecutionRole-20230105T181131",
):
    """Look up the ARN of an IAM role through the session's boto session.

    Args:
        sagemaker_session: object exposing ``boto_session.client("iam")``.
        role_name: name of the IAM role to resolve. Defaults to the execution
            role this notebook was originally written against.

    Returns:
        The ``Arn`` field of the role returned by IAM ``get_role``.
    """
    iam_client = sagemaker_session.boto_session.client("iam")
    return iam_client.get_role(RoleName=role_name)["Role"]["Arn"]

In [3]:
def load_env_variables(*env_files):
    """Read ``KEY=VALUE`` pairs from one or more env files into a dict.

    Files are processed in the order given, so a key defined in a later file
    overrides the same key from an earlier one. Blank lines and lines starting
    with ``#`` are ignored. Only the first ``=`` separates key from value, so
    values may themselves contain ``=`` (URLs, base64 tokens, ...).

    Args:
        *env_files: paths of the env files to read.

    Returns:
        dict mapping each stripped key to its stripped value.

    Raises:
        ValueError: if a non-blank, non-comment line has no ``=`` or an empty key.
    """
    env_vars = {}
    for env_file in env_files:
        with open(env_file, "r") as file:
            for line_number, raw_line in enumerate(file, start=1):
                entry = raw_line.strip()
                # A trailing newline or a comment must not break the load.
                if not entry or entry.startswith("#"):
                    continue
                # partition keeps everything after the first "=" as the value.
                key, separator, value = entry.partition("=")
                key = key.strip()
                if not separator or not key:
                    raise ValueError(
                        f"{env_file}:{line_number}: expected KEY=VALUE, got {entry!r}"
                    )
                env_vars[key] = value.strip()
    return env_vars

In [4]:
def create_container_registry(ecr_client, repository_name, account_id):
    """Create an ECR repository, or describe it when it already exists.

    Args:
        ecr_client: ECR client exposing ``create_repository``,
            ``describe_repositories`` and ``exceptions``.
        repository_name: name of the repository to create or look up.
        account_id: registry id used for the lookup when the repository exists.

    Returns:
        The repository description dict returned by ECR.
    """
    creation_request = {
        "repositoryName": repository_name,
        "tags": [{"Key": "Test", "Value": "True"}],
        "encryptionConfiguration": {"encryptionType": "AES256"},
    }
    try:
        return ecr_client.create_repository(**creation_request)["repository"]
    except ecr_client.exceptions.RepositoryAlreadyExistsException:
        # Not an error for this notebook: reuse the existing repository.
        print("repository already exists!")
        existing = ecr_client.describe_repositories(
            registryId=account_id,
            repositoryNames=[repository_name],
        )
        return existing["repositories"][0]

In [5]:
sagemaker_session = Session()
try:
    # Preferred path: resolve the role from the identity the session runs as.
    sagemaker_role = get_execution_role()
except Exception:
    # Fallback: look the role up by name through IAM.
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate and the cell can be interrupted.
    sagemaker_role = get_execution_role_in_local(sagemaker_session)

# Configurations

In [6]:
def get_configurations(stage = "staging"):
    """Load the shared env file followed by the stage-specific one.

    Args:
        stage: suffix selecting the ``../vars.<stage>.env`` file.

    Returns:
        Whatever ``load_env_variables`` returns for the two files.
    """
    shared_file = "../vars.env"
    stage_file = f"../vars.{stage}.env"
    return load_env_variables(shared_file, stage_file)

In [7]:
def set_environ_temporal_variables(**variables):
    """Copy every keyword argument into ``os.environ`` under its own name.

    Args:
        **variables: environment variable names mapped to their values.
    """
    os.environ.update(variables)

In [8]:
# Fixed names used by this notebook
stage = "staging"
repository_name = "koombea-blogs-train-component"
docker_compose_service_name = "koombea_blogs_train_component"
# NOTE(review): presumably the image name docker-compose assigns to the built service — confirm
docker_image_name = f"koombea_blogs_train_{docker_compose_service_name}"

# AWS context taken from the active SageMaker session
ecr_client = sagemaker_session.boto_session.client("ecr")
account_id = sagemaker_session.account_id()
aws_region = sagemaker_session.boto_region_name

# Variables read from the shared and stage-specific env files
environment = get_configurations(stage)

# Create the repository, or fetch its description when it already exists
repository_info = create_container_registry(
    ecr_client,
    repository_name,
    account_id,
)
repository_uri = repository_info["repositoryUri"]

repository already exists!


In [9]:
# Values the build/push shell script reads from the process environment
shell_variables = {
    "account_id": account_id,
    "aws_region": aws_region,
    "docker_compose_service_name": docker_compose_service_name,
    "docker_image_name": docker_image_name,
    "repository_uri": repository_uri,
}
set_environ_temporal_variables(**shell_variables)

In [10]:
# Display the loaded configuration with credential-like values masked, so that
# secrets (API keys, tokens, passwords) are not stored in the notebook output.
{
    name: "***"
    if any(marker in name.upper() for marker in ("KEY", "SECRET", "TOKEN", "PASSWORD"))
    else value
    for name, value in environment.items()
}

{'PEM_FILE': '/opt/ml/koombea_blogs/connection/dataBaseKey.pem',
 'WANDB_API_KEY': '***',
 'WANDDB_PROJECT_NAME': 'koombea-website-ml',
 'WANDB_ENTITY': 'koombea-marketing',
 'WANDB_MODE': 'online',
 'MYSQL_DBNAME': 'wp_koombea20stg',
 'STAGE': 'dev'}

## Push container to ECR

In [11]:
%%writefile ../scripts/build_and_push_ecr.sh
#!/usr/bin/env bash
# Build the training image with docker-compose, tag it and push it to ECR.
# Reads these environment variables:
#   aws_region, account_id, docker_compose_service_name,
#   docker_image_name, repository_uri
#
# Stop at the first failing command, on any unset variable, and when any
# stage of a pipeline fails, so a broken login or build is never followed
# by a tag/push of a stale image.
set -euo pipefail

echo "loging to aws ecr"
aws ecr get-login-password --region "${aws_region}" \
    | docker login --username AWS --password-stdin "${account_id}.dkr.ecr.${aws_region}.amazonaws.com"

echo "building and tagging docker container"
# docker-compose is run from the parent of the directory the script is launched in
cd ..
docker-compose build "${docker_compose_service_name}"
docker tag "${docker_image_name}:latest" \
    "${repository_uri}:latest"

echo "pushing container"
docker push "${repository_uri}:latest"

echo "cleaning dockers cache"
# --force skips the confirmation prompt (replaces piping "y" into the command)
docker system prune --force

Overwriting ../scripts/build_and_push_ecr.sh


In [12]:
# Run the build/push script; it reads aws_region, account_id, docker_compose_service_name,
# docker_image_name and repository_uri from the environment set earlier in this notebook.
!bash ../scripts/build_and_push_ecr.sh

loging to aws ecr
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
building and tagging docker container
Building koombea_blogs_train_component
Sending build context to Docker daemon  1.393MB
Step 1/25 : FROM continuumio/miniconda3
 ---> 45461d36cbf1
Step 2/25 : WORKDIR /opt/ml/code
 ---> Using cache
 ---> 1d42458c2aad
Step 3/25 : RUN apt-get update && apt-get install -y --no-install-recommends     curl     gcc      mono-mcs     build-essential     ca-certificates     wget     pkg-config
 ---> Using cache
 ---> 8b412ca2038b
Step 4/25 : ENV PYTHONDONTWRITEBYTECODE 1
 ---> Using cache
 ---> 580ef0f97b12
Step 5/25 : ENV PYTHONUNBUFFERED 1
 ---> Using cache
 ---> 47863079f919
Step 6/25 : ARG conda_env=python38
 ---> Using cache
 ---> 09472f5e21f3
Step 7/25 : ARG py_ver=3.8.10
 ---> Using cache
 ---> ccf9c0650940
Step 8/25 : RUN conda create --quiet --yes -p "${CONDA_DIR}/envs/${conda_env}" python=${py_ver} &&     conda clean --all -f -y
 ---> U

## Test trainer

In [13]:
# Training job name prefix and the session's default S3 bucket
base_job_name = "koombea-blogs-vector-train"
bucket_name = sagemaker_session.default_bucket()

# Input data and output models share the same project folder in the bucket
folder_project_name = "koombea_website_ml"
folder_models_name = "koombea_blogs_models"
folder_data_name = "koombea_blogs_information"

# Where the training job writes its output
key_output_prefix = f"{folder_project_name}/{folder_models_name}/"
s3_bucket_output_name = f"s3://{bucket_name}/{key_output_prefix}"

# Where the training job reads its input
key_input_prefix = f"{folder_project_name}/{folder_data_name}/"
s3_bucket_input_name = f"s3://{bucket_name}/{key_input_prefix}"

# Passed to the Estimator as-is
# NOTE(review): names look like word2vec-style training options — confirm against the training script
hyperparameters = dict(
    min_count=0,
    size=300,
    sg=1,
    window=15,
    iter=40,
    sample=6e-5,
    hs=0,
    negative=15,
    ns_exponent=-0.5,
)

In [14]:
# Build the estimator around the container image stored in the ECR repository
estimator = Estimator(
    # container, permissions and session
    image_uri=repository_uri,
    role=sagemaker_role,
    sagemaker_session=sagemaker_session,
    # compute resources
    instance_type="ml.m5.large",
    instance_count=1,
    # job definition
    base_job_name=base_job_name,
    hyperparameters=hyperparameters,
    environment=environment,
    output_path=s3_bucket_output_name,
)

In [15]:
# Launch the training job with the S3 input prefix bound to the "training" channel
training_channels = {"training": s3_bucket_input_name}
estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: koombea-blogs-vector-train-2023-02-08-16-28-31-172


2023-02-08 16:28:31 Starting - Starting the training job...
2023-02-08 16:28:45 Starting - Preparing the instances for training......
2023-02-08 16:29:38 Downloading - Downloading input data...
2023-02-08 16:30:03 Training - Downloading the training image..[34m2023-02-08 16:30:42,866 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/envs/python38/bin/python -m pip install -r requirements.txt[0m
[34mCollecting numpy==1.21.0
  Using cached numpy-1.21.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)[0m
[34mInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.5
    Uninstalling numpy-1.23.5:
      Successfully uninstalled numpy-1.23.5[0m
[34mSuccessfully installed numpy-1.21.0[0m
[34m2023-02-08 16:30:46,686 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-02-08 16:30:46,688 sagemaker-training-toolkit INFO     No Ne

In [16]:
# S3 URI of the model artifact recorded on the estimator after fit()
estimator.model_data

's3://sagemaker-us-west-2-256305374409/koombea_website_ml/koombea_blogs_models/koombea-blogs-vector-train-2023-02-08-16-28-31-172/output/model.tar.gz'