In [5]:
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow
import boto3

#### 训练

In [6]:
# 上传数据
sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = 'lstmNer/data'
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'example.dev')).upload_file('MyNerTF2_aws/data/data_zh/samples/example.dev')
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'example.test')).upload_file('MyNerTF2_aws/data/data_zh/samples/example.test')
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'example.train')).upload_file('MyNerTF2_aws/data/data_zh/samples/example.train')

In [9]:
# 创建sagemaker estimator

role = get_execution_role()
region = sagemaker_session.boto_session.region_name

# 数据来源
training_data_uri = 's3://{}/{}'.format(bucket, prefix)


myner_estimator = TensorFlow(entry_point='model_GPU.py',             
                          source_dir='./MyNerTF2_aws',               
                          role=role,
                          output_path='s3://{}/lstmNer'.format(bucket),  
                          code_location='s3://{}/lstmNer'.format(bucket),
                          checkpoint_s3_uri='s3://{}/lstmNer/checkpoints'.format(bucket),
                          train_instance_count=1,                   
                          train_instance_type='ml.p3.8xlarge',
                          framework_version='2.3.1',
                          py_version='py37',
                          disable_profiler=True,
                          train_use_spot_instances=True,
                          train_max_wait=7200,
                          train_max_run=3600,
                          )

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_run has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_use_spot_instances has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_wait has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [10]:
myner_estimator.fit(training_data_uri)

2021-07-13 03:51:58 Starting - Starting the training job...
2021-07-13 03:52:04 Starting - Launching requested ML instances......
2021-07-13 03:53:06 Starting - Insufficient capacity error from EC2 while launching instances, retrying!.........
2021-07-13 03:54:45 Starting - Preparing the instances for training......
2021-07-13 03:55:56 Downloading - Downloading input data...
2021-07-13 03:56:11 Training - Downloading the training image...........[34m2021-07-13 03:58:10.455267: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2021-07-13 03:58:10.459768: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2021-07-13 03:58:10.819660: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2[0m
[34m2021-07-13 03:58:10.946813: 

#### 部署

In [11]:
predictor = myner_estimator.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!

#### 在线推理

In [12]:
input_data = {
    'instances': [
        [2450, 2127, 7057, 7031, 6982, 2420, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0]
    ]
}

In [13]:
predictor.predict(input_data)

{'predictions': [[3.0,
   13.0,
   13.0,
   13.0,
   0.0,
   5.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0,
   17.0]]}

#### 离线批量推理



In [19]:
import pandas as pd

row = [2450, 2127, 7057, 7031, 6982, 2420, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0]

test_df = pd.DataFrame([row for i in range(5)])
test_df.to_csv("batch_test_data.csv", index=False, header=False)

In [20]:
import boto3 

boto3.Session().resource('s3').Bucket(bucket).Object('lstmNer/transform/batch_test_data.csv').upload_file('batch_test_data.csv')

In [21]:
s3_batch_input = 's3://{}/lstmNer/transform'.format(bucket)
s3_batch_output = 's3://{}/lstmNer/transform_output'.format(bucket)

In [22]:
myner_transformer = myner_estimator.transformer(instance_count=1, instance_type='ml.m5.xlarge', output_path=s3_batch_output)

In [23]:
myner_transformer.transform(data=s3_batch_input, data_type='S3Prefix', content_type='text/csv', split_type='Line')

..........................[34mINFO:__main__:starting services[0m
[34mINFO:tfs_utils:using default model name: model[0m
[34mINFO:tfs_utils:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "model",
    base_path: "/opt/ml/model",
    model_platform: "tensorflow"
  }[0m
[34m}

[0m
[34mINFO:__main__:using default model name: model[0m
[34mINFO:__main__:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "model",
    base_path: "/opt/ml/model",
    model_platform: "tensorflow"
  }[0m
[34m}

[0m
[34mINFO:__main__:tensorflow version info:[0m
[34mTensorFlow ModelServer: 2.3.0-rc0+dev.sha.no_git[0m
[34mTensorFlow Library: 2.3.0[0m
[34mINFO:__main__:tensorflow serving command: tensorflow_model_server --port=10000 --rest_api_port=10001 --model_config_file=/sagemaker/model-config.cfg --max_num_load_retries=0 [0m
[34mINFO:__main__:started tensorflow serving (pid: 11)[0m
[34mINFO:__main__:nginx config: [0