# 创建训练流水线

## 导语

此笔记旨在创建一个训练流水线，以便我们可以训练一个模型。

In [None]:
# Base URL of the service that receives the REST requests issued by this notebook.
HOST = "http://localhost:5002"
# Project and location segments that are interpolated into the request URL path.
PROJECT_ID = "tokyo-rain-42"
LOCATION = "default"

## 编写训练代码


In [1]:
# Create the package directory (no error if it already exists) and make sure it
# contains an __init__.py, so the training code can be addressed as
# `create-training-pipeline.aip_task`.
! mkdir -p /tmp/aip/create-training-pipeline
! touch /tmp/aip/create-training-pipeline/__init__.py

使用魔术命令 `%%writefile` 将训练代码写入 `/tmp/aip/create-training-pipeline/aip_task.py` 文件中。

In [2]:
%%writefile /tmp/aip/create-training-pipeline/aip_task.py

import numpy as np
from tensorflow import keras


def get_args():
    """Parse the command-line options of the training task.

    Returns:
        argparse.Namespace: its ``model_dir`` attribute holds the value given to
        ``--model-dir``; when the option is absent it falls back to the
        ``AIP_MODEL_DIR`` environment variable (``None`` if that is unset too).
    """
    import argparse
    import os

    # Resolve the fallback first so the parser definition below stays declarative.
    fallback_model_dir = os.getenv("AIP_MODEL_DIR")

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--model-dir",
        type=str,
        default=fallback_model_dir,
        dest="model_dir",
        help="Model dir.",
    )
    parsed = cli.parse_args()
    return parsed


# Parse the options once when the module runs; s3_save_keras_model() reads `args.model_dir`.
args = get_args()


def get_model():
    """Build and compile the demo model.

    The network maps a 32-element input vector through a single ``Dense(1)`` layer
    and is compiled with the Adam optimizer and a mean-squared-error loss.

    Returns:
        The compiled ``keras.Model``.
    """
    features = keras.Input(shape=(32,))
    prediction = keras.layers.Dense(1)(features)

    net = keras.Model(features, prediction)
    net.compile(loss="mean_squared_error", optimizer="adam")
    return net


model = get_model()

# Train the model on randomly generated placeholder data: 128 samples with 32
# features each and one target value per sample. Despite the `test_` prefix,
# these two arrays are what the model is fitted on.
test_input = np.random.random((128, 32))
test_target = np.random.random((128, 1))
model.fit(test_input, test_target)


def s3_save_keras_model(model, model_name):
    """Save ``model`` to a temporary local file and upload that file to S3.

    The destination comes from the module-level ``args.model_dir``, which must be
    an ``s3://<bucket>/<prefix>`` URI (the prefix may be empty). The object key is
    the prefix joined to ``model_name`` with exactly one ``/``.

    Args:
        model: Object exposing ``save(path)``; the caller passes a Keras model.
        model_name: File name used both for the local save and as the last
            component of the S3 key. The upload transfers one file, so this
            presumably has to select a single-file format such as the ``.h5``
            name the caller uses — TODO confirm.

    Raises:
        ValueError: If ``args.model_dir`` is unset or is not an ``s3://`` URI
            with a bucket name followed by ``/``.
    """
    import re
    import tempfile
    import boto3

    # Validate the destination before saving anything, so a bad or missing model
    # dir fails immediately with a readable message instead of an AttributeError
    # on a failed regex match.
    match = re.match("s3://([^/]+)/(.*)", args.model_dir or "")
    if match is None:
        raise ValueError(
            f"model dir must look like 's3://<bucket>/<prefix>', got {args.model_dir!r}"
        )
    bucket_name, model_dir = match.groups()

    # Insert the separator only when it is missing: a prefix without a trailing
    # slash would otherwise be fused with the file name ("out" + "my_model.h5").
    if model_dir and not model_dir.endswith("/"):
        model_dir += "/"
    remote_path = f"{model_dir}{model_name}"

    # The upload has to happen inside the `with` block: the temporary directory
    # (and the saved file in it) is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as tempdir:
        local_path = f"{tempdir}/{model_name}"
        model.save(local_path)

        client = boto3.client("s3")
        client.upload_file(Filename=local_path, Bucket=bucket_name, Key=remote_path)


# Save the trained model under the file name `my_model.h5` and upload it to the model dir.
s3_save_keras_model(model, "my_model.h5")

Overwriting /tmp/aip/create-training-pipeline/aip_task.py


## 打包训练代码


In [3]:
# Remove any previous archive, pack the package directory into a fresh .tar.gz,
# then copy the archive to the S3 location that is later passed as the package URI.
! rm -f /tmp/aip/create-training-pipeline.tar.gz
! tar -zcvf /tmp/aip/create-training-pipeline.tar.gz -C /tmp/aip/ create-training-pipeline/
! aws s3 cp /tmp/aip/create-training-pipeline.tar.gz s3://aip-demo/usages/create-training-pipeline.tar.gz

a create-training-pipeline
a create-training-pipeline/aip_task.py
a create-training-pipeline/__init__.py
upload: ../../../../../../../../tmp/aip/create-training-pipeline.tar.gz to s3://aip-demo/usages/create-training-pipeline.tar.gz


## 发起「创建训练流水线」请求

In [14]:
from datetime import datetime

# What to run: the module inside the uploaded source archive, and the image that executes it.
PYTHON_MODULE_NAME = "create-training-pipeline.aip_task"
PYTHON_PACKAGE_URI = "s3://aip-demo/usages/create-training-pipeline.tar.gz"
CONTAINER_URL = "harbor.nadileaf.com/aip-images/create-training-image:0.1.1"

# A second-resolution timestamp suffix gives each run its own display name.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DISPLAY_NAME = f"create-train-pipeline_{TIMESTAMP}"

DISPLAY_NAME

'create-train-pipeline_20220721182038'

In [23]:
from devtools import pprint
import requests
import json

# Collection endpoint for training pipelines of the configured project and location.
url = f"{HOST}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/trainingPipelines"

# Request body: one worker pool that runs the packaged Python module inside the
# executor image, plus the description of the model to register afterwards.
payload = json.dumps(
    {
        "displayName": DISPLAY_NAME,
        "trainingTaskDefinition": "s3://aip/schema/trainingjob/definition/custom_task_1.0.0.yaml",
        "trainingTaskInputs": {
            "workerPoolSpecs": [
                {
                    "pythonPackageSpec": {
                        "executorImageUri": CONTAINER_URL,
                        "packageUris": [PYTHON_PACKAGE_URI],
                        "pythonModule": PYTHON_MODULE_NAME,
                    }
                }
            ]
        },
        "modelToUpload": {
            "displayName": DISPLAY_NAME,
            "predictSchemata": {},
            # NOTE(review): "IMAGE_URI" looks like an unfilled placeholder — confirm
            # which serving image is intended here.
            "containerSpec": {"imageUri": "IMAGE_URI"},
        },
    }
)
headers = {"Content-Type": "application/json"}

# The timeout (seconds) keeps the cell from hanging forever when the service is unreachable.
response = requests.post(url, headers=headers, data=payload, timeout=30)
# Stop here on an HTTP error; otherwise the failure would only surface later as a
# confusing KeyError when the pipeline name is read from the response.
response.raise_for_status()

pprint(response.json())

{
    'name': 'projects/tokyo-rain-42/locations/default/trainingPipelines/ae4668ea-e95d-49d5-a783-6f20d13d14fb',
    'displayName': 'create-train-pipeline_20220721182038',
    'trainingTaskDefinition': 's3://aip/schema/trainingjob/definition/custom_task_1.0.0.yaml',
    'trainingTaskInputs': {
        'workerPoolSpecs': [
            {
                'pythonPackageSpec': {
                    'executorImageUri': 'harbor.nadileaf.com/aip-images/create-training-image:0.1.1',
                    'packageUris': [
                        's3://aip-demo/usages/create-training-pipeline.tar.gz',
                    ],
                    'pythonModule': 'create-training-pipeline.aip_task',
                },
            },
        ],
    },
    'modelToUpload': {
        'name': '',
        'displayName': 'create-train-pipeline_20220721182038',
        'description': '',
        'metadataSchemaUri': '',
        'supportedExportFormats': [],
        'trainingPipeline': 'projects/tokyo-rain-42/

## 获取「训练流水线」的「名字」

In [24]:
# Keep the resource name returned by the create request; the status query
# appends it to the service URL.
TRAINING_PIPELINE_NAME = response.json()["name"]
TRAINING_PIPELINE_NAME

'projects/tokyo-rain-42/locations/default/trainingPipelines/ae4668ea-e95d-49d5-a783-6f20d13d14fb'

## 查询「训练流水线」的「状态」

只有当 `state` 字段变为 `PIPELINE_STATE_SUCCEEDED` 之后，响应中的 `modelToUpload.name` 才会被填充（创建请求的响应里该字段为空字符串），此时才能获取到模型的名字。

In [28]:
import requests
from devtools import pprint

import time

# Seconds between two status queries, and the overall time budget for waiting.
POLL_INTERVAL_SECONDS = 10
POLL_TIMEOUT_SECONDS = 60 * 60

# States after which polling stops.
# NOTE(review): only PIPELINE_STATE_SUCCEEDED is confirmed by this notebook; the
# other two names assume the service mirrors the Vertex AI PipelineState enum — confirm.
TERMINAL_STATES = {
    "PIPELINE_STATE_SUCCEEDED",
    "PIPELINE_STATE_FAILED",
    "PIPELINE_STATE_CANCELLED",
}

url = f"{HOST}/v1/{TRAINING_PIPELINE_NAME}"

payload = {}
headers = {}

# Query repeatedly instead of once: the model name only becomes available after
# the pipeline has succeeded, so a single query makes "Run All" continue with an
# unfinished pipeline.
deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    response = requests.request("GET", url, headers=headers, data=payload, timeout=30)
    response.raise_for_status()
    pipeline = response.json()  # parsed once and reused below
    if pipeline["state"] in TERMINAL_STATES or time.monotonic() >= deadline:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

pprint(pipeline["state"])
pprint(pipeline)

# Fail loudly rather than letting later cells read an empty model name.
if pipeline["state"] != "PIPELINE_STATE_SUCCEEDED":
    raise RuntimeError(
        f"training pipeline did not succeed (last state: {pipeline['state']})"
    )

'PIPELINE_STATE_SUCCEEDED'
{
    'name': 'projects/tokyo-rain-42/locations/default/trainingPipelines/ae4668ea-e95d-49d5-a783-6f20d13d14fb',
    'displayName': 'create-train-pipeline_20220721182038',
    'trainingTaskDefinition': 's3://aip/schema/trainingjob/definition/custom_task_1.0.0.yaml',
    'trainingTaskInputs': {
        'workerPoolSpecs': [
            {
                'pythonPackageSpec': {
                    'packageUris': [
                        's3://aip-demo/usages/create-training-pipeline.tar.gz',
                    ],
                    'pythonModule': 'create-training-pipeline.aip_task',
                    'executorImageUri': 'harbor.nadileaf.com/aip-images/create-training-image:0.1.1',
                },
            },
        ],
    },
    'modelToUpload': {
        'name': 'projects/tokyo-rain-42/locations/default/models/418714a7-149a-43d1-9f57-af9160dfc840',
        'displayName': 'create-train-pipeline_20220721182038',
        'description': '',
        'met

## 获取「模型」的「名字」

In [29]:
# Read the uploaded model's resource name from the most recent pipeline response.
# This field was an empty string in the response to the create request, so the
# value is only meaningful once the pipeline has succeeded.
MODEL_NAME = response.json()["modelToUpload"]["name"]
MODEL_NAME

'projects/tokyo-rain-42/locations/default/models/418714a7-149a-43d1-9f57-af9160dfc840'