# <div align='center'> 测试新架构API(本文件不再维护) </div>

In [13]:
import requests
import json
import consul
import time
import socket

## 零. 辅助功能

In [14]:
def get_host_ip():
    """Return the local IPv4 address this machine uses for outbound traffic.

    A datagram (UDP) socket is connected to 8.8.8.8:80 and the local address
    the OS bound for that connection is read back with ``getsockname()``.

    Returns:
        str: the local IP address, or ``''`` when it could not be determined
        (the error is printed rather than raised).
    """
    ip = ''
    try:
        # The context manager closes the socket even when connect() or
        # getsockname() raises, so the descriptor is not leaked on failure.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(('8.8.8.8', 80))
            ip = s.getsockname()[0]
    except Exception as err:
        # Best effort: report the problem and fall back to the empty string.
        print(err)
    return ip

def query_async_data(key, wait = 1):
    """Fetch a value that an asynchronous API call published to Consul.

    The asynchronous API endpoints sync the data they produce to the Consul
    data centre (intended for development use only); this helper reads one
    key back from it. The Consul address is taken from the notebook-level
    ``consul_addr`` / ``consul_port`` variables.

    Args:
        key: Consul KV key to read.
        wait: seconds to sleep before querying, giving the asynchronous job
            time to publish its result.

    Returns:
        str: the stored value decoded as UTF-8, or the literal message
        ``'Query nothing, try again later!'`` when the key is missing or the
        lookup fails for any reason.
    """
    time.sleep(wait)
    try:
        client = consul.Consul(consul_addr, consul_port)
        _, data = client.kv.get(key)
        # A missing key (or a key without a value) yields nothing to decode.
        if data is None or data.get('Value') is None:
            return 'Query nothing, try again later!'
        return str(data['Value'], encoding="utf-8")
    except Exception:
        # Best effort for interactive use, but unlike a bare ``except:`` this
        # lets KeyboardInterrupt / SystemExit through so the cell can be stopped.
        return 'Query nothing, try again later!'
    
def print_json(text):
    """Pretty-print JSON with a 4-space indent.

    Args:
        text: either a JSON document as a ``str`` (parsed first) or an
            already-decoded JSON-serialisable object.
    """
    payload = json.loads(text) if isinstance(text, str) else text
    print(json.dumps(payload, indent=4))

In [15]:
# Address of the k12ai API service: this machine's own IP (from get_host_ip)
# and the API port. Every request cell below builds its URL from these two.
host = get_host_ip()
port = 8119

# Identity sent with the framework requests below: "user" and "service_uuid".
user = 'test'
uuid = '123456'

# Consul data centre read by query_async_data; it uses the same host as the API.
consul_addr = host
consul_port = 8500

## 一. 机器学习框架

### 1. 训练`/k12ai/framework/train`

#### A. 数据结构

1. 输入

```json
{
    "op":"", // [M] api具有子集操作, 如: "train.start", "train.stop"
    "user": "", // [M] 用户ID, api调用者, 如: 用户账号user_id(不含有特殊字符)
    "service_name": "", // [M] 服务名, k12ai由多个子框架组成, 需要指定具体服务, 如: "k12nlp", "k12cv"
    "service_uuid": "", // [M] 服务唯一标识码, 可以是用户项目的工程ID, project_id.
    "service_params": { // [M] 服务参数
        "dataset_reader": { // [M] 训练数据集处理配置
        },
        "validation_dataset_reader":{ // [O] 检验模型数据集的处理配置
        },
        "train_data_path": "", // [M] 训练数据集的路径
        "validation_data_path": "", // [O] 检验模型数据集的路径
        "model":{ // [M] 模型的配置 
        },
        "iterator":{ // [M] 训练/检验模型时采用的迭代方式
        },
        "trainer":{ // [M] 训练相关的参数配置
        }
    }
}
```

#### B. 配置实例

In [7]:
# Training configuration, kept as JSON text. It is spliced in as the
# "service_params" value of the train.start request in the next code cell.
config = '''{
    "dataset_reader":{
        "type": "sst_tokens",
        "use_subtrees": true,
        "granularity": "5-class"
    },
    "validation_dataset_reader":{
        "type": "sst_tokens",
        "use_subtrees": false,
        "granularity": "5-class"
    },
    "train_data_path": "/data/datasets/nlp/sst/train.txt",
    "validation_data_path": "/data/datasets/nlp/sst/dev.txt",
    "test_data_path": "/data/datasets/nlp/sst/test.txt",
    "model": {
        "type": "bcn",
        "text_field_embedder": {
            "token_embedders": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 50,
                    "pretrained_file": "/data/datasets/nlp/glove/glove.6B.50d.txt.gz",
                    "trainable": false
                }
            }
        },
        "embedding_dropout": 0.25,
        "pre_encode_feedforward": {
            "input_dim": 50,
            "num_layers": 1,
            "hidden_dims": [50],
            "activations": ["relu"],
            "dropout": [0.25]
        },
        "encoder": {
            "type": "lstm",
            "input_size": 50,
            "hidden_size": 50,
            "num_layers": 1,
            "bidirectional": true
        },
        "integrator": {
            "type": "lstm",
            "input_size": 300,
            "hidden_size": 50,
            "num_layers": 1,
            "bidirectional": true
        },
        "integrator_dropout": 0.1,
        "output_layer": {
            "input_dim": 400,
            "num_layers": 3,
            "output_dims": [200, 100, 5],
            "pool_sizes": 4,
            "dropout": [0.2, 0.3, 0.0]
        }
    },
    "iterator": {
        "type": "bucket",
        "sorting_keys": [["tokens", "num_tokens"]],
        "batch_size" : 2
    },
    "trainer": {
        "num_epochs": 10,
        "patience": 2,
        "grad_norm": 5.0,
        "validation_metric": "+accuracy",
        "optimizer": {
            "type": "adam",
            "lr": 0.001
        },
        "cuda_device": 0
    }
}'''
# Optional sanity check: json.loads(config) raises ValueError if the text above is not valid JSON.

#### C. 启动训练

In [8]:
# Build the train.start request as a dict so that `user` / `uuid` are escaped
# by the JSON encoder instead of being spliced into JSON text by hand (a quote
# or backslash in either value would otherwise produce an invalid document).
data = {
    "op": "train.start",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid,
    # `config` is JSON text; decode it so it is nested as an object.
    "service_params": json.loads(config)
}

# POST the request to the training endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/train' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100200,
    "descr": "task service success",
    "message": "train.start task cache directory: /data/users/test/123456"
}


[操作是异步的, 训练过程结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

输出实例:
```json
{
    "version": "0.1.0",
    "type": "metrics",   // 表示类型是metrics
    "tag": "framework",
    "op": "train.start",
    "user": "test",
    "service_uuid": "123456",
    "timestamp": 1574428843011,
    "datetime": "2019-11-22 21:20:43",
    "metrics": {
        "accuracy": 0.7070127118644067,
        "accuracy3": 0.9471610169491526,
        "loss": 0.8009904705303705
    }
}
```

In [10]:
# Read back the metrics record that the train.start job published to Consul.
print(query_async_data(key='framework/{}/{}/train.start/metrics'.format(user, uuid)))

{
    "version": "0.1.0",
    "type": "metrics",
    "tag": "framework",
    "op": "train.start",
    "user": "test",
    "service_uuid": "123456",
    "timestamp": 1574687795489,
    "datetime": "2019-11-25 21:16:35",
    "metrics": {
        "accuracy": 0.69,
        "accuracy3": 0.9336,
        "loss": 0.8899753094911576
    }
}


#### D. 停止训练

In [11]:
# Build the train.stop request as a dict so that `user` / `uuid` are escaped
# by the JSON encoder instead of being spliced into JSON text by hand.
data = {
    "op": "train.stop",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid,
    "service_params": {}
}

# POST the request to the training endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/train' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100200,
    "descr": "task service success"
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

In [12]:
# Read back whatever the train.stop operation published to Consul.
print(query_async_data(key='framework/{}/{}/train.stop'.format(user, uuid)))

Query nothing, try again later!


### 2. 评估`/k12ai/framework/evaluate`

#### A. 数据结构

1. 输入

```json
{
    "op":"", // [M] api具有子集操作, 如: "evaluate.start", "evaluate.stop"
    "user": "", // [M] 用户ID, api调用者, 如: 用户账号user_id(不含有特殊字符)
    "service_name": "", // [M] 服务名, k12ai由多个子框架组成, 需要指定具体服务, 如: "k12nlp", "k12cv"
    "service_uuid": "", // [M] 服务唯一标识码, 可以是用户项目的工程ID, project_id.
    "service_params": { // [M] 服务参数
        "input_file": "", // [M] 评估/测试数据集的路径
        "output_file": "" // [O] 指定评估最终结果的metrics存储文件
    }
}
```

2. 输出

```json
{
    "code": 100000, // [M] 操作结果码, (异步)TODO: 100000只能说明接口调用是通的, 不能说明评估是成功的.
    "content": {  // [O] 操作结果详情
        "result": {
            "op": "evaluate.stop",
            "exec": "success"
        }
    }
}
```

#### B. 启动评估

In [13]:
# Build the evaluate.start request as a dict so that `user` / `uuid` are
# escaped by the JSON encoder instead of being spliced into JSON text by hand.
data = {
    "op": "evaluate.start",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid,
    "service_params": {
        "input_file": "/data/datasets/nlp/sst/test.txt"
    }
}

# POST the request to the evaluation endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/evaluate' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100202,
    "descr": "task service start fails",
    "message": "model.tar.gz is not found in /data/users/test/123456"
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

评估过程中消息如下:

```json
{
    "version": "0.1.0", // 消息体的版本号, 为了兼容设定
    "op": "evaluate.start", // 操作
    "user": "test", // 用户ID
    "service_uuid": "123456", // 工程ID
    "timestamp": 1574224024170, // 时间戳
    "datetime": "2019-11-20 12:27:04", // 日期
    "message": { // 具体消息, 阶段不同, 消息结构和内容都有可能发生变化
        "accuracy": 0.4027777777777778,
        "accuracy3": 0.8255555555555556,
        "loss": 1.3810093342098926
    }
}
```

In [10]:
# Read back whatever the evaluate.start job published to Consul.
print(query_async_data(key='framework/{}/{}/evaluate.start'.format(user, uuid)))

Query nothing, try again later!


#### C. 停止评估

In [11]:
# Build the evaluate.stop request as a dict so that `user` / `uuid` are
# escaped by the JSON encoder instead of being spliced into JSON text by hand.
data = {
    "op": "evaluate.stop",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid
}

# POST the request to the evaluation endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/evaluate' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100200,
    "descr": "task service success"
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

停止评估任务, 如果该任务已经结束, 返回结果如下:

```json
{
    "version": "0.1.0",
    "op": "evaluate.stop",
    "user": "test",
    "service_uuid": "123456",
    "timestamp": 1574224373067,
    "datetime": "2019-11-20 12:32:53",
    "result": {
        "code": -1,
        "err": "404 Client Error: Not Found (\"No such container: evaluate-test-123456\")"
    }
}
```

In [12]:
# Read back whatever the evaluate.stop operation published to Consul.
print(query_async_data(key='framework/{}/{}/evaluate.stop'.format(user, uuid)))

Query nothing, try again later!


### 3. 预测`/k12ai/framework/predict`

#### A. 数据结构

1. 输入

```json
{
    "op":"", // [M] api具有子集操作, 如: "predict.start", "predict.stop"
    "user": "", // [M] 用户ID, api调用者, 如: 用户账号user_id(不含有特殊字符)
    "service_name": "", // [M] 服务名, k12ai由多个子框架组成, 需要指定具体服务, 如: "k12nlp", "k12cv"
    "service_uuid": "", // [M] 服务唯一标识码, 可以是用户项目的工程ID, project_id.
    "service_params": { // [M] 服务参数
        "input_type": "", // [M] 指定预测文本的类型, 1. "text": 文本串; 2. "file": 文件
        "input_file": "", // [C] 当input_type=file; 指定预测文本路径
        "input_text": "", // [C] 当input_type=text; 指定预测文本内容
        "output_file": "", // [O] 指定最终结果输出的存储文件
        "batch_size": 32, // [O] 指定预测时使用的batch大小
        "predictor": "" // [O] 指定预测方法类型, 如果不指定则从训练配置config.json中读取
    }
}
```

#### B. 启动预测

In [13]:
# The text to classify is itself a JSON document, sent as a plain string.
text = '{"sentence": "a very well-made, funny and entertaining picture."}'

# Build the predict.start request as a dict so that `user`, `uuid` and `text`
# are all escaped by the JSON encoder rather than spliced into JSON text by hand.
data = {
    "op": "predict.start",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid,
    "service_params": {
        "input_type": "text",
        "input_text": text,
        "predictor": "text_classifier"
    }
}

# POST the request to the prediction endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/predict' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100202,
    "descr": "task service start fails",
    "message": "model.tar.gz is not found in /data/users/test/123456"
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

输出样例:

```json
{
    "version": "0.1.0",
    "type": "predict",
    "op": "predict.start",
    "user": "test",
    "service_uuid": "123456",
    "timestamp": 1574240762485,
    "datetime": "2019-11-20 17:06:02",
    "predict": {
        "index": 0,  // 表示本次预测是第几个实例
        "prediction": "{\"logits\": [-1.4716334342956543, 1.646652340888977, -2.384199857711792, 2.6578245162963867, -4.671465873718262], \"class_probabilities\": [0.011601723730564117, 0.26228705048561096, 0.004658004269003868, 0.7209802269935608, 0.00047299108700826764], \"label\": \"4\"}\n"
    }
}
```

In [14]:
# Read back whatever the predict.start job published to Consul.
print(query_async_data(key='framework/{}/{}/predict.start'.format(user, uuid)))

Query nothing, try again later!


#### C. 停止预测

In [15]:
# Build the predict.stop request as a dict so that `user` / `uuid` are escaped
# by the JSON encoder instead of being spliced into JSON text by hand.
data = {
    "op": "predict.stop",
    "user": user,
    "service_name": "k12nlp",
    "service_uuid": uuid
}

# POST the request to the prediction endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/framework/predict' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100200,
    "descr": "task service success"
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/framework/test/123456/)

输出样例:

```json
{
    "version": "0.1.0",
    "type": "error",
    "tag": "framework",
    "op": "predict.stop",
    "user": "test",
    "service_uuid": "123456",
    "timestamp": 1574241361253,
    "datetime": "2019-11-20 17:16:01",
    "error": {
        "code": 100400,
        "descr": "867680f57c" // Container ID
    }
}
```

In [16]:
# Read back whatever the predict.stop operation published to Consul.
# The key is written without a trailing slash, matching the train.stop and
# evaluate.stop queries in this notebook.
# NOTE(review): the exact key the service writes is not visible here - confirm.
print(query_async_data('framework/%s/%s/predict.stop'%(user, uuid)))

Query nothing, try again later!


## 二. 平台资源管理

### 1. 平台状态`/k12ai/platform/stats`

A. 输入

```json
{
    "username": "test",  // [M] 登陆平台用户名 (未使用)
    "password": "test",  // [M] 登陆平台用户密码 (未使用)
    "async": true,  // [O] 默认同步, 即值为false, api调用过程方式
    "query": {  //  [O] 默认query所有项
        "cpus": true, // [O] k12ai主机cpu信息 
        "gpus": true, // [O] k12ai主机gpu信息
        "disks": false, // [O] k12ai主机磁盘信息
        "containers": true // [O] k12ai主机上正在运行的label为"k12ai.service.name"的container信息
    }
}
```

B. 输出

```json
{
    "version": "0.1.0", // [M] 返回json格式的版本号, 兼容版本时使用
    "type": "info",
    "tag": "platform", // [M] 标识输出结果的服务TAG, 可以用来后续消息过滤 
    "op": "stats", // [M] 标识输出结果由哪个功能产出, 一般统一为"/k12ai/{{tag}}/{{op}}"的形式
    "timestamp": , // [M]
    "datetime": , // [M]
    "info": { // [M] 输出结果的正文内容
        "cpu_percent": 47.4,  // [C] k12ai主机cpu使用率, 不区分多cpu 
        "cpu_percent_list": [ // [C] k12ai主机cpu使用率, 每个cpu的使用率
            63.6,
            31.1
        ],
        "cpu_memory_total":, // [C] k12ai主机memory总大小(Byte)
        "cpu_memory_usage":, // [C] k12ai主机memory已使用(Byte)
        "cpu_memory_percent": , // [C] k12ai主机memory使用率
        "gpus": [  // [C] k12ai主机gpu信息, 每个gpu信息列表
            {
                "name": "", // [C] gpu名字, 厂商
                "gpu_percent": , // [C] gpu使用率
                "gpu_memory_total": , // [C] gpu显存总大小(Byte)
                "gpu_memory_usage": , // [C] gpu显存已使用(Byte)
                "gpu_memory_percent": // [C] gpu显存使用率
            }
        ],
        "containers": [ // k12ai主机正在运行的关于训练相关的container信息
            {
                "id": "", // container的ID
                "cpu_percent": , // container的cpu使用率
                "cpu_memory_total":,  // container的memory总大小
                "cpu_memory_usage":,  // container的memory已使用
                "cpu_memory_percent":, // container的memory利用率
                "op": "",  // container正在执行的操作,如:"train.start", "predict.start"等
                "user": "", // container用户ID
                "service_uuid": "" // container执行的任务/服务ID.
            }
        ]
    }
}
```

In [17]:
# Platform-stats request sent with "async": false. It asks for CPU, GPU and
# container information and leaves disk information out.
data = '''{
    "username": "test",
    "password": "test",
    "async": false,
    "query": {
        "cpus": true,
        "gpus": true,
        "disks": false,
        "containers": true
    }
}'''

# POST the decoded payload to the platform stats endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/platform/stats' % (host, port)
print_json(requests.post(url=api, json=json.loads(data)).text)

{
    "code": 100200,
    "descr": "task service success",
    "message": {
        "cpu_percent": 0.8,
        "cpu_percent_list": [
            0.5,
            0.6,
            0.9,
            1.0,
            1.0,
            0.5,
            1.0,
            1.0
        ],
        "cpu_memory_total": 50648887296,
        "cpu_memory_usage": 1184727040,
        "cpu_memory_percent": 3.5,
        "gpus": [
            {
                "name": "Tesla P40",
                "gpu_percent": 0.0,
                "gpu_memory_total": 22919.0,
                "gpu_memory_usage": 0.0,
                "gpu_memory_percent": 0.0
            }
        ],
        "containers": []
    }
}


[操作是异步的, 真实启动的结果到Consul数据中心查看](http://gamma:8500/ui/gamma/kv/platform/admin)

```json
{
    "version": "0.1.0",
    "type": "info",
    "tag": "platform",
    "op": "stats",
    "timestamp": 1574420133259,
    "datetime": "2019-11-22 18:55:33",
    "info": {
        "cpu_percent": 6.3,
        "cpu_percent_list": [
            5.5,
            7.0
        ],
        "cpu_memory_total": 4143292416,
        "cpu_memory_usage": 1265950720,
        "cpu_memory_percent": 36.7,
        "gpus": [
            {
                "name": "Tesla P4",
                "gpu_percent": 0.0,
                "gpu_memory_total": 7611.0,
                "gpu_memory_usage": 10.0,
                "gpu_memory_percent": 0.13
            }
        ],
        "containers": [
            {
                "id": "30a879f11ec4",
                "cpu_percent": 105.44,
                "cpu_memory_total": 4143292416,
                "cpu_memory_usage": 355155968,
                "cpu_memory_percent": 8.57,
                "op": "train",
                "user": "test",
                "service_uuid": "123456"
            }
        ]
    }
}
```

In [15]:
# Read back the platform stats record stored in Consul.
print(query_async_data(key='platform/admin/stats/info'))

{
    "version": "0.1.0",
    "type": "info",
    "tag": "platform",
    "op": "stats",
    "timestamp": 1574428984980,
    "datetime": "2019-11-22 21:23:04",
    "info": {
        "cpu_percent": 8.5,
        "cpu_percent_list": [
            8.4,
            8.6
        ],
        "cpu_memory_total": 4143292416,
        "cpu_memory_usage": 3695001600,
        "cpu_memory_percent": 95.1,
        "gpus": [
            {
                "name": "Tesla P4",
                "gpu_percent": 22.0,
                "gpu_memory_total": 7611.0,
                "gpu_memory_usage": 617.0,
                "gpu_memory_percent": 8.11
            }
        ],
        "containers": [
            {
                "id": "f3726eae19d6",
                "cpu_percent": 116.66,
                "cpu_memory_total": 4143292416,
                "cpu_memory_usage": 3002134528,
                "cpu_memory_percent": 72.46,
                "op": "train",
                "user": "test",
                "service_uuid"

### 2. 平台控制`/k12ai/platform/control`

A. 输入

```json
{
    "username": "", // [M] 登陆k12ai service的User
    "password": "", // [M] 登陆k12ai service的Code
    "op": "",  // [M] 控制命令, 如, 停止某个container任务"container.stop"
    "async": true, // [M] 控制命令是否异步执行, 提醒: 有些命令操作是耗时的, 需要采用异步方式
    "params": { // [C] 命令的参数, 根据命令不同, 内部结构也会不同
    }
}
```

B. 输出

```json
{
    "code": 100200,
    "descr": "task service success",
    "message": "container id f3726eae19d6 stop success"
}
```

#### 2.1 停止某个任务

In [19]:
# Take the id of the first container listed in the platform stats record; fall
# back to a placeholder id so the control request can still be demonstrated.
try:
    result = json.loads(query_async_data('platform/admin/stats/info'))
    cid = result['info']['containers'][0]['id']
except (ValueError, KeyError, IndexError, TypeError):
    # ValueError: the stats record is not JSON (e.g. the plain-text
    #   "Query nothing" fallback; json.JSONDecodeError is a ValueError).
    # KeyError / IndexError / TypeError: the record lacks the expected
    #   structure or lists no containers.
    cid = 'testid'

cmd = 'container.stop'

# Build the control request as a dict so that `cmd` / `cid` are escaped by the
# JSON encoder instead of being spliced into JSON text by hand.
data = {
    "username": "test",
    "password": "test",
    "op": cmd,
    "async": False,
    "params": {
        "id": cid
    }
}

# POST the request to the platform control endpoint and pretty-print the reply.
api = 'http://%s:%d/k12ai/platform/control' % (host, port)
print_json(requests.post(url=api, json=data).text)

{
    "code": 100200,
    "descr": "task service success",
    "message": "container id f3726eae19d6 stop success"
}


In [20]:
# Read back the result of the container.stop control command stored in Consul.
print(query_async_data(key='platform/admin/control/container.stop'))

{
    "version": "0.1.0",
    "type": "container.stop",
    "tag": "platform",
    "op": "control",
    "timestamp": 1574420342216,
    "datetime": "2019-11-22 18:59:02",
    "container.stop": "container id testid is not found"
}
