# simple model——如何使用建模框架

使用建模框架实现三个模型：

- char分词attention模型
- jieba分词attention模型
- hanlp分词attention模型


# 1. char分词建模


## train
### 参数设置——utils
- 数据：cmn.txt
- 切词：char
- 句子：500

### 参数设置——train

- outputdir: logs/char
- epochs: 50
- unit type: lstm
- units num: 512
- num layer: 2
- attention: luong
- optimizer: sgd
- learnrate: 1
- keepprob : 0.8


In [None]:
# Training cell for the char-tokenized attention model.
# Data-processing helpers (provides GenData)
from utils import GenData
# Model hyper-parameter definitions (provides create_hparams)
from params import create_hparams
# Model implementation (provides BaseModel)
from model import BaseModel

# Build the dataset from cmn.txt using the 'char' tokenizer setting.
# NOTE(review): the third argument (500) is presumably the number of
# sentences to load -- confirm against utils.GenData.
data = GenData('cmn.txt','char',500)
# Start from the project's default hyper-parameters, then override the
# fields that depend on this experiment.
param = create_hparams()
# Output directory for this run (presumably checkpoints/logs -- confirm).
param.out_dir = 'logs/char'
# Vocabulary sizes are taken from the lengths of the dataset's id2en / id2ch
# tables (presumably id-to-token lookups for English and Chinese).
param.encoder_vocab_size = len(data.id2en)
param.decoder_vocab_size = len(data.id2ch)

# Construct the model in 'train' mode and run training on the dataset.
model = BaseModel(param, 'train')
model.train(data)

## infer
### 参数设置——utils
- 同train

### 参数设置——infer

- 解码方法：greedy（beam search有bug）

set `param.batch_size = 1`

set `param.keepprob = 1`

set `model = BaseModel(param, 'infer')`

call `model.inference(data)` to run inference

In [None]:
from utils import GenData
from params import create_hparams
from model import BaseModel

def main():
    """Build the char-tokenized model in 'infer' mode and run inference.

    The dataset and the out_dir / vocabulary-size settings are the same as
    in the char training cell; only batch_size and keepprob are changed for
    inference.
    """
    corpus = GenData('cmn.txt', 'char', 500)
    hparams = create_hparams()

    # (attribute name, value) pairs applied on top of the default
    # hyper-parameters, in this order.
    overrides = (
        ('out_dir', 'logs/char'),
        ('encoder_vocab_size', len(corpus.id2en)),
        ('decoder_vocab_size', len(corpus.id2ch)),
        # Settings that must change in infer mode.
        # NOTE(review): keepprob = 1 presumably disables dropout -- confirm.
        ('batch_size', 1),
        ('keepprob', 1),
    )
    for attr_name, attr_value in overrides:
        setattr(hparams, attr_name, attr_value)

    BaseModel(hparams, 'infer').inference(corpus)


main()

# 2. jieba分词建模



## train
### 参数设置——utils
- 数据：cmn.txt
- 切词：jieba
- 句子：500

### 参数设置——train

- outputdir: logs/jieba
- epochs: 50
- unit type: lstm
- units num: 512
- num layer: 2
- attention: luong
- optimizer: sgd
- learnrate: 1
- keepprob : 0.8


In [None]:
# Training cell for the jieba-tokenized attention model.
# Data-processing helpers (provides GenData)
from utils import GenData
# Model hyper-parameter definitions (provides create_hparams)
from params import create_hparams
# Model implementation (provides BaseModel)
from model import BaseModel

# Build the dataset from cmn.txt using the 'jieba' tokenizer setting.
# NOTE(review): the third argument (500) is presumably the number of
# sentences to load -- confirm against utils.GenData.
data = GenData('cmn.txt','jieba',500)
# Start from the project's default hyper-parameters, then override the
# fields that depend on this experiment.
param = create_hparams()
# Output directory for this run (presumably checkpoints/logs -- confirm).
param.out_dir = 'logs/jieba'
# Vocabulary sizes are taken from the lengths of the dataset's id2en / id2ch
# tables (presumably id-to-token lookups for English and Chinese).
param.encoder_vocab_size = len(data.id2en)
param.decoder_vocab_size = len(data.id2ch)

# Construct the model in 'train' mode and run training on the dataset.
model = BaseModel(param, 'train')
model.train(data)

## infer
### 参数设置——utils
- 同train

### 参数设置——infer

- 解码方法：greedy（beam search有bug）

set `param.batch_size = 1`

set `param.keepprob = 1`

set `model = BaseModel(param, 'infer')`

call `model.inference(data)` to run inference

In [None]:
from utils import GenData
from params import create_hparams
from model import BaseModel

def main():
    """Build the jieba-tokenized model in 'infer' mode and run inference.

    The dataset and the out_dir / vocabulary-size settings are the same as
    in the jieba training cell; only batch_size and keepprob are changed for
    inference.
    """
    corpus = GenData('cmn.txt', 'jieba', 500)
    hparams = create_hparams()

    # (attribute name, value) pairs applied on top of the default
    # hyper-parameters, in this order.
    overrides = (
        ('out_dir', 'logs/jieba'),
        ('encoder_vocab_size', len(corpus.id2en)),
        ('decoder_vocab_size', len(corpus.id2ch)),
        # Settings that must change in infer mode.
        # NOTE(review): keepprob = 1 presumably disables dropout -- confirm.
        ('batch_size', 1),
        ('keepprob', 1),
    )
    for attr_name, attr_value in overrides:
        setattr(hparams, attr_name, attr_value)

    BaseModel(hparams, 'infer').inference(corpus)


main()

# 3. hanlp分词建模


## train
### 参数设置——utils
- 数据：cmn.txt
- 切词：hanlp
- 句子：500

### 参数设置——train

- outputdir: logs/hanlp
- epochs: 50
- unit type: lstm
- units num: 512
- num layer: 2
- attention: luong
- optimizer: sgd
- learnrate: 1
- keepprob : 0.8


In [None]:
# Training cell for the hanlp-tokenized attention model.
# Data-processing helpers (provides GenData)
from utils import GenData
# Model hyper-parameter definitions (provides create_hparams)
from params import create_hparams
# Model implementation (provides BaseModel)
from model import BaseModel

# Build the dataset from cmn.txt using the 'hanlp' tokenizer setting.
# NOTE(review): the third argument (500) is presumably the number of
# sentences to load -- confirm against utils.GenData.
data = GenData('cmn.txt','hanlp',500)
# Start from the project's default hyper-parameters, then override the
# fields that depend on this experiment.
param = create_hparams()
# Output directory for this run (presumably checkpoints/logs -- confirm).
param.out_dir = 'logs/hanlp'
# Vocabulary sizes are taken from the lengths of the dataset's id2en / id2ch
# tables (presumably id-to-token lookups for English and Chinese).
param.encoder_vocab_size = len(data.id2en)
param.decoder_vocab_size = len(data.id2ch)

# Construct the model in 'train' mode and run training on the dataset.
model = BaseModel(param, 'train')
model.train(data)

## infer
### 参数设置——utils
- 同train

### 参数设置——infer

- 解码方法：greedy（beam search有bug）

set `param.batch_size = 1`

set `param.keepprob = 1`

set `model = BaseModel(param, 'infer')`

call `model.inference(data)` to run inference

In [None]:
from utils import GenData
from params import create_hparams
from model import BaseModel

def main():
    """Build the hanlp-tokenized model in 'infer' mode and run inference.

    The dataset and the out_dir / vocabulary-size settings are the same as
    in the hanlp training cell; only batch_size and keepprob are changed for
    inference.
    """
    corpus = GenData('cmn.txt', 'hanlp', 500)
    hparams = create_hparams()

    # (attribute name, value) pairs applied on top of the default
    # hyper-parameters, in this order.
    overrides = (
        ('out_dir', 'logs/hanlp'),
        ('encoder_vocab_size', len(corpus.id2en)),
        ('decoder_vocab_size', len(corpus.id2ch)),
        # Settings that must change in infer mode.
        # NOTE(review): keepprob = 1 presumably disables dropout -- confirm.
        ('batch_size', 1),
        ('keepprob', 1),
    )
    for attr_name, attr_value in overrides:
        setattr(hparams, attr_name, attr_value)

    BaseModel(hparams, 'infer').inference(corpus)


main()