In [35]:
from pprint import pprint
from paddlenlp import Taskflow

In [36]:
# Nested extraction schema: each '日期' span carries '出行时间' relations
# (which in turn carry '出行地点' relations) plus direct '日期地点' relations.
schema = {'日期': [{'出行时间': ['出行地点']}, '日期地点']}

# Build the extraction pipeline from the checkpoint stored under task_path.
my_ie = Taskflow(
    "information_extraction",
    schema=schema,
    task_path='./checkpoint/model_best',
)

[32m[2022-09-13 16:27:48,347] [    INFO][0m - We are using <class 'paddlenlp.transformers.ernie.tokenizer.ErnieTokenizer'> to load 'ernie-3.0-base-zh'.[0m
[32m[2022-09-13 16:27:48,348] [    INFO][0m - Already cached /Users/ming8525/.paddlenlp/models/ernie-3.0-base-zh/ernie_3.0_base_zh_vocab.txt[0m


In [37]:
# Sample input: two dated sentences, joined via adjacent string literals
# so that each sentence sits on its own source line.
text = (
    '4月6日，每天8:00左右从居住地骑电动车到顺义区裕龙六区33号楼改水电，18:00左右返回西坝河东里社区。'
    '4月11日，每天6:00左右从西坝河东里社区骑电动车到东城区南门仓胡同小区改水电，18:00左右骑电动车返回西坝河北里社区。'
)

In [38]:
def get_mapped_type(type):
    """Translate a schema label into its short output tag.

    '日期' maps to 'date', '出行时间' to 'time', and both '出行地点' and
    '日期地点' to 'loc'. Any other label is reported on stdout and
    mapped to None.
    """
    label_groups = (
        (('日期',), 'date'),
        (('出行时间',), 'time'),
        (('出行地点', '日期地点'), 'loc'),
    )
    for labels, tag in label_groups:
        if type in labels:
            return tag
    # Unknown label: report it and signal "no mapping" to the caller.
    print('Unexpect type: ' + type)
    return None

def sort_by_token(data):
    """Sort ``data`` in place, ascending by each item's 'start' value."""
    def start_of(item):
        return item['start']

    data.sort(key=start_of)

def is_existed_entity(input, entities):
    """Tell whether ``entities`` already holds an equivalent of ``input``.

    Two entities count as equivalent when both their 'text' and 'type'
    values are equal; the search stops at the first match.
    """
    return any(
        candidate['text'] == input['text'] and candidate['type'] == input['type']
        for candidate in entities
    )

def parse_entity(entity, type):
    """Convert one raw extracted entity into the simplified output shape.

    Args:
        entity: dict with 'text', 'start' and 'end' keys and, optionally,
            a 'relations' dict mapping a relation label to a list of
            nested raw entities of the same form.
        type: output tag to store under 'type' for this entity.

    Returns:
        A dict with 'type', 'text', 'start' and 'end'. When the input has
        'relations', the result also has a 'relations' list holding the
        recursively parsed related entities, ordered by 'start' within each
        relation label and de-duplicated on ('text', 'type').

    Raises:
        KeyError: if 'text', 'start' or 'end' is missing from an entity.
    """
    res = {
        'type': type,
        'text': entity['text'],
        'start': entity['start'],
        'end': entity['end'],
    }
    if 'relations' in entity:
        res['relations'] = []
        for relation, in_entities in entity['relations'].items():
            relation_type = get_mapped_type(relation)
            if relation_type is None:
                # Same policy as parse_track_result: labels without a
                # mapping are skipped instead of emitted with type None.
                continue
            # Sort a shallow copy so the caller's raw result keeps its order.
            ordered = list(in_entities)
            sort_by_token(ordered)
            for in_entity in ordered:
                # Parse once and reuse the result; parsing again for the
                # append would double the work at every nesting level.
                parsed = parse_entity(in_entity, relation_type)
                if not is_existed_entity(parsed, res['relations']):
                    res['relations'].append(parsed)
    return res

def parse_track_result(results):
    """Flatten raw extraction results into a list of parsed entities.

    Args:
        results: iterable of dicts, each mapping a schema label to a list
            of raw entities (dicts with 'text', 'start', 'end' and an
            optional 'relations' dict).

    Returns:
        A list of entities as produced by ``parse_entity``. Within each
        label the entities are ordered by 'start'. Labels for which
        ``get_mapped_type`` returns None are skipped. An empty result dict
        contributes nothing.
    """
    res = []
    for result in results:
        # Walk every label instead of only the first key: an empty dict no
        # longer raises IndexError and extra labels are not dropped.
        for key, entities in result.items():
            mapped_type = get_mapped_type(key)
            if mapped_type is None:
                continue
            # Sort a shallow copy so the caller's raw result keeps its order.
            ordered = list(entities)
            sort_by_token(ordered)
            res.extend(parse_entity(entity, mapped_type) for entity in ordered)
    return res

In [39]:

# Hand-written sample of the nested entity structure: every entity carries
# 'type', 'text', 'start' and 'end', plus an optional 'relations' list.
# NOTE(review): the offsets look illustrative rather than real positions in
# `text` -- confirm before using this as a test fixture.
result = [{
    'type': 'date',
    'text': '4月6日',
    'start': 7,
    'end': 14,
    'relations': [{
        'type': 'time',
        'text': '6:00',
        'start': 17,
        'end': 21,
        'relations': [{
            'type': 'loc',
            'text': '东城区南门仓胡同小区',
            'start': 143,
            'end': 153,
        }, {
            # Keys were swapped here ('text': 'loc', 'loc': <name>), leaving
            # the entity without a 'type' and with the tag as its text.
            'type': 'loc',
            'text': '西坝河东里社区',
            'start': 170,
            'end': 177,
        }],
    }, {
        'type': 'loc',
        'text': '东城区南门仓胡同小区',
        'start': 143,
        'end': 153,
    }, {
        'type': 'loc',
        'text': '西坝河东里社区',
        'start': 170,
        'end': 177,
    }],
}]

In [40]:
# Run the extractor on the sample text and pretty-print the raw result.
# The printed value is a list of dicts keyed by schema label; each entry has
# text/start/end/probability and may nest further entries under 'relations'.
raw = my_ie(text)
pprint(raw)

[{'日期': [{'end': 4,
          'probability': 0.9992743230325232,
          'relations': {'出行时间': [{'end': 42,
                                  'probability': 0.9822677271152571,
                                  'relations': {'出行地点': [{'end': 53,
                                                          'probability': 0.9742258619266728,
                                                          'start': 46,
                                                          'text': '西坝河东里社区'}]},
                                  'start': 37,
                                  'text': '18:00'},
                                 {'end': 11,
                                  'probability': 0.9703164621178786,
                                  'relations': {'出行地点': [{'end': 33,
                                                          'probability': 0.9564991612796341,
                                                          'start': 22,
                                                          'tex

In [41]:
# Convert the raw extraction result into the simplified type/text/start/end
# tree and pretty-print it.
pprint(parse_track_result(raw))

[{'end': 4,
  'relations': [{'end': 11,
                 'relations': [{'end': 17,
                                'start': 14,
                                'text': '居住地',
                                'type': 'loc'},
                               {'end': 33,
                                'start': 22,
                                'text': '顺义区裕龙六区33号楼',
                                'type': 'loc'}],
                 'start': 7,
                 'text': '8:00',
                 'type': 'time'},
                {'end': 42,
                 'relations': [{'end': 53,
                                'start': 46,
                                'text': '西坝河东里社区',
                                'type': 'loc'}],
                 'start': 37,
                 'text': '18:00',
                 'type': 'time'},
                {'end': 33, 'start': 22, 'text': '顺义区裕龙六区33号楼', 'type': 'loc'},
                {'end': 53, 'start': 46, 'text': '西坝河东里社区', 'type': 'loc'}],
  'start': 0,
  't