In [421]:
import pandas as pd

In [422]:
def is_variable(pat):
    """Return True when ``pat`` is a single-word variable token such as ``?X``.

    A variable is a ``?`` followed by one or more alphabetic characters.
    A bare ``?`` (plain punctuation) and segment markers such as ``?*X`` are
    not variables.
    """
    # str.isalpha() is False for an empty string, so a lone '?' is rejected;
    # all(...) over an empty suffix used to accept it.
    return pat.startswith('?') and pat[1:].isalpha()

In [484]:
def pat_match(pattern, saying):
    """Match a tokenised pattern against a tokenised sentence.

    Args:
        pattern: list of tokens; tokens accepted by ``is_variable`` bind to
            the word at the same position in ``saying``.
        saying: list of words to match against.

    Returns:
        A list of ``(variable, word)`` pairs in pattern order. An empty list
        means either that the match failed or that the pattern contains no
        variables. Words in ``saying`` beyond the end of the pattern are
        ignored.
    """
    # A pattern that runs past the sentence can never match. Returning early
    # also prevents a partial binding list from being mistaken for a match.
    if len(pattern) > len(saying):
        return []

    bindings = []
    for pat_token, word in zip(pattern, saying):
        if is_variable(pat_token):
            bindings.append((pat_token, word))
        elif pat_token != word:
            # A literal mismatch invalidates bindings collected so far.
            return []
    return bindings

In [485]:
# Demo: ?X sits in the third position, so it binds to the third word of the sentence.
pat_match('I want ?X'.split(), "I want holiday".split())

[('?X', 'holiday')]

In [486]:
def pat_to_dict(patterns):
    """Turn an iterable of ``(variable, value)`` pairs into a dict.

    When a variable appears more than once, the last value wins.
    """
    mapping = {}
    for name, value in patterns:
        mapping[name] = value
    return mapping

In [487]:
def subsitite(rule, parsed_rules):
    """Replace every token of ``rule`` that has a binding in ``parsed_rules``.

    Args:
        rule: list of response-template tokens (a falsy value yields ``[]``).
        parsed_rules: dict mapping variable tokens to their replacement.

    Returns:
        A new list with the same length as ``rule``; tokens without a binding
        are kept unchanged.
    """
    if not rule:
        return []
    # Iterating instead of recursing on rule[1:] avoids the quadratic slicing
    # and the RecursionError the recursive form hits on long templates.
    return [parsed_rules.get(token, token) for token in rule]

In [488]:
# Keep the bindings of a sample match so they can be substituted into a response template.
got_patterns = pat_match("I want ?X".split(), "I want iPhone".split())

In [489]:
# Fill the captured bindings into a response template and join the tokens back into a sentence.
' '.join(subsitite("What if you mean if you got a ?X".split(), pat_to_dict(got_patterns)))

'What if you mean if you got a iPhone'

In [490]:
# Rule table for the single-word matcher: each key is an input pattern and each
# value is a list of response templates that may reuse the pattern's variables.
# NOTE(review): "Image you will get ?X soon" is probably a typo for "Imagine ...";
# left unchanged here because the recorded cell outputs show this exact text.
defined_patterns = {
    "I need ?X": ["Image you will get ?X soon", "Why do you need ?X ?"], 
    "My ?X told me something": ["Talk about more about your ?X", "How do you think about your ?X ? "]
}

In [491]:
import random

In [492]:
def get_response(saying, rules):
    """Answer ``saying`` with a random response from the first matching rule.

    Args:
        saying: the user's sentence; it is split on whitespace.
        rules: dict mapping a pattern string to a list of response templates.

    Returns:
        The chosen template with its variables substituted, or ``None`` when
        no rule produces any binding.
    """
    words = saying.split()
    for pattern, responses in rules.items():
        # Match once and reuse the bindings instead of re-running pat_match.
        bindings = pat_match(pattern.split(), words)
        if not bindings:
            continue
        template = random.choice(responses).split()
        return ' '.join(subsitite(template, pat_to_dict(bindings)))
    return None

In [567]:
# Demo: this sentence fits the "I need ?X" rule.
get_response('I need iPhone', defined_patterns)

['I', 'need', '?X']
[('?X', 'iPhone')]


'Image you will get iPhone soon'

In [570]:
# Demo: this sentence fits the "My ?X told me something" rule.
get_response("My cat told me something", defined_patterns)

['I', 'need', '?X']
[True, None]
['My', '?X', 'told', 'me', 'something']
[('?X', 'cat')]


'Talk about more about your cat'

# 第二部分 Segment Match

In [495]:
def is_pattern_segment(pattern):
    """Return True when ``pattern`` is a segment variable such as ``?*x``.

    A segment variable is ``?*`` followed by one or more alphabetic
    characters. A bare ``?*`` has no name and is not accepted.
    """
    # str.isalpha() is False for an empty string, so the marker needs a name;
    # all(...) over an empty suffix used to accept a bare '?*'.
    return pattern.startswith('?*') and pattern[2:].isalpha()

In [536]:
from collections import defaultdict

In [537]:
def is_match(rest, saying):
    """Check whether the literal prefix of ``rest`` lines up with ``saying``.

    Tokens of ``rest`` are compared with ``saying`` position by position. The
    check stops with True at the first token of ``rest`` that contains a
    non-alphabetic character (for example ``?*y``); such a token is treated
    as a wildcard and nothing after it is inspected.

    Args:
        rest: remaining pattern tokens.
        saying: remaining words of the sentence.

    Returns:
        True when every literal before the first wildcard matches, or when
        both lists are exhausted together. False on a literal mismatch or when
        one list runs out while the other still holds literals or words.
    """
    for position, token in enumerate(rest):
        if not all(char.isalpha() for char in token):
            return True
        # Running out of words before a literal is a mismatch, not an IndexError.
        if position >= len(saying) or token != saying[position]:
            return False
    # Pattern exhausted: only a match if the sentence is exhausted as well.
    return len(saying) == len(rest)

In [538]:
def segment_match(pattern, saying):
    """Bind the segment variable at ``pattern[0]`` to a prefix of ``saying``.

    Returns:
        ``((variable, words), consumed)``, where ``variable`` is the segment
        token with ``?*`` rewritten to ``?``, ``words`` is the captured slice
        of ``saying``, and ``consumed`` is how many words it covers. The
        capture ends before the first word that equals the next pattern token
        and for which ``is_match`` accepts what follows. If there is no next
        token, or no such word, the whole of ``saying`` is captured.
    """
    variable = pattern[0].replace('?*', '?')
    remainder = pattern[1:]

    if remainder:
        for position in range(len(saying)):
            if remainder[0] == saying[position] and is_match(remainder[1:], saying[(position + 1):]):
                return (variable, saying[:position]), position

    # Last pattern token, or the anchor was never found: capture everything.
    return (variable, saying), len(saying)

In [609]:
# Failure marker appended to the result list. Callers in this notebook detect
# a failed match by checking that the last element of the result is falsy.
fail = [True, None]


def pat_match_with_seg(pattern, saying):
    """Match ``pattern`` against ``saying``, supporting ``?x`` and ``?*x``.

    Args:
        pattern: list of pattern tokens (literals, single-word variables and
            segment variables).
        saying: list of words.

    Returns:
        A list of bindings: ``(variable, word)`` for single-word variables and
        ``(variable, [words])`` for segment variables. On failure the list
        ends with the elements of ``fail`` (so its last element is ``None``),
        possibly preceded by the bindings collected before the mismatch.
    """
    if not pattern:
        # Both exhausted is a match; leftover words used to raise IndexError
        # on pattern[0] and are now reported as a failure.
        return [] if not saying else list(fail)

    if not saying:
        # Trailing segment variables may capture nothing, just as a leading
        # segment variable can. Any other leftover token cannot be satisfied.
        if all(is_pattern_segment(token) for token in pattern):
            return [(token.replace('?*', '?'), []) for token in pattern]
        return list(fail)

    pat = pattern[0]

    if is_variable(pat):
        return [(pat, saying[0])] + pat_match_with_seg(pattern[1:], saying[1:])
    if pat == saying[0]:
        return pat_match_with_seg(pattern[1:], saying[1:])
    if is_pattern_segment(pat):
        match, index = segment_match(pattern, saying)
        return [match] + pat_match_with_seg(pattern[1:], saying[index:])
    # Return a copy so callers can never mutate the shared marker.
    return list(fail)

In [610]:
# Demo: ?*P captures the words in front of the literal "is very good".
segment_match('?*P is very good'.split(), "My dog and my cat is very good".split())

(('?P', ['My', 'dog', 'and', 'my', 'cat']), 5)

In [611]:
# Demo: two segment variables separated by the literal words "is very good and".
pat_match_with_seg('?*P is very good and ?*X'.split(), "My dog is very good and my cat is very cute".split())

[('?P', ['My', 'dog']), ('?X', ['my', 'cat', 'is', 'very', 'cute'])]

In [612]:
def pat_to_dict(patterns):
    """Build a ``{variable: text}`` dict from ``(variable, value)`` pairs.

    Values that are lists (segment captures) are joined with single spaces;
    any other value is stored unchanged. This redefinition replaces the
    earlier ``pat_to_dict`` in the notebook.
    """
    bindings = {}
    for name, value in patterns:
        if isinstance(value, list):
            value = ' '.join(value)
        bindings[name] = value
    return bindings

In [613]:
def subsitite(rule, parsed_rules):
    """Replace every token of ``rule`` that has a non-None binding.

    Args:
        rule: list of response-template tokens (a falsy value yields ``[]``).
        parsed_rules: dict mapping variable tokens to replacement text.

    Returns:
        A new list with the same length as ``rule``. A token is kept as-is
        when it has no binding or when its binding is ``None``.
    """
    if not rule:
        return []

    # A loop replaces the recursion on rule[1:], which copied the list at
    # every step and raised RecursionError on long templates.
    replaced = []
    for token in rule:
        bound = parsed_rules.get(token)
        replaced.append(token if bound is None else bound)
    return replaced

In [614]:
# Show the bindings of a segment match; pat_to_dict joins the captured words with spaces.
print(pat_to_dict(pat_match_with_seg('I need ?*X'.split(), 
                  "I need an iPhone".split())))
# Substitute the same bindings into a response template (the cell's displayed value).
subsitite("Why do you neeed ?X".split(), pat_to_dict(pat_match_with_seg('I need ?*X'.split(), 
                  "I need an iPhone".split())))

{'?X': 'an iPhone'}


['Why', 'do', 'you', 'neeed', 'an iPhone']

In [615]:
# Rule table for the segment matcher. Each key is a whitespace-separated
# pattern in which ?*name marks a segment variable; each value is a list of
# response templates that refer to a captured segment as ?name.
# Rules are tried in insertion order, so the catch-all '?*x' stays last.
rule_responses = {
#     '?*x hello ?*y': ['How do you do', 'Please state your problem'],
#     '?*x I want ?*y': ['what would it mean if you got ?y', 'Why do you want ?y', 'Suppose you got ?y soon'],
#     '?*x if ?*y': ['Do you really think its likely that ?y', 'Do you wish that ?y', 'What do you think about ?y', 'Really-- if ?y'],
#     '?*x no ?*y': ['why not?', 'You are being a negative', 'Are you saying \'No\' just to be negative?'],
#     '?*x I was ?*y': ['Were you really', 'Perhaps I already knew you were ?y', 'Why do you tell me you were ?y now?'],
#     '?*x I feel ?*y': ['Do you often feel ?y ?', 'What other feelings do you have?'],
    '?*x 你好 ?*y': ['你好呀', '请告诉我你的问题'],
    '?*x 我想 ?*y': ['你觉得?y有什么意义呢？', '为什么你想?y', '你可以想想你很快就可以?y了'],
    '?*x 我想要 ?*y': ['?x想问你，你觉得?y有什么意义呢?', '为什么你想?y', '?x觉得... 你可以想想你很快就可以有?y了', '你看?x像?y不', '我看你就像?y'],
    '?*x 喜欢 ?*y': ['喜欢?y的哪里？', '?y有什么好的呢？', '你想要?y吗？'],
    '?*x 讨厌 ?*y': ['?y怎么会那么讨厌呢?', '讨厌?y的哪里？', '?y有什么不好呢？', '你不想要?y吗？'],
    '?*x AI ?*y': ['你为什么要提AI的事情？', '你为什么觉得AI要解决你的问题？'],
    '?*x 机器人 ?*y': ['你为什么要提机器人的事情？', '你为什么觉得机器人要解决你的问题？'],
    '?*x 对不起 ?*y': ['不用道歉', '你为什么觉得你需要道歉呢?'],
    '?*x 我记得 ?*y': ['你经常会想起这个吗？', '除了?y你还会想起什么吗？', '你为什么和我提起?y'],
    '?*x 如果 ?*y': ['你真的觉得?y会发生吗？', '你希望?y吗?', '真的吗？如果?y的话', '关于?y你怎么想？'],
    '?*x 我 ?*z 梦见 ?*y':['真的吗? --- ?y', '你在醒着的时候，以前想象过?y吗？', '你以前梦见过?y吗'],
    '?*x 妈妈 ?*y': ['你家里除了?y还有谁?', '嗯嗯，多说一点和你家里有关系的', '她对你影响很大吗？'],
    '?*x 爸爸 ?*y': ['你家里除了?y还有谁?', '嗯嗯，多说一点和你家里有关系的', '他对你影响很大吗？', '每当你想起你爸爸的时候， 你还会想起其他的吗?'],
    '?*x 我愿意 ?*y': ['我可以帮你?y吗？', '你可以解释一下，为什么想?y'],
    '?*x 我很难过，因为 ?*y': ['我听到你这么说， 也很难过', '?y不应该让你这么难过的'],
    '?*x 难过 ?*y': ['我听到你这么说， 也很难过',
                 '不应该让你这么难过的，你觉得你拥有什么，就会不难过?',
                 '你觉得事情变成什么样，你就不难过了?'],
    '?*x 就像 ?*y': ['你觉得?x和?y有什么相似性？', '?x和?y真的有关系吗？', '怎么说？'],
    '?*x 和 ?*y 都 ?*z': ['你觉得?z有什么问题吗?', '?z会对你有什么影响呢?'],
    '?*x 和 ?*y 一样 ?*z': ['你觉得?z有什么问题吗?', '?z会对你有什么影响呢?'],
    '?*x 我是 ?*y': ['真的吗？', '?x想告诉你，或许我早就知道你是?y', '你为什么现在才告诉我你是?y'],
    # NOTE(review): '那一位着什么' in the last reply below looks like a typo for
    # '那意味着什么' - confirm before changing the reply text.
    '?*x 我是 ?*y 吗': ['如果你是?y会怎么样呢？', '你觉得你是?y吗', '如果你是?y，那一位着什么?'],
    '?*x 你是 ?*y 吗':  ['你为什么会对我是不是?y感兴趣?', '那你希望我是?y吗', '你要是喜欢， 我就会是?y'],
    '?*x 你是 ?*y' : ['为什么你觉得我是?y'],
    '?*x 因为 ?*y' : ['?y是真正的原因吗？', '你觉得会有其他原因吗?'],
    # Replies must use ?y, not ?*y: bindings are stored under the ?name form,
    # so a ?*y in a reply was never substituted.
    '?*x 我不能 ?*y': ['你或许现在就能?y', '如果你能?y,会怎样呢？'],
    '?*x 我觉得 ?*y': ['你经常这样感觉吗？', '除了到这个，你还有什么其他的感觉吗？'],
    # '?*y' and '你' need a space between them; fused as '?*y你' the token was
    # taken as a segment variable named 'y你' and the reply's ?y stayed unfilled.
    '?*x 我 ?*y 你 ?*z': ['其实很有可能我们互相?y'],
    '?*x 你为什么不 ?*y': ['你自己为什么不?y', '你觉得我不会?y', '等我心情好了，我就?y'],
    '?*x 好的 ?*y': ['好的', '你是一个很正能量的人'],
    '?*x 嗯嗯 ?*y': ['好的', '你是一个很正能量的人'],
    '?*x 不嘛 ?*y': ['为什么不？', '你有一点负能量', '你说 不，是想表达不想的意思吗？'],
    '?*x 不要 ?*y': ['为什么不？', '你有一点负能量', '你说 不，是想表达不想的意思吗？'],
    '?*x 有些人 ?*y': ['具体是哪些人呢?'],
    '?*x 有的人 ?*y': ['具体是哪些人呢?'],
    '?*x 某些人 ?*y': ['具体是哪些人呢?'],
    '?*x 每个人 ?*y': ['我确定不是人人都是', '你能想到一点特殊情况吗？', '例如谁？', '你看到的其实只是一小部分人'],
    '?*x 所有人 ?*y': ['我确定不是人人都是', '你能想到一点特殊情况吗？', '例如谁？', '你看到的其实只是一小部分人'],
    '?*x 总是 ?*y': ['你能想到一些其他情况吗?', '例如什么时候?', '你具体是说哪一次？', '真的---总是吗？'],
    '?*x 一直 ?*y': ['你能想到一些其他情况吗?', '例如什么时候?', '你具体是说哪一次？', '真的---总是吗？'],
    '?*x 或许 ?*y': ['你看起来不太确定'],
    '?*x 可能 ?*y': ['你看起来不太确定'],
    '?*x 他们是 ?*y 吗？': ['你觉得他们可能不是?y？'],
    '?*x': ['很有趣', '请继续', '我不太确定我很理解你说的, 能稍微详细解释一下吗?']
}

In [629]:
def get_response(saying, rules):
    """Answer ``saying`` using the first rule whose segment pattern matches.

    Args:
        saying: the user's sentence; it is split on whitespace.
        rules: dict mapping a pattern string to a list of response templates.

    Returns:
        A randomly chosen template with its variables substituted, or ``None``
        when no rule matches.
    """
    words = saying.split()
    for pattern, responses in rules.items():
        # Run the matcher once per rule and reuse its result.
        match = pat_match_with_seg(pattern.split(), words)
        # A failed match ends with None. An empty list is a successful match
        # without bindings, so guard the [-1] lookup against it.
        if match and not match[-1]:
            continue
        template = random.choice(responses).split()
        return ' '.join(subsitite(template, pat_to_dict(match)))
    return None
    

In [632]:
# Demo with English input; the English rules in rule_responses are commented out.
get_response('hhhhh I was god', rule_responses)

'请继续'

In [633]:
import jieba

In [634]:
def cut_jieba(string):
    """Segment ``string`` with ``jieba.cut`` and return the pieces as a list."""
    return [piece for piece in jieba.cut(string)]

In [635]:

def cut_jieba_pat(string):
    """Tokenise a rule pattern for matching against jieba-segmented text.

    The pattern is split on whitespace. Chunks starting with ``?`` are kept
    whole; every other chunk is segmented with ``cut_jieba`` and its pieces
    are added in order.
    """
    tokens = []
    for chunk in string.split():
        tokens.extend([chunk] if chunk.startswith('?') else cut_jieba(chunk))
    return tokens

In [636]:
# Demo: chunks starting with '?' are kept whole, the remaining chunk goes through jieba.
cut_jieba_pat('?*x 你好 ?*y')

['?*x', '你好', '?*y']

In [637]:
import re
def replace_pat_string(string):
    """Split a response template into tokens, isolating its variables.

    Every ``?`` followed by one non-whitespace character (for example ``?y``)
    is surrounded with spaces so that it becomes a token of its own, then the
    text is split on whitespace.

    Args:
        string: a response template without spaces around its variables.

    Returns:
        The list of tokens. The spaced-out template is also printed.
    """
    # Raw strings: '\g' is not a valid escape in a normal string literal and
    # triggers an invalid-escape warning on current Python versions.
    spaced = re.sub(r'(?P<n1>\?\S)', r' \g<n1> ', string)
    # Echo kept from the original cell; the recorded output relies on it.
    print(spaced)
    return spaced.split()


In [638]:
def get_ZH_response(saying, rules):
    """Answer a Chinese sentence using the first rule that matches it.

    The sentence is segmented with ``cut_jieba`` and each pattern with
    ``cut_jieba_pat``. The match result of every tried rule is printed.

    Args:
        saying: the user's sentence.
        rules: dict mapping a pattern string to a list of response templates.

    Returns:
        A randomly chosen template with its variables substituted and its
        tokens concatenated without separators, or ``None`` when no rule
        matches.
    """
    words = cut_jieba(saying)
    for pattern, responses in rules.items():
        # Match once with the jieba tokenisation and reuse that result for the
        # bindings. Re-matching with pattern.split() could disagree with the
        # check and pass a failure marker on to pat_to_dict.
        match = pat_match_with_seg(cut_jieba_pat(pattern), words)
        print(match)
        # A failed match ends with None; an empty list is a successful match
        # without bindings, so guard the [-1] lookup against it.
        if match and not match[-1]:
            continue
        template = replace_pat_string(random.choice(responses))
        return ''.join(subsitite(template, pat_to_dict(match)))
    return None

In [643]:
# Demo: run the Chinese responder on a short sentence.
get_ZH_response('就喜欢小贝', rule_responses)

[('?x', ['就', '喜欢', '小贝']), True, None]
[('?x', ['就', '喜欢', '小贝']), True, None]
[('?x', ['就', '喜欢', '小贝']), True, None]
[('?x', ['就']), ('?y', ['小贝'])]
喜欢 ?y 的哪里？


'喜欢小贝的哪里？'