### Test Filter

In [8]:
from transformers import AutoTokenizer
import pprint

In [7]:
# Hand-written fixture for the relation filter below: one sample whose
# "relations" pair a subject ("em1Text") with an object ("em2Text") under a
# "label". The sentence contains the fragment "①123" and the last relation
# uses "123" as its object, so it exercises the case where an entity string
# is present in the text but may not line up with the sentence's tokens.
examples = [
    {"sentText": "从海战和海损的实例来看，舰艇的损害主要表现在4个方面：一是舰艇破损进水，机电长①123，甚至倾覆或沉没；二是舰艇起火或爆炸；三是舰艇技术装备损坏；四是舰员被杀伤或毒害。在舰艇上，一切保障舰艇生命力的活动，称为舰艇损害管制，简称舰艇损管。舰艇损管主要是处置舰艇损害活动。《水面舰艇损害管制条例》规定：保障舰艇生命力是全体舰员共同的战斗职责；舰首长对全舰的损管工作实施组织指挥；机电长除领导本部门损管外，还应协助舰首长具体组织领导全舰性损管。因此，保障舰艇生命力的基本原则是全体舰指挥员必须掌握的，以便在平时能正确运用生命力观点分析掌握本舰艇的生命力状况，充分发挥其优点，避免和弥补其弱点，使舰艇具有最大的抵抗损害的能力，能正确地向舰员进行保障舰艇生命力方面的教育和组织损管训练。在舰艇发生损害的情况下，能熟练地运用保障舰艇生命力的基本原则和方法，沉着地组织领导舰员与破损灾害作斗争，取得损管的成功和战斗的胜利。例如在发生火灾后，利用构造上的防火防爆设施灭火装备和器材，限制火灾的蔓延，舰员合理发挥舰上防火防爆设施和灭火装备器材的作用，快速有效地消灭火灾，或将火灾造成的损失降至最低程度。同时，舰艇在战时受武器攻击或因海损事故而引起舰体破损进水",
        "relations": [
            {"em1Text": "舰首长", "em2Text": "保障舰艇生命力", "label": "职责"},
            {"em1Text": "舰首长", "em2Text": "机电长", "label": "同事"},
            {"em1Text": "舰首长", "em2Text": "机电长", "label": "下属"},
            {"em1Text": "舰首长", "em2Text": "机电长", "label": "业务伙伴"},
            {"em1Text": "舰首长", "em2Text": "123", "label": "业务伙伴"}
        ]
    }
]


In [13]:
# NOTE(review): absolute, machine-specific checkpoint path; the cell only runs
# where this directory exists. Consider moving it to a config cell.
model_name_or_path = "D:\\Document\\PLMs\\Bert\\bert_origin\\bert-base-chinese"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)


def _find_token_span(sent_tokens, entity_tokens):
    """Locate the first contiguous occurrence of entity_tokens in sent_tokens.

    Args:
        sent_tokens: list of tokens of the full sentence.
        entity_tokens: list of tokens of the entity to look for.

    Returns:
        (start, end) token indices, both inclusive, of the first match, or
        (-1, -1) when there is no match. An empty entity_tokens is treated as
        "no match"; a naive slice comparison would otherwise match at index 0
        and yield the nonsensical span (0, -1).
    """
    width = len(entity_tokens)
    if width == 0:
        return -1, -1
    # When the entity is longer than the sentence the range is empty.
    for start in range(len(sent_tokens) - width + 1):
        if sent_tokens[start:start + width] == entity_tokens:
            return start, start + width - 1
    return -1, -1


# Keep only the relations whose subject and object can both be located as
# token spans in the tokenized sentence, and attach the span indices to them.
for example in examples:
    sent_tokens = tokenizer.tokenize(example["sentText"])

    relations = []
    for relation in example["relations"]:
        # Entities are tokenized on their own, so their tokens can differ from
        # the tokens the same characters receive inside the sentence; such
        # entities are reported below and dropped.
        sub_tokens = tokenizer.tokenize(relation["em1Text"])
        obj_tokens = tokenizer.tokenize(relation["em2Text"])

        # 1. Is the subject a contiguous sub-list of the sentence tokens?
        sub_start, sub_end = _find_token_span(sent_tokens, sub_tokens)

        # 2. Is the object a contiguous sub-list of the sentence tokens?
        obj_start, obj_end = _find_token_span(sent_tokens, obj_tokens)

        # 3. Report and skip the relation when either entity was not found.
        if sub_start == -1:
            print("subject not in sentText")
            print("subject", sub_tokens)
            print("sentText", sent_tokens)
            continue

        if obj_start == -1:
            print("object not in sentText")
            print("object", obj_tokens)
            print("sentText", sent_tokens)
            continue

        relations.append({
            "em1Text": relation["em1Text"],
            "em2Text": relation["em2Text"],
            "label": relation["label"],
            "em1Start": sub_start,
            "em1End": sub_end,
            "em2Start": obj_start,
            "em2End": obj_end
        })

    # Replaces the sample's relations in place with the filtered, span-annotated ones.
    example["relations"] = relations

print(examples)


object not in sentText
object ['123']
sentText ['从', '海', '战', '和', '海', '损', '的', '实', '例', '来', '看', '，', '舰', '艇', '的', '损', '害', '主', '要', '表', '现', '在', '4', '个', '方', '面', '：', '一', '是', '舰', '艇', '破', '损', '进', '水', '，', '机', '电', '长', '①', '##123', '，', '甚', '至', '倾', '覆', '或', '沉', '没', '；', '二', '是', '舰', '艇', '起', '火', '或', '爆', '炸', '；', '三', '是', '舰', '艇', '技', '术', '装', '备', '损', '坏', '；', '四', '是', '舰', '员', '被', '杀', '伤', '或', '毒', '害', '。', '在', '舰', '艇', '上', '，', '一', '切', '保', '障', '舰', '艇', '生', '命', '力', '的', '活', '动', '，', '称', '为', '舰', '艇', '损', '害', '管', '制', '，', '简', '称', '舰', '艇', '损', '管', '。', '舰', '艇', '损', '管', '主', '要', '是', '处', '置', '舰', '艇', '损', '害', '活', '动', '。', '《', '水', '面', '舰', '艇', '损', '害', '管', '制', '条', '例', '》', '规', '定', '：', '保', '障', '舰', '艇', '生', '命', '力', '是', '全', '体', '舰', '员', '共', '同', '的', '战', '斗', '职', '责', '；', '舰', '首', '长', '对', '全', '舰', '的', '损', '管', '工', '作', '实', '施', '组', '织', '指', '挥', '；', '机', '电', '长', '除', '领'

In [4]:
# Raw predictions keyed by sample id. Each row is a 10-element list; the code
# below reads index 0 as the predicted relation id, 2/3 as the head start/end
# and 6/7 as the tail start/end. The remaining float entries are presumably
# confidence scores (TODO confirm against the model output format).
prediction = {
    "0": [
        [4, 0.5495180487632751, 55, 55, 0.9999977350234985, 0.9999980926513672, 55, 55, 0.9999979734420776, 0.9999988079071045],
        [0, 0.9934440851211548, 55, 55, 0.9999932050704956, 0.9999946355819702, 55, 55, 0.9999977350234985, 0.9999984502792358],
        [9, 0.5417826175689697, 55, 55, 0.9999922513961792, 0.9999926090240479, 55, 55, 0.9999972581863403, 0.9999977350234985],
    ],
    "1": [
        [8, 0.7148820757865906, 57, 58, 0.9999133348464966, 0.9998661279678345, 57, 58, 0.9976715445518494, 0.9998111128807068],
        [3, 0.848729133605957, 57, 58, 0.9999371767044067, 0.9994823932647705, 58, 58, 0.9999376535415649, 0.9999954700469971],
    ],
    "2": [
        [0, 0.9082260131835938, 20, 21, 0.9988471269607544, 0.9999650716781616, 16, 18, 0.9998642206192017, 0.9999357461929321],
    ],
}

# Reduce every row to [relation, head_start, head_end, tail_start, tail_end],
# keeping the same sample ids and row order.
test_pred_lines = {
    sample_id: [
        [row[0], row[2], row[3], row[6], row[7]]
        for row in rows
    ]
    for sample_id, rows in prediction.items()
}

print(test_pred_lines)

{'0': [[4, 55, 55, 55, 55], [0, 55, 55, 55, 55], [9, 55, 55, 55, 55]], '1': [[8, 57, 58, 57, 58], [3, 57, 58, 58, 58]], '2': [[0, 20, 21, 16, 18]]}


In [None]:
# One sample sentence together with its two relation mentions, which link
# "Queens" and "Douglaston" in both directions.
test_line = dict(
    sentText="In Queens , North Shore Towers , near the Nassau border , supplanted a golf course , and housing replaced a gravel quarry in Douglaston .",
    relationMentions=[
        {
            "em1Text": "Queens",
            "em2Text": "Douglaston",
            "label": "/location/location/contains",
        },
        {
            "em1Text": "Douglaston",
            "em2Text": "Queens",
            "label": "/location/neighborhood/neighborhood_of",
        },
    ],
)

# Relation-label vocabulary: "instance2index" maps a label to its integer id
# and "instances" lists labels so that the list position is the id.
# The original literal never closed the inner "instance2index" dict (a syntax
# error) and ended with a trailing comma; both are fixed here and the value is
# bound to a name so it can be reused.
# NOTE(review): "instances" has no entry for "/people/person/children"
# (id 8 in "instance2index") - confirm whether the list was truncated.
relation_vocab = {
    "instance2index": {
        "/location/location/contains": 0,
        "/people/person/place_of_birth": 1,
        "/business/person/company": 2,
        "/people/person/place_lived": 3,
        "/location/administrative_division/country": 4,
        "/location/country/administrative_divisions": 5,
        "/people/person/religion": 6,
        "/people/person/nationality": 7,
        "/people/person/children": 8,
    },
    "instances": [
        "/location/location/contains",
        "/people/person/place_of_birth",
        "/business/person/company",
        "/people/person/place_lived",
        "/location/administrative_division/country",
        "/location/country/administrative_divisions",
        "/people/person/religion",
        "/people/person/nationality",
    ],
}

# Bare last expression so the notebook still displays the vocabulary.
relation_vocab

In [None]:
# Assume origin_lines is a list of dicts, one per sentence.
# (The original assignment ended with a trailing comma, which turned it into a
# tuple wrapping the list and made origin_line["relationMentions"] raise
# TypeError.)
origin_lines = [
    {
        "sentText": "In Queenss , North Shore Towers , near the Nassau border , supplanted a golf course , and housing replaced a gravel quarry in Douglaston .",
        "relationMentions": [
            {"em1Text": "Queens", "em2Text": "Douglaston", "label": "/location/location/contains"},
            {"em1Text": "Douglaston", "em2Text": "Queens", "label": "/location/neighborhood/neighborhood_of"},
        ],
    }
]

# Assume pred_lines is a list of dicts holding predicted relation mentions.
pred_lines = [
    {
        "relationMentions": [
            {"em1Text": "Queens", "em2Text": "Douglaston", "label": "/location/location/contains"},
            {"em1Text": "Douglaston", "em2Text": "Queens", "label": "/location/neighborhood/neighborhood_of"},
        ]
    }
]


def _relation_key(relation_mention):
    """Return a hashable identity for a relation dict.

    A frozenset of the items is used so that two dicts with the same content
    compare equal regardless of the order their keys were inserted in.
    """
    return frozenset(relation_mention.items())


# Merge the predicted relation mentions into every original line, dropping
# duplicates while preserving first-seen order and the dict structure.
# NOTE(review): every pred_line is merged into every origin_line, as in the
# original nested loop; confirm whether the two lists should instead be
# paired by position.
for origin_line in origin_lines:
    candidates = list(origin_line["relationMentions"])
    for pred_line in pred_lines:
        candidates.extend(pred_line["relationMentions"])

    merged_mentions = []
    seen_keys = set()
    for mention in candidates:
        mention_key = _relation_key(mention)
        if mention_key in seen_keys:
            continue
        seen_keys.add(mention_key)
        # Copy so origin_lines and pred_lines never share the same dict object.
        merged_mentions.append(dict(mention))

    # Store a list of dicts; the original stored list(set_of_item_tuples),
    # which lost both the dict structure and the ordering.
    origin_line["relationMentions"] = merged_mentions

print(origin_lines)




In [1]:
dict1 = {'a': 1, 'b': 2, 'c': 3}
dict2 = {'b': 2, 'c': 4, 'd': 5}

# For each dict, collect the entries whose key is missing from the other dict
# or whose value differs from the other dict's value for that key.
diff1 = {}
for key, value in dict1.items():
    if key not in dict2 or value != dict2[key]:
        diff1[key] = value

diff2 = {}
for key, value in dict2.items():
    if key not in dict1 or dict1[key] != value:
        diff2[key] = value

print("Items in dict1 but not in dict2:", diff1)
print("Items in dict2 but not in dict1:", diff2)



NameError: name 'dict1' is not defined