In [1]:
import collections
import json

# Field names shared by every annotation key (see KEY_FIELDS).
REVIEW_ID, INITIALS = "review_id", "initials"

# Keys for the two halves of a review/rebuttal pair in comment_pair_map.
REVIEW = "review"
REBUTTAL = "rebuttal"

class AnnotationTypes(object):
    """Namespace for the annotation type names used as dictionary keys in this notebook."""

    # Every annotation type, in the order the notebook iterates over them.
    ALL = [
        "reviewannotation",
        "reviewsentenceannotation",
        "rebuttalsentenceannotation",
    ]
    # Individual names, unpacked from ALL so the two can never disagree.
    rev_ann, rev_sent_ann, reb_sent_ann = ALL

# For each annotation type, the ordered field names whose values together
# form an annotation's key tuple (consumed by get_key_from_annotation).
KEY_FIELDS = {}
KEY_FIELDS[AnnotationTypes.rev_ann] = [REVIEW_ID, INITIALS]
KEY_FIELDS[AnnotationTypes.rev_sent_ann] = [
    REVIEW_ID, INITIALS, "review_sentence_index"]
KEY_FIELDS[AnnotationTypes.reb_sent_ann] = [
    REVIEW_ID, INITIALS, "rebuttal_sentence_index"]

In [2]:
# Load the text dump; its "sentence" and "example" tables are read below.
# The encoding is pinned so the result does not depend on the platform's
# locale default (JSON interchange text is UTF-8).
with open("final_data_dump/orda_text_0415.json", 'r', encoding="utf-8") as f:
    j = json.load(f)
    
def get_key_from_annotation(ann, ann_type):
    """Build the key tuple of an annotation row.

    Args:
        ann: annotation row; its "fields" mapping is read.
        ann_type: annotation type name used to look up KEY_FIELDS.

    Returns:
        Tuple of the row's field values, in the order KEY_FIELDS lists
        them for ``ann_type``.
    """
    key_values = []
    for field_name in KEY_FIELDS[ann_type]:
        key_values.append(ann["fields"][field_name])
    return tuple(key_values)

def fix_builder(builder):
    """Group sentence-level annotation rows by their two-part key prefix.

    Args:
        builder: mapping whose keys are tuples of at least three elements
            (the first two identify the group, the third is the sentence
            index) and whose values are annotation rows.

    Returns:
        Dict mapping each ``(key[0], key[1])`` prefix to the list of rows
        in that group, ordered by ascending sentence index. An empty
        ``builder`` yields an empty dict.
    """
    grouped = collections.defaultdict(list)
    for full_key, row in builder.items():
        grouped[(full_key[0], full_key[1])].append((full_key[2], row))
    # Sort on the index only, so rows (dicts) are never compared to each other.
    return {
        group_key: [
            row for _, row in sorted(indexed_rows, key=lambda pair: pair[0])]
        for group_key, indexed_rows in grouped.items()
    }

# comment id -> list of that comment's sentence texts, in sentence order.
sentence_map = collections.defaultdict(list)
for sentence in j["sentence"]:
    fields = sentence["fields"]
    comment_sentences = sentence_map[fields["comment_id"]]
    # Sentences must arrive in order: each index has to equal the number of
    # sentences already collected for the same comment.
    assert fields["sentence_index"] == len(comment_sentences)
    comment_sentences.append(fields["text"])

# review id -> {REVIEW: review sentences, REBUTTAL: rebuttal sentences}.
# sentence_map is a defaultdict, so an id without any sentences yields (and
# inserts) an empty list instead of raising.
comment_pair_map = {}
for example in j["example"]:
    fields = example["fields"]
    review_id = fields["review_id"]
    rebuttal_id = fields["rebuttal_id"]
    pair = {}
    pair[REVIEW] = sentence_map[review_id]
    pair[REBUTTAL] = sentence_map[rebuttal_id]
    comment_pair_map[review_id] = pair
    
# Load the annotation dump; it is indexed by annotation type name below.
# The encoding is pinned so the result does not depend on the platform's
# locale default (JSON interchange text is UTF-8).
with open("final_data_dump/orda_annotations_0516.json", 'r', encoding="utf-8") as f:
    annotations_from_file = json.load(f)
    
# One dict per annotation type, mapping an annotation's key tuple to its row.
builders = dict((annot, {}) for annot in AnnotationTypes.ALL)

for annot in AnnotationTypes.ALL:
    # Visit rows in ascending "pk" order so that, when two rows share a key,
    # the row with the larger pk overwrites the earlier one.
    sorted_rows = list(annotations_from_file[annot])
    sorted_rows.sort(key=lambda x: x["pk"])
    for row in sorted_rows:
        key = get_key_from_annotation(row, annot)
        builders[annot][key] = row
        
# Review-level annotations are used as-is; the two sentence-level builders
# are regrouped by fix_builder so they share the review-level key shape.
fixed_builders = {
    AnnotationTypes.rev_ann: builders[AnnotationTypes.rev_ann],
    AnnotationTypes.rev_sent_ann: fix_builder(
        builders[AnnotationTypes.rev_sent_ann]),
    AnnotationTypes.reb_sent_ann: fix_builder(
        builders[AnnotationTypes.reb_sent_ann]),
}


In [5]:
def get_merge_prev(error_field):
    """Return the "merge_prev" entry of an annotation's errors field.

    Args:
        error_field: either a dict holding a "merge_prev" key, or a JSON
            string that decodes (possibly through several layers of string
            encoding) to such a dict.

    Returns:
        The value stored under "merge_prev".

    Raises:
        AssertionError: if the field, once decoded, is neither a dict nor
            a str.
        KeyError: if the decoded dict has no "merge_prev" key.
        ValueError: if a string layer is not valid JSON.
    """
    # Peel off JSON string layers until something other than a string remains.
    while isinstance(error_field, str):
        error_field = json.loads(error_field)
    if isinstance(error_field, dict):
        return error_field["merge_prev"]
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    raise AssertionError(
        "errors field must be a dict or a JSON string, got %s"
        % type(error_field).__name__)

# Record type for the labels attached to one review sentence.
ReviewSentenceLabels = collections.namedtuple(
    "ReviewSentenceLabels",
    ["review_id", "sentence_index", "coarse", "fine", "aspect", "polarity"],
)


class Review(object):
    """A review together with its review-level and sentence-level annotations.

    Only the merge step is wired up so far, and that step is still a stub.
    """

    def __init__(self, rev_ann, rev_sent_anns, review_sents, review_id):
        """Read the merge information from ``rev_ann`` and run the merges.

        Args:
            rev_ann: review-level annotation row; ``rev_ann["fields"]["errors"]``
                is decoded with get_merge_prev.
            rev_sent_anns: sentence-level annotations, forwarded to
                carry_out_merges.
            review_sents: the review's sentences, forwarded to
                carry_out_merges.
            review_id: accepted but not used yet.
        """
        errors = rev_ann["fields"]["errors"]
        merge_prev = get_merge_prev(errors)
        self.carry_out_merges(review_sents, merge_prev, rev_sent_anns)

    def carry_out_merges(self, review_sents, merge_prev, rev_sent_anns):
        """Placeholder for the sentence-merging step; currently does nothing."""
                
class Rebuttal(object):
    """Placeholder for a rebuttal and its sentence annotations; stores nothing yet."""

    def __init__(self, reb_anns, reb_sents, review_id):
        """Accept the rebuttal's annotations, sentences and review id.

        All three arguments are currently ignored.
        """

                
def clean_review_sentence_dict(rev_sent_dict):
    """Flatten a review-sentence annotation row into a single dict.

    Args:
        rev_sent_dict: annotation row; its "fields" mapping is read.

    Returns:
        A new dict holding every field except "labels" unchanged. The
        "labels" value is JSON-decoded twice and the entries found under
        its "0" key are merged into the result in its place.
    """
    flattened = {}
    for name, value in rev_sent_dict["fields"].items():
        if name != "labels":
            flattened[name] = value
            continue
        # The labels are stored double-encoded: a JSON string of a JSON string.
        decoded = json.loads(json.loads(value))
        flattened.update(decoded["0"])
    return flattened

# Build a Review and a Rebuttal for every review-level annotation that has
# both kinds of sentence annotations and whose text is in the text dump.
# fine_count is the number of pairs actually built.
fine_count = 0
for key, rev_ann_items in builders[AnnotationTypes.rev_ann].items():
    # Review-level keys are (review_id, initials). Take the review id from
    # the key itself rather than relying on a `review_id` global left over
    # from an earlier loop, which would pair every annotation with the text
    # of one unrelated review.
    review_id = key[0]

    if key not in fixed_builders[AnnotationTypes.reb_sent_ann]:
        print("No rebuttal for ", key)
        continue
    if key not in fixed_builders[AnnotationTypes.rev_sent_ann]:
        print("No review (???) for ", key)
        continue
    if review_id not in comment_pair_map:
        # The annotation refers to a review that is absent from the text dump.
        print("No text for ", key)
        continue
    fine_count += 1

    rev_sent_ann_items = fixed_builders[AnnotationTypes.rev_sent_ann][key]
    reb_sent_ann_items = fixed_builders[AnnotationTypes.reb_sent_ann][key]
    comment_pair = comment_pair_map[review_id]
    review_obj = Review(
        rev_ann_items, rev_sent_ann_items,
        comment_pair[REVIEW], review_id)
    rebuttal_obj = Rebuttal(
        reb_sent_ann_items, comment_pair[REBUTTAL], review_id)

print(fine_count)

No rebuttal for  ('rJgLrhZq2X', 'AS')
No rebuttal for  ('example_review', 'MC')
No rebuttal for  ('HJeXDu9h2X', 'TJO')
No rebuttal for  ('H1gVYFVS3X', 'MC')
No rebuttal for  ('SylPHRPDnQ', 'RG')
No rebuttal for  ('HyeZp_ivTQ', 'NNK')
No rebuttal for  ('example_review', 'MAD')
No rebuttal for  ('Hyxuz9BAnQ', 'SW')
No rebuttal for  ('rkl9CLxv27', 'AV')
No rebuttal for  ('Ske_YvI527', 'RG')
No rebuttal for  ('S1lR3oO8TX', 'KG')
No rebuttal for  ('rkl9CLxv27', 'MAD')
No rebuttal for  ('HygHUcfK_r', 'MS')
No rebuttal for  ('S1e82d0HqB', 'MAD')
No rebuttal for  ('BklZ3SQ5nX', 'SM')
No rebuttal for  ('H1gVYFVS3X', 'PKY')
No rebuttal for  ('Ske_2A9d3m', 'AN')
No rebuttal for  ('H1lJ9-Pdn7', 'AN')
No rebuttal for  ('SyeS7CQ83m', 'RG')
No rebuttal for  ('example_review', 'CB')
No rebuttal for  ('H1lJ9-Pdn7', 'NNK')
No rebuttal for  ('Hygsieas2X', 'PKY')
No rebuttal for  ('rJe1e3jj3X', 'PKY')
No rebuttal for  ('rJgLrhZq2X', 'PKY')
No rebuttal for  ('SklxtSc93Q', 'PKY')
No rebuttal for  ('B1lChPhJ