In [1]:
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from collections import defaultdict
from scipy import spatial
from IPython.display import HTML, display
import tabulate
import json
import pickle
import re
import nltk

In [2]:
from nltk.corpus import stopwords
from stanfordcorenlp import StanfordCoreNLP
from nltk.tree import Tree

# Dataset Prep

### This exp. is done using the test sentence only. Doesn't take into account the train IP set.

In [3]:
df = pd.read_excel("InputTestSet-Reviews48_Ann.xlsx")

In [4]:
df.head()

Unnamed: 0,UID,PID,Dec,Sent,MComp,Cat,SubCat
0,0,2019_SJf_XhCqKm,Reject,The authors propose to use k-DPP to select a s...,0,,
1,1,2019_SJf_XhCqKm,Reject,"This paper covers the related work nicely, wit...",0,,
2,2,2019_SJf_XhCqKm,Reject,The rest of the paper are also clearly written.,0,,
3,3,2019_SJf_XhCqKm,Reject,"However, I have some concerns about the propos...",0,,
4,4,2019_SJf_XhCqKm,Reject,"- It is not clear how to define the kernel, th...",0,,


In [5]:
df.shape

(1505, 7)

In [6]:
gt_dict = {}

for i in range(0, df.shape[0]):
    pid = df.loc[i]["PID"]
    if not pid in gt_dict:
        gt_dict[pid] = {"dec": df.loc[i]["Dec"], "mcomp": set(), "not_mcomp": set()}
    if df.loc[i]["MComp"] == 1:
        gt_dict[pid]["mcomp"].add(df.loc[i]["UID"])
    else:
        gt_dict[pid]["not_mcomp"].add(df.loc[i]["UID"])

In [7]:
stats_dict = {"Accept": [0, 0], "Reject": [0, 0]}

for k, v in gt_dict.items():
    #print(len(v["mcomp"]), len(v["not_mcomp"]), v["dec"])
    stats_dict[v["dec"]][0] += len(v["mcomp"])
    stats_dict[v["dec"]][1] += len(v["not_mcomp"])
    
print(stats_dict)

{'Accept': [48, 644], 'Reject': [69, 744]}


In [8]:
test_set = list(gt_dict.keys())
print("TestSet length: %d\n"%len(test_set), test_set)

TestSet length: 32
 ['2019_SJf_XhCqKm', '2017_Bk0MRI5lg', '2020_SyevYxHtDB', '2018_rJBiunlAW', '2020_rkltE0VKwH', '2018_Hki-ZlbA-', '2019_BJx0sjC5FX', '2020_r1e_FpNFDr', '2020_B1lsXREYvr', '2018_SkZxCk-0Z', '2019_rJzoujRct7', '2018_HkfXMz-Ab', '2017_BJ9fZNqle', '2019_SyxZJn05YX', '2017_B1ckMDqlg', '2017_HJ0NvFzxl', '2017_S1_pAu9xl', '2018_SyYYPdg0-', '2017_BJAA4wKxg', '2019_HyVxPsC9tm', '2019_HylTBhA5tQ', '2019_B1l08oAct7', '2018_H135uzZ0-', '2017_H1oyRlYgg', '2017_r1y1aawlg', '2020_r1eX1yrKwB', '2020_Byg79h4tvB', '2019_H1lFZnR5YX', '2020_BkeWw6VFwr', '2018_HyHmGyZCZ', '2018_HyUNwulC-', '2020_HkgsPhNYPS']


In [9]:
for k in test_set:
    print('{:20}{}'.format(k, gt_dict[k]["mcomp"]))

2019_SJf_XhCqKm     {39, 17, 20, 27, 28, 30}
2017_Bk0MRI5lg      {48, 57}
2020_SyevYxHtDB     {76, 87}
2018_rJBiunlAW      {108, 110, 112, 113, 124, 126}
2020_rkltE0VKwH     {160, 155, 184, 159}
2018_Hki-ZlbA-      {267, 235, 236, 271}
2019_BJx0sjC5FX     {292, 287}
2020_r1e_FpNFDr     {312, 322, 315, 308}
2020_B1lsXREYvr     {376, 401}
2018_SkZxCk-0Z      {449, 443, 445, 486}
2019_rJzoujRct7     {518, 519}
2018_HkfXMz-Ab      {573, 566}
2017_BJ9fZNqle      {627, 623, 615}
2019_SyxZJn05YX     {672, 673, 657, 669, 671}
2017_B1ckMDqlg      {714, 707}
2017_HJ0NvFzxl      {739}
2017_S1_pAu9xl      {792, 809, 810, 806}
2018_SyYYPdg0-      {834, 867, 868, 869, 870, 872, 873, 844, 830}
2017_BJAA4wKxg      {884}
2019_HyVxPsC9tm     {931, 933, 905, 909, 912, 913, 919, 926}
2019_HylTBhA5tQ     {972, 950}
2019_B1l08oAct7     {994, 996, 1064, 1004, 1007, 1044, 1047, 1048, 1055}
2018_H135uzZ0-      {1072, 1079}
2017_H1oyRlYgg      set()
2017_r1y1aawlg      {1125, 1162, 1100, 1102, 1168}
2020_r1eX1y

In [10]:
sents_for_test = defaultdict(list)

for i in range(0, df.shape[0]):
    pid = df.loc[i]["PID"]
    sents_for_test[pid].append((df.loc[i]["UID"], df.loc[i]["Sent"]))

## Load entities

In [11]:
with open("entities_dict_smaller", "r") as f:
    entity_dict = json.load(f)

In [12]:
set(entity_dict.values())

{'Material', 'Method', 'Metric', 'Task'}

In [13]:
list(entity_dict.items())[0:20]

[('convolutional neural networks', 'Method'),
 ('convnets', 'Method'),
 ('recognition', 'Task'),
 ('visual recognition tasks', 'Task'),
 ('age estimation', 'Task'),
 ('head pose estimation', 'Task'),
 ('multi - label classification', 'Task'),
 ('semantic segmentation', 'Task'),
 ('classification', 'Task'),
 ('deep convnets', 'Method'),
 ('dldl', 'Method'),
 ('feature learning', 'Task'),
 ('deep learning', 'Method'),
 ('image classification', 'Task'),
 ('deep learning methods', 'Method'),
 ('image classification tasks', 'Task'),
 ('human pose estimation', 'Task'),
 ('convnet', 'Method'),
 ('recognition tasks', 'Task'),
 ('ensemble', 'Method')]

In [14]:
entity_key_map = {}
for i in entity_dict:
    s = re.sub('[^0-9a-zA-Z,:;.?!\- ]+', '', i)
    while s.find("  ") > -1:
        s = s.replace("  ", " ")
    if len(s) > 2:
        cl = re.sub('[^0-9a-zA-Z ]+', '', i)
        while cl.find("  ") > -1:
            cl = cl.replace("  ", " ")
        entity_key_map[cl.strip()] = i
print(len(entity_key_map))

1784


In [15]:
coun = 0
for i in entity_dict:
    if len(i) < 5:
        coun +=1
#         print(i)
print(coun)

212


In [16]:
list(entity_key_map.items())[0:5]

[('convolutional neural networks', 'convolutional neural networks'),
 ('convnets', 'convnets'),
 ('recognition', 'recognition'),
 ('visual recognition tasks', 'visual recognition tasks'),
 ('age estimation', 'age estimation')]

In [17]:
from collections import Counter
c = Counter(entity_dict.values())
c

Counter({'Method': 1191, 'Task': 289, 'Metric': 158, 'Material': 165})

In [18]:
# dir(c)
reverse_map = defaultdict(list)

for k, v in entity_dict.items():
    reverse_map[v].append(k)

In [19]:
# for i in reverse_map["Task"]:
#     print(i)

In [20]:
"MNIST" in entity_key_map, "mnist" in entity_key_map

(False, True)

## A. RoBERTa trained on SciLit

In [21]:
import spacy
import torch

In [22]:
!pip3.7 list | grep -E 'transformers|tokenizers'

spacy-transformers            0.6.2
tokenizers                    0.7.0
transformers                  2.9.0


In [23]:
from transformers import AutoTokenizer, AutoModel

In [24]:
tokenizer = AutoTokenizer.from_pretrained("./trained_lm/CLMLModelRoBerta/")
model = AutoModel.from_pretrained("./trained_lm/CLMLModelRoBerta/")

In [25]:
import transformers
print(transformers.__version__)

2.9.0


In [26]:
def embed_text_using_roberta(text):
    input_ids = torch.tensor(tokenizer.encode(text)).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
    return last_hidden_states

In [27]:
def mask_entities(sentence, replace_with_dataset=True):
    cleaned_sent = re.sub('[^0-9a-zA-Z,:;.?!\- ]+', ' ', sentence)
    while cleaned_sent.find("  ") > -1:
        cleaned_sent = cleaned_sent.replace("  ", " ")
    
    entity_key_map_keys = list(entity_key_map.keys()) # As we will be dunamically adding entries to this dict an dthat will throw an error.
    entities_found = []
    for i in entity_key_map_keys:
        if cleaned_sent.find(" " + i + " ") > -1:
            entities_found.append(i)
        elif cleaned_sent.lower().find(" " + i + " ") > -1:
            found_idx = cleaned_sent.lower().find(" " + i + " ")
            entity_dict[cleaned_sent[found_idx:found_idx+len(" " + i + " ")]] = entity_dict[i]
            entity_key_map[cleaned_sent[found_idx:found_idx+len(" " + i + " ")]] = entity_key_map[i]
    
    entities_found.sort(key=lambda s: len(s))
    len_sorted_entities = entities_found.copy()
    
    subset_entities = []
    # Remove subset entities (eg: Among cnn and 3-layer-cnn, prefer the latter)
    for fe in len_sorted_entities:
        for other_ent in len_sorted_entities:
            if fe != other_ent and other_ent.find(fe) > -1:
                subset_entities.append(fe)
                break
    for se in subset_entities:
        len_sorted_entities.remove(se)
    for maxents in len_sorted_entities:
        mask_name = " " + entity_dict[entity_key_map[i]].lower() + " "
        if replace_with_dataset:
            if mask_name == " material ":
                mask_name = " dataset "
        cleaned_sent = cleaned_sent.replace(" " + maxents + " ", mask_name)
    words_cleaned = nltk.word_tokenize(cleaned_sent)
    dups_removed = [v for i, v in enumerate(words_cleaned) if i == 0 or v != words_cleaned[i-1]]
    new_dup_removed_sent = " ".join(dups_removed)
    return new_dup_removed_sent.strip()

#     #print(cleaned_sent)
#     for i in entity_key_map:
#         if cleaned_sent.find(" " + i + " ") > -1:
#             #print("Substituting ent: {} with mask: {}".format(i, entity_dict[entity_key_map[i]].lower()))
#             cleaned_sent = cleaned_sent.replace(i, entity_dict[entity_key_map[i]].lower())
#     return cleaned_sent

In [28]:
mask_entities("In the BO-PET test*, the best method is to take\ risks. This leads to substantial improvement in results.")

'In the BO-PET test , the best method is to take risks . This leads to substantial improvement in results .'

In [29]:
nltk.word_tokenize("this is a test")

['this', 'is', 'a', 'test']

In [30]:
nlp = spacy.load('en_core_web_sm')
sp_toks = ["result", "method", "task", "dataset", "metric", "baseline", "fair", "unfair"]

In [31]:
def extract_chunks_using_spacy_dp(conssentence, replace_with_dataset=True):
    
    conssentence = mask_entities(conssentence, replace_with_dataset)
#     print(conssentence)
    doc = nlp(conssentence)
    verb_subtree = []

    for s in doc.sents:
#         find_special_tokens = {"compar": [], "result": [], "method": [], "technique": [], "task": [], "dataset": [], "material": [], "metric": []}
        find_special_tokens = {"compar": [], "result": [], "method": [], "baseline": [], "task": [], 
                               "dataset": [],  "metric": [], "unfair": [], "fair": []}

        for tok in s:

            if tok.text.lower().startswith("compar"):
                find_special_tokens["compar"].append(tok)
            else:
                for k in sp_toks:
                    if tok.text.lower().startswith(k):
                        find_special_tokens[k].append(tok)
                        break

        verb_tokens = []
        if find_special_tokens["compar"]:
            for t in find_special_tokens["compar"]:
#                     verb_subtree.append(t.subtree)
                if t == s.root:
                    simplified_sent = ""
                    for chh in t.lefts:
                        simplified_sent = simplified_sent + " " + chh.text
                    simplified_sent = simplified_sent + " " + t.text
                    for chh in t.rights:
                        simplified_sent = simplified_sent + " " + chh.text
#                         print("SIMP: ", simplified_sent)
                    verb_subtree.append(simplified_sent)
                else:
                    verb_subtree.append(t.subtree)
        else:
            for k in sp_toks:
                for i in find_special_tokens[k]:
                    local_vt = []
                    for j in i.ancestors:
                        if j.pos_ == "NOUN":
                            local_vt.append(j)
                    if not local_vt:
                        for j in i.ancestors:
                            if j.pos_ == "VERB":
                                local_vt.append(j)
                    verb_tokens = verb_tokens + local_vt


            for i in verb_tokens:
                verb_subtree.append(i.subtree)

    eecc = []
    for i in verb_subtree:
        if type(i) == str:
            eecc.append(i)
        else:
            local_chunk = ""
            for lcaltok in i:
                local_chunk = local_chunk + " " + lcaltok.text
            eecc.append(local_chunk)
#     if not eecc:
#         print(conssentence)
    return list(set(eecc))


In [32]:
extract_chunks_using_spacy_dp("It would be interesting to explore the practicability of the method on more large-scale experiments on image related tasks.")

[' more large - scale experiments on image related tasks',
 ' the practicability of the method']

In [33]:
extract_chunks_using_spacy_dp("The experimental validation is also not extensive since comparison to SOTA is not included.")

[' comparison to SOTA']

In [34]:
def extract_content_words_spacy(conssentence):
    
    doc = nlp(conssentence)
    final_sentence = []
    
    for s in doc.sents:
        for tok in s:
            if not tok.is_stop:
                final_sentence.append(tok.text)
    return final_sentence


In [34]:
def flatten_list(l):
    final_list = []
    for i in l:
        if type(i) == list:
            final_list += flatten_list(i)
        else:
            final_list.append(i)
    return final_list

In [35]:
def return_subtree_chunks(subtree_chunks, depth_to_split, dict_len_st):
    final_chunks_sent = [] 
    
    for stchunk in subtree_chunks:
#         print("Stchunk: ", stchunk)
#         print(len(stchunk[0].leaves()), stchunk[0].leaves())

        if len(stchunk[0].leaves()) > 6:
            subsubtrees = list(stchunk[0].subtrees())
            fnlsubsub_len5_words = []
            new_subdepth_to_split = depth_to_split+1
            for sss in subsubtrees:
                if str(sss) in dict_len_st and dict_len_st[str(sss)] == new_subdepth_to_split:
#                     print("SSS: ", sss)
                    fnlsubsub_len5_words.append(return_subtree_chunks([(sss, new_subdepth_to_split)], new_subdepth_to_split, dict_len_st))

            for subchunk in fnlsubsub_len5_words:
                final_chunks_sent.append(subchunk)
        else:
            final_chunks_sent.append(" ".join(stchunk[0].leaves()))
    
    return final_chunks_sent

In [36]:
corenlp = StanfordCoreNLP("/home/shruti/Documents/DataNLP/stanford-corenlp-4.1.0/")

In [39]:
corenlp.close()

In [40]:
def get_constituency_chunks(sent):
    corenlp = StanfordCoreNLP("/home/shruti/Documents/DataNLP/stanford-corenlp-4.1.0/")
    parse_str = corenlp.parse(sent)
    nltk_tree = Tree.fromstring(parse_str)
    
#     print(nltk_tree)
    
    subtrees_list = list(nltk_tree.subtrees())
    subtrees_tpos = nltk_tree.treepositions()
    for i in range(0, len(nltk_tree.leaves())):
        tp_leaf = nltk_tree.leaf_treeposition(i)
        subtrees_tpos.remove(tp_leaf)
    
    dict_len_st = {}
    depth_of_subtree = []
    for _, i in enumerate(subtrees_list):
        depth_of_subtree.append((i, len(subtrees_tpos[_])))
        dict_len_st[str(i)] = len(subtrees_tpos[_])
    
    cdepths = []
    for d in depth_of_subtree:
        cdepths.append(d[1])
    depth_counter = Counter(cdepths)
    sorted_depths = sorted(list(depth_counter.keys()))
#     print(sorted(depth_counter.items(), key=lambda x: x[0]))
    
    depth_to_split = None
#     print(sorted_depths) 
    for sd in sorted_depths:
        if depth_counter[sd] == 3:
            depth_to_split = 3
        elif depth_counter[sd] > 3:
            depth_to_split = sd
            break
#     if depth_to_split == None or depth_to_split == 4:
#         print("Depth to split: {}".format(depth_to_split))
        
#     print("depth: ", depth_to_split)
    
    subtree_chunks = []
    for i in depth_of_subtree:
        if i[1] == depth_to_split:
            subtree_chunks.append(i)
    
    final_chunks_sent = []
    
    final_chunks_sent = return_subtree_chunks(subtree_chunks, depth_to_split, dict_len_st)
    
# #     for tt in subtree_chunks:
# #         print(tt)
    
#     for stchunk in subtree_chunks:
#         print(len(stchunk[0].leaves()), stchunk[0].leaves())
# #         print(stchunk)
#         if len(stchunk[0].leaves()) > 5:
#             subsubtrees = list(stchunk[0].subtrees())
#             fnlsubsub = []
#             new_subdepth_to_split = depth_to_split+1
#             new_subdepths = []
#             for sss in subsubtrees:
#                 new_subdepths.append(dict_len_st[str(sss)])
#             new_subdepths = sorted(new_subdepths)
#             new_subdepth_to_split = new_subdepths[len(new_subdepths)//2]
#             for sss in subsubtrees:
#                 if str(sss) in dict_len_st and dict_len_st[str(sss)] == new_subdepth_to_split:
#                     fnlsubsub.append(sss)
#             for subchunk in fnlsubsub:
#                 final_chunks_sent.append(" ".join(subchunk.leaves()))
#         else:
#             final_chunks_sent.append(" ".join(stchunk[0].leaves()))
# #         final_chunks_sent.append(" ".join(stchunk[0].leaves()))
    corenlp.close()
    stop_words = list(stopwords.words('english'))
    final_chunks = flatten_list(final_chunks_sent)
    stopwords_removed_chunks = []
    for chunk in final_chunks:
        if chunk in stop_words:
            stopwords_removed_chunks.append(chunk)
    return stopwords_removed_chunks

In [41]:
roberta_vectors = defaultdict(dict)
skip_uids = []

for pid in gt_dict:
    roberta_vectors[pid] = {}
    
    for mcs in gt_dict[pid]["mcomp"]:
        try:
            mcomp_chunks_from_sent = get_constituency_chunks(df.loc[mcs]["Sent"])
            if mcomp_chunks_from_sent:
                final_chunks = mcomp_chunks_from_sent
            else:
                final_chunks = [df.loc[mcs]["Sent"]]
            
            roberta_vectors[pid][mcs] = []
            for single_chunk in final_chunks:
                vec = embed_text_using_roberta(single_chunk.strip()).mean(1).detach().numpy()
                roberta_vectors[pid][mcs].append(vec / norm(vec))
        except Exception as ex:
            print(ex, pid, mcs, df.loc[mcs]["Sent"])
            skip_uids.append(mcs)
    
    for mcs in gt_dict[pid]["not_mcomp"]:
        try:
            mcomp_chunks_from_sent = get_constituency_chunks(df.loc[mcs]["Sent"])
            if mcomp_chunks_from_sent:
                final_chunks = mcomp_chunks_from_sent
            else:
                final_chunks = [df.loc[mcs]["Sent"]]
            
            roberta_vectors[pid][mcs] = []
            for single_chunk in final_chunks:
                vec = embed_text_using_roberta(single_chunk.strip()).mean(1).detach().numpy()
                roberta_vectors[pid][mcs].append(vec / norm(vec))
        except Exception as ex:
            print(ex, pid, mcs, df.loc[mcs]["Sent"])
            skip_uids.append(mcs)

Expecting value: line 1 column 1 (char 0) 2018_Hki-ZlbA- 238 * I don’t know if the notation in the Equation in the paragraph describing Carlini & Wagner comes from the original paper, but the inner max would be easier to read as \max_{i \neq t} \{Z(x’)_i \}
* Page 3 “Neural network verification”: I dont agree with the statement that neural networks commonly are trained on “a small set of inputs”.
Expecting value: line 1 column 1 (char 0) 2018_Hki-ZlbA- 250 I therefore find the authors' statement on page 3 disturbing: "... they are trained over a small set of inputs, and can then perform well, in general, on previously-unseen inputs" -- which seems false (with high probability over all possible worlds).
Expecting value: line 1 column 1 (char 0) 2020_r1e_FpNFDr 355 By the standard argument of the statistical learning theory (such as Theorem A.4), we can typically bound the generalization error by $O(B\sqrt{D/N})$ where $B$ is the infimum of Lipschitz constant of hypotheses, $D$ is the in

In [42]:
mcomp_sentences = {}
not_mcomp_sentences = {}

for pid in gt_dict:
    for mcs in gt_dict[pid]["mcomp"]:
        if not mcs in skip_uids:
            mcomp_sentences[mcs] = pid
    for mcs in gt_dict[pid]["not_mcomp"]:
        if not mcs in skip_uids:
            not_mcomp_sentences[mcs] = pid
print(len(mcomp_sentences), len(not_mcomp_sentences))

114 1372


In [43]:
sim_with_mcomp = defaultdict(dict)
sim_with_not_mcomp = defaultdict(dict)
sim_with_notmcomp_paper_sents = defaultdict(dict)

mean_at_k = ["1", "3", "5", "7", "10", "20", "30", "50", "100", "500", "1000", "1380"]

for sid in mcomp_sentences:
    
    # 1. With other mcomp sentences
    temp_list = []    
    for osid in mcomp_sentences:
        if osid != sid:
            for cvec1 in roberta_vectors[mcomp_sentences[osid]][osid]:
                for cvec2 in roberta_vectors[mcomp_sentences[sid]][sid]:
                    temp_list.append(np.inner(cvec1, cvec2)[0][0])
    
    sorted_temp_list = sorted(temp_list, reverse=True)
    sim_with_mcomp[sid]["mean"] = np.mean(sorted_temp_list)
    for vv in mean_at_k:
        sim_with_mcomp[sid]["mean_{}".format(vv)] = np.mean(sorted_temp_list[0:int(vv)])

    
    # 2. With other not_mcomp_sentences
    temp_list = []
    for osid in not_mcomp_sentences:
        for cvec1 in roberta_vectors[not_mcomp_sentences[osid]][osid]:
            for cvec2 in roberta_vectors[mcomp_sentences[sid]][sid]:
                temp_list.append(np.inner(cvec1, cvec2)[0][0])
    
    sorted_temp_list = sorted(temp_list, reverse=True)
    sim_with_not_mcomp[sid]["mean"] = np.mean(sorted_temp_list)
    for vv in mean_at_k:
        sim_with_not_mcomp[sid]["mean_{}".format(vv)] = np.mean(sorted_temp_list[0:int(vv)])

    
    # 3. With not_mcomp_sentences of the same paper
    temp_list = []    
    for osid in not_mcomp_sentences:
        if not_mcomp_sentences[osid] == mcomp_sentences[sid]:
            for cvec1 in roberta_vectors[not_mcomp_sentences[osid]][osid]:
                for cvec2 in roberta_vectors[mcomp_sentences[sid]][sid]:
                    temp_list.append(np.inner(cvec1, cvec2)[0][0])
    
    sorted_temp_list = sorted(temp_list, reverse=True)
    sim_with_notmcomp_paper_sents[sid]["mean"] = np.mean(sorted_temp_list)
    for vv in mean_at_k:
        sim_with_notmcomp_paper_sents[sid]["mean_{}".format(vv)] = np.mean(sorted_temp_list[0:int(vv)])
    

In [44]:
# mean_sim_plot
diff12 = {"all": []}
for vv in mean_at_k:
    diff12[str(vv)] = []

diff13 = {"all": []}
for vv in mean_at_k:
    diff13[str(vv)] = []

for sid in sim_with_mcomp:
    diff12["all"].append(sim_with_mcomp[sid]["mean"] - sim_with_not_mcomp[sid]["mean"])
    diff13["all"].append(sim_with_mcomp[sid]["mean"] - sim_with_notmcomp_paper_sents[sid]["mean"])
    
    for vv in mean_at_k:
        diff12[str(vv)].append(sim_with_mcomp[sid]["mean_{}".format(vv)] - sim_with_not_mcomp[sid]["mean_{}".format(vv)])
        diff13[str(vv)].append(sim_with_mcomp[sid]["mean_{}".format(vv)] - sim_with_notmcomp_paper_sents[sid]["mean_{}".format(vv)])


# Result of the most similar constituency chunk

In [45]:
# With dataset as mask
res_table = [[""] + mean_at_k, [""]]

for val in mean_at_k:
    v1 = round(sum(i > 0 for i in diff12[val])/len(diff12[val]), 2)
    res_table[1].append(v1)

display(HTML(tabulate.tabulate(res_table, tablefmt='html')))

# With dataset as mask
res_table = [[""] + mean_at_k, [""]]

for val in mean_at_k:
    v1 = round(sum(i > 0 for i in diff13[val])/len(diff13[val]), 2)
    res_table[1].append(v1)

display(HTML(tabulate.tabulate(res_table, tablefmt='html')))

0,1,2,3,4,5,6,7,8,9,10,11,12
,1.0,3,5,7,10,20,30,50,100,500,1000,1380
,0.02,0,0,0,0,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7,8,9,10,11,12
,1.0,3.0,5.0,7.0,10.0,20.0,30.0,50.0,100.0,500.0,1000.0,1380.0
,0.11,0.25,0.33,0.36,0.55,0.77,0.87,0.91,0.98,0.93,0.91,0.89


# --------------------------------------------------------------------------------------------------------------

# 3. Analyse chunks after masking

In [96]:
masked_chunks = {"mcs": [], "nmcs": []}

for pid in gt_dict:
    for mcs in gt_dict[pid]["mcomp"]:
        try:
            mcomp_chunks_from_sent = extract_chunks_using_spacy_dp(df.loc[mcs]["Sent"])
            if mcomp_chunks_from_sent:
                final_chunk = ". ".join(mcomp_chunks_from_sent)
            else:
                final_chunk = df.loc[mcs]["Sent"]
            
            masked_chunks["mcs"].append((df.loc[mcs]["Sent"], final_chunk))
        except Exception as ex:
            continue
    
    for mcs in gt_dict[pid]["not_mcomp"]:
        try:
            mcomp_chunks_from_sent = extract_chunks_using_spacy_dp(df.loc[mcs]["Sent"])
            if mcomp_chunks_from_sent:
                final_chunk = ". ".join(mcomp_chunks_from_sent)
            else:
                final_chunk = df.loc[mcs]["Sent"]
            masked_chunks["nmcs"].append((df.loc[mcs]["Sent"], final_chunk))
        except Exception as ex:
            continue

In [97]:
extract_chunks_using_spacy_dp("The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).")

[' comparison']

In [98]:
mask_entities("The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).")

'The authors propose k-DPP as an open loop oblivious to the evaluation of configurations method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search , uniform random search , low-discrepancy Sobol sequences , BO-TPE Bayesian optimization using tree-structured Parzen estimator by Bergstra et al 2011 .'

In [99]:
extract_chunks_using_spacy_dp("The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).")

[' comparison']

In [100]:
extract_chunks_using_spacy_dp("The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).")

[' comparison']

In [101]:
masked_chunks["mcs"][2:5]

[('The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).',
  ' comparison'),
 ('Second, their study only applies to a small number like 3-6 hyperparameters with a small k=20) The real challenge lies in scaling up to many hyperparameters or even k-DPP sampling for larger k. Third, the authors do not compare against some relevant, recent work, e.g., Springenberg et al (http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf) and Snoek et al (https://arxiv.org/pdf/1502.05700.pdf) that is essential for this kind of empirical study.',
  ' a small number like 3 - 6 metric with a small k 20.  lies , authors do not compare against'),
 ('COMMENTS ON THE CHANGES SINCE THE LAST

In [102]:
mask_entities("The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).")

'The authors propose k-DPP as an open loop oblivious to the evaluation of configurations method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search , uniform random search , low-discrepancy Sobol sequences , BO-TPE Bayesian optimization using tree-structured Parzen estimator by Bergstra et al 2011 .'

In [104]:
# The authors propose k-DPP as an open loop (oblivious to the evaluation of configurations) method for hyperparameter optimization and provide its empirical study and comparison with other methods such as grid search, uniform random search, low-discrepancy Sobol sequences, BO-TPE (Bayesian optimization using tree-structured Parzen estimator) by Bergstra et al (2011).

In [105]:
mask_entities("Third, the authors do not compare against some relevant, recent work, e.g., Springenberg et al (http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf) and Snoek et al (https://arxiv.org/pdf/1502.05700.pdf) that is essential for this kind of empirical study.")

'Third , the authors do not compare against some relevant , recent work , e.g. , Springenberg et al http : aad.informatik.uni-freiburg.de papers 16-NIPS-BOHamiANN.pdf and Snoek et al https : arxiv.org pdf 1502.05700.pdf that is essential for this kind of empirical study .'

In [105]:
# def mask_entities(sentence, replace_with_dataset=False):
# #     cleaned_sent = re.sub('[^0-9a-zA-Z ]+', ' ', sentence)
#     cleaned_sent = sentence
#     while cleaned_sent.find("  ") > -1:
#         cleaned_sent = cleaned_sent.replace("  ", " ")
    
#     entities_found = []
#     for i in entity_key_map:
#         if cleaned_sent.find(" " + i + " ") > -1:
#             entities_found.append(i)
    
#     entities_found.sort(key=lambda s: len(s))
#     len_sorted_entities = entities_found.copy()
    
#     subset_entities = []
#     # Remove subset entities (eg: Among cnn and 3-layer-cnn, prefer the latter)
#     for fe in len_sorted_entities:
#         for other_ent in len_sorted_entities:
#             if fe != other_ent and other_ent.find(fe) > -1:
#                 subset_entities.append(fe)
#                 break
#     for se in subset_entities:
#         len_sorted_entities.remove(se)
#     for maxents in len_sorted_entities:
#         mask_name = entity_dict[entity_key_map[i]].lower()
#         if replace_with_dataset:
#             if mask_name == "material":
#                 mask_name = "dataset"
#         cleaned_sent = cleaned_sent.replace(maxents, mask_name)
#     words_cleaned = nltk.word_tokenize(cleaned_sent)
#     dups_removed = [v for i, v in enumerate(words_cleaned) if i == 0 or v != words_cleaned[i-1]]
#     new_dup_removed_sent = " ".join(dups_removed)
#     return new_dup_removed_sent.strip()

# #     #print(cleaned_sent)
# #     for i in entity_key_map:
# #         if cleaned_sent.find(" " + i + " ") > -1:
# #             #print("Substituting ent: {} with mask: {}".format(i, entity_dict[entity_key_map[i]].lower()))
# #             cleaned_sent = cleaned_sent.replace(i, entity_dict[entity_key_map[i]].lower())
# #     return cleaned_sent

### Analyse meaningful sentences that are more similar to NMCS in comparison to MCS.

In [106]:
ana_sim_with_mcomp = defaultdict(list)
ana_sim_with_not_mcomp = defaultdict(list)


mean_at_k = ["1", "3", "5", "7", "10", "20", "30", "50", "100", "500", "1000", "1380"]

for sid in mcomp_sentences:
    
    # 1. With other mcomp sentences
    temp_list = []    
    for osid in mcomp_sentences:
        if osid != sid:
            temp_list.append((osid, np.inner(roberta_vectors[mcomp_sentences[osid]][osid], roberta_vectors[mcomp_sentences[sid]][sid])[0][0]))
    
    sorted_temp_list = sorted(temp_list, key=lambda x: x[1], reverse=True)
    ana_sim_with_mcomp[sid] = sorted_temp_list

    
    # 2. With other not_mcomp_sentences
    temp_list = []
    for osid in not_mcomp_sentences:
        temp_list.append((osid, np.inner(roberta_vectors[not_mcomp_sentences[osid]][osid], roberta_vectors[mcomp_sentences[sid]][sid])[0][0]))
    
    sorted_temp_list = sorted(temp_list, key=lambda x: x[1], reverse=True)
    ana_sim_with_not_mcomp[sid] = sorted_temp_list

In [107]:
problematic_sentences_at_k = defaultdict(list)
unproblematic_sentences_at_k = defaultdict(list)
vv = 1

for sid in sim_with_mcomp:
    sim_diff = (sim_with_mcomp[sid]["mean_{}".format(vv)] - sim_with_not_mcomp[sid]["mean_{}".format(vv)])
    if sim_diff < 0:
        problematic_sentences_at_k[vv].append((sid,-1.0* sim_diff))
    else:
        unproblematic_sentences_at_k[vv].append((sid, sim_diff))

In [108]:
k = 1
sorted_problematic_sentences_at_1 = sorted(problematic_sentences_at_k[k], key=lambda x: x[1], reverse=True)
sorted(problematic_sentences_at_k[k], key=lambda x: x[1], reverse=True)[0:5]

[(113, 0.1638484001159668),
 (1464, 0.12271469831466675),
 (931, 0.1226879358291626),
 (950, 0.12097209692001343),
 (1318, 0.10917872190475464)]

In [109]:
# k = 1
# sorted_problematic_sentences_at_3 = sorted(problematic_sentences_at_k[3], key=lambda x: x[1], reverse=True)
# sorted(problematic_sentences_at_k[3], key=lambda x: x[1], reverse=True)[0:5]

In [110]:
# sorted_problematic_sentences_at_3[0:3], sorted_problematic_sentences_at_3[-3:]

In [111]:
# sorted_unproblematic_sentences_at_3 = sorted(unproblematic_sentences_at_k[3], key=lambda x: x[1], reverse=True)
# sorted_unproblematic_sentences_at_3[0:4], sorted_unproblematic_sentences_at_3[-4:]

In [113]:
for s in sorted_problematic_sentences_at_1[5:10]:
    print("Test sent: ", df.loc[s[0]]["Sent"])
    
    print("\nMeaningful comparison sentences: ")
    print(ana_sim_with_mcomp[s[0]][0:1])
    for i in ana_sim_with_mcomp[s[0]][0:1]:
        print(df.loc[i[0]]["Sent"])
    
    print("\nNon Meaningful comparison sentence: ")
    print(ana_sim_with_not_mcomp[s[0]][0:1])
    for i in ana_sim_with_not_mcomp[s[0]][0:1]:
        print(df.loc[i[0]]["Sent"])
    
    print("\n\n")

Test sent:  The experimental results are very good for document modeling, but without ablation analysis against the baseline it is hard to see why they should be with such a small modification in G-NVDM.

Meaningful comparison sentences: 
[(124, 0.74430156)]
What is left is the gated incremental pooling operation; but to show that this operation is beneficial when added to autoregressive CNNs, a thorough comparison with an autoregressive CNN baseline is necessary.

Non Meaningful comparison sentence: 
[(842, 0.8440055)]
Paper Weaknesses:
- The evaluation of the model is not great: (1) It would be interesting to combine bedroom and kitchen images and train jointly to see what it learns.



Test sent:  The paper does not consider the more recent and highly relevant Moosavi-Dezfooli et al “Universal Adversarial Perturbations” CVPR 2017.

Meaningful comparison sentences: 
[(1202, 0.5903547)]
- I am concerned about whether the proposed method works well with harder datasets such as Office-H

In [114]:
mask_entities("Minor comments:- I believe one should not compare the distance shown between the left and right columns of Figure 3 as they are obtained from two different models.")

'Minor comments : - I believe one should not compare the metric shown between the left and right columns of Figure 3 as they are obtained from two different models .'

In [115]:
extract_chunks_using_spacy_dp("Minor comments:- I believe one should not compare the distance shown between the left and right columns of Figure 3 as they are obtained from two different models.")

[' one should not compare the metric shown between the left and right columns of Figure 3 as they are obtained from two different models']

In [59]:
mask_entities("Although I do like the paper on the whole, to really convince me that main objective -- ie that **iterative** improvement is beneficial -- has been satifactorily demonstrated it would be necessary to include stronger baselines - and in particular, to show that an iterative refinement scheme can really improve over a system closely matched to the attention-based model, both when used in isolation and when used in system combination with a PBMT system, and to demonstrate that the PBMT system is not simply acting as a regulariser for the attention-based model.")

'Although I do like the paper on the whole , to really convince me that main objective -- ie that iterative improvement is beneficial -- has been satifactorily demonstrated it would be necessary to include stronger baselines - and in particular , to show that an metric scheme can really improve over a system closely matched to the attention-based model , both when used in isolation and when used in system combination with a PBMT system , and to demonstrate that the PBMT system is not simply acting as a regulariser for the attention-based model .'

In [58]:
extract_chunks_using_spacy_dp("Although I do like the paper on the whole, to really convince me that main objective -- ie that **iterative** improvement is beneficial -- has been satifactorily demonstrated it would be necessary to include stronger baselines - and in particular, to show that an iterative refinement scheme can really improve over a system closely matched to the attention-based model, both when used in isolation and when used in system combination with a PBMT system, and to demonstrate that the PBMT system is not simply acting as a regulariser for the attention-based model.")

[' an metric scheme']

In [56]:
mask_entities("In summary, while I think the paper is interesting, I suspect that the applicability of this technique is possibly limited at present, and I'm unsure how much we can really read into the findings of the paper when the experiments are based on MNIST alone.")

'In summary , while I think the paper is interesting , I suspect that the applicability of this technique is possibly limited at present , and I m unsure how much we can really read into the findings of the paper when the experiments are based on MNIST alone .'

In [84]:
extract_chunks_using_spacy_dp("However, it's not completely fair to compare a label-noise + semi-supervised method with other label-noise only methods... As a matter of fact, you don't need to apply perturbation consistency (or other semi-supervised) regularization after identifying the training data with incorrect labels.")

[' to compare a label - noise semi - supervised method with other label - noise only methods',
 ' perturbation consistency or other semi - supervised metric']