In [1]:
import tensorflow as tf

In [2]:
import tensorflow_hub as hub

In [3]:
from glob import glob
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import nltk
from collections import defaultdict

In [4]:
# Directory the per-year pickle files below are read from.
data_path = "/home/shruti/Desktop/iitgn/courses/SEM2/ML/Project/code/PaperAcceptancePrediction/ICLR data/masterdata_unbalanced/"

years = [2017, 2018, 2019, 2020]

# One dict per kind of pickle, each keyed by year.
rev_dict, paper_dict, dec_dict = {}, {}, {}
iclr_arxiv_map = {}  # placeholder; replaced by the pickle loaded at the end

# (target dict, file-name template) pairs, read in this order for every year.
per_year_sources = (
    (rev_dict, "off_rev_dict_{}.pkl"),
    (paper_dict, "papers_{}.pkl"),
    (dec_dict, "paper_decision_dict_{}.pkl"),
)

for y in years:
    for target, template in per_year_sources:
        target[y] = pd.read_pickle(data_path + template.format(y))

# Unlike the files above, this one is read relative to the working directory.
iclr_arxiv_map = pd.read_pickle("./data/iclr_arxiv_map.pkl")

# Dataset

We were thinking about this problem for EACL:
    
Given a review text, can we identify the text spans that discuss comparisons?
Once a span is identified, can we determine different aspects associated with it:
1. Is it positive or negative?
2. What module does it talk about (dataset, model, metric, etc.)?
3. Does it suggest some papers to cite?
4. We can also report the overlap between different reviews in terms of comparison comments. 
This can help us understand how coherent the reviews are.

Motivation: The system would be more useful for meta-reviewers. Authors can also benefit, but they 
can always read the reviews themselves to get this information. We can argue that, with the exponential 
growth in conference submissions, the evaluation task for meta-reviewers is becoming increasingly hard. 
The system would help meta-reviewers quickly scan the multiple reviews for a given paper.

### Read the initial set of sentences

In [5]:
# Seed sentences; the preview in the next cell shows columns pid, sent and mcomp.
# NOTE: `df` is rebound to a different frame further down in this notebook.
df = pd.read_excel("data/initialsetmcomp.xlsx")

In [6]:
# `size` is the total number of cells (rows x columns), not the number of rows.
df.size, df.head()

(117,          pid                                               sent  mcomp
 0  S1HcOI5le  The idea of the paper is interesting there are...      1
 1  S1HcOI5le  It's not clear how this method compares agains...      1
 2  S1HcOI5le  Measure: Accuracy difference does not look lik...      1
 3  S1HcOI5le  Instead the authors could position this work a...      1
 4  S1HcOI5le  If the authors care to compare their approach ...      1)

### Use above to find sentences from 2020 reviews for 10 papers

In [7]:
# Collect the tokenized review sentences of a small sample of 2020 papers,
# keyed by "<year>_<paper id>".
sents_dict = defaultdict(list)
stop = 1  # 1-based count of paper ids inspected so far

for y in [2020]:
    for k in rev_dict[y]:

        # Only the first 10 paper ids are inspected.
        # NOTE(review): ids that are missing from iclr_arxiv_map still use up
        # a slot, so fewer than 10 papers may end up in sents_dict.
        if stop > 10:
            break
        stop += 1

        year_key = str(y) + "_" + k
        if year_key in iclr_arxiv_map:
            for review in rev_dict[y][k]:
                rev_text = review["content"]["review"]

                # Rewrite "e.g.", "et al." and "i.e." without their periods
                # (presumably so the sentence tokenizer does not split on
                # them) and flatten newlines. Raw strings keep the regex
                # escapes from being read as invalid string escapes.
                rev_text = re.sub(r" e[\.]?g[\.]?:? ", " eg: ", rev_text)
                rev_text = re.sub(r" et[\.]? al[\.]", " et al", rev_text)
                rev_text = re.sub(r" i[\.]?e[\.] ", " ie ", rev_text)
                rev_text = re.sub(r"\n", " ", rev_text)

                sent_text = nltk.sent_tokenize(rev_text)

                for s in sent_text:
                    sents_dict[year_key].append(s)

In [9]:
# Spot-check one paper. sents_dict is a defaultdict, so a key that was not
# collected is inserted here with an empty list instead of raising KeyError.
sents_dict["2020_B1eBoJStwr"]

['This work analyzes the consistency regularization in semi-supervised semantic segmentation.',
 'Based on the results on a toy dataset, this work proposes a novel regularization for semi-supervised semantic segmentation, which is named CowMix.',
 'Pros: -- The proposed CoxMix is easy to understand and implement.',
 '-- The experimental results seem to benefit from this proposed CoxMix at a first glance.',
 'Cons: The writing is not clear.',
 'Sometimes I have to make a ``guess" about the technical details.',
 'For example: -- Other than L_{cons}, is there any other Loss term utilized in this work?',
 'Based on Figure 3, it seems only L_{cons} is utilized.',
 'If so, is it a waste not to use the label training data (although very few) to calculate a cross-entropy loss?',
 '-- It seems the experimental setting in this submission follows the settings in Hung, 2018.',
 'However, for the experiment on VOC 2012 validation set, Hung tested their method on 1/8 1/4 1/2 of labeled data (Table 1

In [11]:
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, and
# `embed` is reassigned from a local model directory in the next cell.
# Presumably version 4 of this model is published as a TF2 SavedModel, which
# hub.Module may not be able to load — confirm before relying on this cell.
embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/4")

KeyboardInterrupt: 

In [12]:
# Load the encoder from a local directory instead of downloading it.
# NOTE(review): absolute, machine-specific path — it has to be edited before
# the notebook can run on another machine.
embed = hub.Module("/home/shruti/Desktop/research/meaningful_comparison/use_models/universal-sentence-encoder-lite_2")

In [13]:
# Two sample sentences that the session cell below passes to `embed`.
messages = [
    "The quick brown fox jumps over the lazy dog.",
    "I am a sentence for which I would like to get its embedding"]

In [15]:
# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)

# NOTE(review): the recorded run of this cell failed with
# "TypeError: Signature expects multiple inputs. Use a dict." — the module
# bound to `embed` does not accept a plain list of strings. Presumably the
# "lite" encoder expects tokenized sparse inputs passed as a dict; confirm
# against the model's documentation before re-running.
with tf.Session() as session:
    # Initialize variables and lookup tables before evaluating the module.
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embed(messages))

TypeError: Signature expects multiple inputs. Use a dict.

In [None]:
# The session cell stores its result in `message_embeddings`; the name
# `embeddings` is never defined, so printing it would raise NameError.
print(message_embeddings)

In [None]:
# TODO: the loop body was never written. `pass` keeps the cell syntactically
# valid so the notebook can run top to bottom.
for k, v in sents_dict.items():
    pass

In [4]:
# Substrings that mark a sentence as comparison-related. The collection loop
# tests them with a case-sensitive str.find, so a stem such as "compar" also
# matches "compare", "comparison" and "comparable", while "Baseline" at the
# start of a sentence does not match "baseline".
keywords = ["baseline", "compar", "et al", "SOTA", "state of the art", "state-of-the-art", "underperform", "outperform"]

# NOTE(review): kw_poor does not appear to be used anywhere else — presumably
# stems that were judged too noisy to filter on; confirm before removing.
kw_poor = ["novel", "evaluat", "benchmark", "contribution", "contrast", "method", "result", "significan", 
           "approach", "performance", "technique", "report", "experiment", "propose", "model", "discuss", 
           "problem", "task", "metric", "score", "publication", "analyze", "analyse", "analysis", 
           "replicate", "submission"]

# Further candidate cues (presumably citation markers — confirm):
# &, et al, [1,2][7-9,0-1][0-9][0-9]


In [5]:
# Sanity check that `years` is defined before the sampling loop below slices it.
years

[2017, 2018, 2019, 2020]

In [6]:
# Collect (paper id, sentence) pairs for review sentences that contain at
# least one entry of `keywords`, walking every year except the last in `years`.
sents_list = []
stop = False

for y in years[:-1]:
    if stop:
        break
    for k in rev_dict[y]:

        # Per-year running cap of 800 * (y - 2016) entries: 800 for 2017,
        # 1600 for 2018. 2019 is exempt and is limited only by the overall cap.
        if len(sents_list) > 800*(y-2016) and y != 2019:
            print("Changing year: ", y)
            break
        # Overall cap. Both caps are checked once per paper, so the final
        # count can overshoot them by one paper's worth of sentences.
        if len(sents_list) > 5000:
            stop = True
            break

        year_key = str(y) + "_" + k
        if year_key in iclr_arxiv_map:
            for review in rev_dict[y][k]:
                rev_text = review["content"]["review"]

                # Rewrite "e.g.", "et al." and "i.e." without their periods
                # (presumably so the sentence tokenizer does not split on
                # them) and flatten newlines. Raw strings keep the regex
                # escapes from being read as invalid string escapes.
                rev_text = re.sub(r" e[\.]?g[\.]?:? ", " eg: ", rev_text)
                rev_text = re.sub(r" et[\.]? al[\.]", " et al", rev_text)
                rev_text = re.sub(r" i[\.]?e[\.] ", " ie ", rev_text)
                rev_text = re.sub(r"\n", " ", rev_text)

                sent_text = nltk.sent_tokenize(rev_text)

                for s in sent_text:
                    # Case-sensitive substring match; a sentence is recorded
                    # once no matter how many keywords it contains.
                    if any(kw in s for kw in keywords):
                        sents_list.append((k, s))

Changing year:  2017
Changing year:  2018


In [7]:
# Tabulate the matches. NOTE: this rebinds `df`, which earlier in the notebook
# held the frame read from data/initialsetmcomp.xlsx.
df = pd.DataFrame(sents_list, columns=['pid', 'sentence'])

In [8]:
# Quick look at the first two matches.
df.head(2)

Unnamed: 0,pid,sentence
0,BydrOIcle,"So, this comparison might just be showing that..."
1,SyOvg6jxx,Several points are appealing about this approa...


In [9]:
# df = df.sample(frac=1.0)

In [10]:
# Larger preview; rows are still in collection order because the shuffle in
# the previous cell is commented out.
df.head(10)

Unnamed: 0,pid,sentence
0,BydrOIcle,"So, this comparison might just be showing that..."
1,SyOvg6jxx,Several points are appealing about this approa...
2,SyOvg6jxx,"In addition, there are results for comparison ..."
3,SyOvg6jxx,The results indicate that the approach clearly...
4,SyOvg6jxx,It seems like the technique could be easily us...
5,SyOvg6jxx,The paper addresses an important problem (expl...
6,SyOvg6jxx,It is a nice alternative approach to the one o...
7,SyOvg6jxx,"Specifically, I am not as concerned about beat..."
8,SyOvg6jxx,The figure S9 from Mnih et al points to instan...
9,SyOvg6jxx,"Without ""feature engineering"", the authors ach..."


In [29]:
# Export the candidate sentences (presumably for manual annotation — confirm).
# to_excel also writes the row index as the first column unless index=False
# is passed.
df.to_excel("ann_comparison_only.xlsx")

UsageError: Line magic function `%nautilus` not found.
