# Fact Verification System

## First, load all data sets and preprocess them:

In [None]:
import zipfile
import os
import json
import unicodedata
import re
from collections import defaultdict
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import random
import csv

# Load the three claim splits from their JSON files.
with open("train.json","r",encoding='utf8') as train_file:
    train_set = json.load(train_file)
with open("devset.json","r",encoding='utf8') as dev_file:
    dev_set = json.load(dev_file)
with open("test-unlabelled.json","r",encoding='utf8') as test_file:
    test_set = json.load(test_file)

# Normalise the page title of every labelled evidence entry to Unicode NFC so
# that Latin characters are written the same way as in the wiki document names
# (per the original author's note). The test split is left untouched.
for labelled_set in (train_set, dev_set):
    for record in labelled_set.values():
        for evidence in record['evidence']:
            evidence[0] = unicodedata.normalize('NFC', evidence[0])

In [None]:
# Report the number of claims in the train, dev and test splits, one per line.
print(len(train_set), len(dev_set), len(test_set), sep="\n")

### Extract wiki file:

In [None]:
# Unpack the wiki dump once; skipped when the extracted folder already exists.
if not os.path.exists("wiki-pages-text"):
    with zipfile.ZipFile("wiki-pages-text.zip") as wiki_zip:
        wiki_zip.extractall()

In [None]:
def parse_wiki_line(line):
    """Split one wiki line into (wiki_name, sentence_id, content).

    A line is expected to look like ``Page_Name <int> sentence text``.
    Underscores in the page name are replaced by spaces and the sentence
    number is returned as a string.

    Returns None when the line has fewer than three fields or when the second
    field is not an integer; such lines are malformed and are skipped.
    """
    parts = line.strip().split(" ", 2)
    if len(parts) < 3:
        return None
    try:
        sentence_num = int(parts[1])
    except ValueError:
        # Author's note: 25247896 of the 25248397 wiki lines are valid.
        return None
    return parts[0].replace("_", " "), str(sentence_num), parts[2]


# wiki_name -> {sentence_id: sentence content}
wiki_content = defaultdict(dict)
for wiki_top, dirnames, filenames in os.walk("wiki-pages-text"):
    for filename in filenames:
        if not filename.endswith(".txt"):
            continue
        # Read as UTF-8 explicitly (like the JSON files) instead of relying on
        # the platform default, and let the context manager close the handle.
        with open(os.path.join(wiki_top, filename), encoding="utf8") as wiki_file:
            for sentence in wiki_file:
                parsed = parse_wiki_line(sentence)
                if parsed is None:
                    continue
                wiki_name, sentence_id, senten_cont = parsed
                wiki_content[wiki_name][sentence_id] = senten_cont

## Second, Index construction:

### Pylucene index construction:

In [None]:
import lucene
from collections import defaultdict
from java.io import File
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriter,IndexWriterConfig,IndexOptions
from org.apache.lucene.store import SimpleFSDirectory
from datetime import datetime

#construct wiki_index 
def make_wiki_index(index_dir, wiki_doc_dir, analyzer, wiki_content):
    """Build a Lucene index holding one document per wiki sentence.

    Every ``.txt`` file below ``wiki_doc_dir`` is read line by line. Each valid
    line (``Page_Name <int> text``) becomes a document with the fields
    ``wiki_name``, ``sentence_id`` and ``content``, and is also recorded in
    ``wiki_content``.

    Args:
        index_dir: directory of the index; created if missing. The index is
            rebuilt from scratch on every call (OpenMode.CREATE).
        wiki_doc_dir: root directory of the wiki text files.
        analyzer: Lucene analyzer used by the index writer.
        wiki_content: mapping filled in place as
            ``wiki_content[wiki_name][sentence_id] = content``; it must create
            inner dicts on demand (the callers pass ``defaultdict(dict)``).
    """
    os.makedirs(index_dir, exist_ok=True)
    store_dir = SimpleFSDirectory(File(index_dir).toPath())
    # Use the analyzer supplied by the caller rather than a fresh one.
    writer_config = IndexWriterConfig(analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store_dir, writer_config)

    # Field types: page title (tokenized), sentence id (stored verbatim) and
    # sentence text (tokenized, with positions and offsets).
    doc_type = FieldType()
    doc_type.setStored(True)
    doc_type.setTokenized(True)
    doc_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
    id_type = FieldType()
    id_type.setStored(True)
    id_type.setTokenized(False)
    id_type.setIndexOptions(IndexOptions.DOCS)
    content_type = FieldType()
    content_type.setStored(True)
    content_type.setTokenized(True)
    content_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)

    try:
        for wiki_top, _dirnames, filenames in os.walk(wiki_doc_dir):
            for filename in filenames:
                if not filename.endswith(".txt"):
                    continue
                with open(os.path.join(wiki_top, filename), encoding="utf8") as wiki_file:
                    for sentence in wiki_file:
                        sentence_list = sentence.strip().split(" ", 2)
                        if len(sentence_list) < 3:
                            # Too few fields: malformed line, skip it.
                            continue
                        try:
                            senten_num = str(int(sentence_list[1]))
                        except ValueError:
                            # No sentence number: malformed line, skip it.
                            # Author's note: 25247896 of 25248397 lines are valid.
                            continue
                        wiki_name = sentence_list[0].replace("_", " ")
                        senten_cont = sentence_list[2]
                        wiki_content[wiki_name][senten_num] = senten_cont
                        doc = Document()
                        doc.add(Field("wiki_name", wiki_name, doc_type))
                        doc.add(Field("sentence_id", senten_num, id_type))
                        doc.add(Field("content", senten_cont, content_type))
                        writer.addDocument(doc)
        print("%d wiki items in index" % (writer.numDocs()))
    finally:
        # Release the index write lock even if reading or indexing fails.
        writer.close()
    

#used for selected wiki_set to construct an index smaller than the whole one
def make_target_wiki_index(wiki_set, index_dir, analyzer):
    """Build a smaller Lucene index restricted to the selected wiki pages.

    Sentence texts are read from the module-level ``wiki_content`` mapping
    (wiki_name -> {sentence_id: content}), which must already be populated.

    Args:
        wiki_set: mapping whose values are collections of wiki names; the
            names are merged and de-duplicated before indexing.
        index_dir: directory of the index; created if missing. The index is
            rebuilt from scratch on every call (OpenMode.CREATE).
        analyzer: Lucene analyzer used by the index writer.
    """
    os.makedirs(index_dir, exist_ok=True)
    store_dir = SimpleFSDirectory(File(index_dir).toPath())
    # Use the analyzer supplied by the caller rather than a fresh one.
    writer_config = IndexWriterConfig(analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store_dir, writer_config)

    # Field types: page title (tokenized), sentence id (stored verbatim) and
    # sentence text (tokenized, with positions and offsets).
    doc_type = FieldType()
    doc_type.setStored(True)
    doc_type.setTokenized(True)
    doc_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
    id_type = FieldType()
    id_type.setStored(True)
    id_type.setTokenized(False)
    id_type.setIndexOptions(IndexOptions.DOCS)
    content_type = FieldType()
    content_type.setStored(True)
    content_type.setTokenized(True)
    content_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)

    # Merge the per-claim name collections and drop repeated names.
    final_names = set()
    for wiki_names in wiki_set.values():
        final_names.update(wiki_names)

    try:
        for wiki_name in final_names:
            # .get() avoids inserting empty entries into the shared defaultdict
            # when a name is unknown.
            for sentence_id, content in wiki_content.get(wiki_name, {}).items():
                doc = Document()
                doc.add(Field("wiki_name", wiki_name, doc_type))
                doc.add(Field("sentence_id", sentence_id, id_type))
                doc.add(Field("content", content, content_type))
                writer.addDocument(doc)
        print("%d wiki items in index" % (writer.numDocs()))
    finally:
        # Release the index write lock even if indexing fails.
        writer.close()

In [None]:
# Running this cell rebuilds the full wiki index from scratch.
lucene.initVM()
index_dir, wiki_dir = "wiki_index", "wiki-pages-text"
# Analyzer handed to the index builder; may need tuning.
analyzer = StandardAnalyzer()
# Filled while indexing and reused for the later, smaller indexes:
# wiki_name -> {sentence_id: content}
wiki_content = defaultdict(dict)

start = datetime.now()
make_wiki_index(index_dir, wiki_dir, analyzer, wiki_content)
end = datetime.now()
print(f"Total wiki index used time: {end - start}")

### Pylucene index query:

In [None]:
from org.apache.lucene.index import DirectoryReader,Term
from org.apache.lucene.search import IndexSearcher
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.queryparser.classic import QueryParser
from allennlp.predictors.predictor import Predictor
from allennlp.models.archival import load_archive
from allennlp.data.tokenizers import word_filter

def doc_search(word_seq, searched_field, searcher, analyzer, top_n=80):
    """Return the wiki names of the best-scoring hits for a query.

    Args:
        word_seq: query text, parsed with Lucene's classic QueryParser.
        searched_field: name of the index field the query runs against.
        searcher: IndexSearcher over the wiki index.
        analyzer: analyzer used to parse the query.
        top_n: maximum number of hits to fetch (previously hard-coded to 80).

    Returns:
        A set with the ``wiki_name`` field of every returned hit.
    """
    query = QueryParser(searched_field, analyzer).parse(word_seq)
    hits = searcher.search(query, top_n).scoreDocs
    # scoreDoc.doc is the internal document id used to load the stored fields.
    return {searcher.doc(hit.doc).get("wiki_name") for hit in hits}

#used for query of entire claim sentence(remove stopwords) to get TOPN sentences should return wiki_name and sentence_id
def sentence_search(claim, top_x, searcher, analyzer):
    """Query the 'content' field with a claim and list the best sentences.

    Args:
        claim: query text; the callers pass the claim with stopwords removed.
        top_x: maximum number of hits to fetch.
        searcher: IndexSearcher over a sentence index.
        analyzer: analyzer used to parse the query.

    Returns:
        A list of ``[wiki_name, sentence_id]`` pairs in the order of the hits.
    """
    parsed_query = QueryParser('content', analyzer).parse(claim)
    hits = searcher.search(parsed_query, top_x)
    # Load the stored fields of each hit through its internal document id.
    stored_docs = (searcher.doc(hit.doc) for hit in hits.scoreDocs)
    return [[stored.get("wiki_name"), stored.get("sentence_id")] for stored in stored_docs]

### NER model construction: 

In [None]:
# Single-pass character rewriting applied to every extracted entity: brackets
# and colons become -LSB-/-RSB-/-LRB-/-RRB-/-COLON- tokens; double quotes,
# slashes and exclamation marks are removed.
_ENTITY_CHAR_MAP = str.maketrans({
    "[": "-LSB- ",
    "]": " -RSB-",
    "(": "-LRB- ",
    ")": " -RRB-",
    ":": "-COLON-",
    '"': "",
    "/": "",
    "!": "",
})


def _clean_entity(words):
    """Join the entity words with spaces and apply the character rewriting."""
    return " ".join(words).translate(_ENTITY_CHAR_MAP).strip()


def NER_modeling(predictor, sentence):
    """Extract entity phrases from a sentence with an NER predictor.

    Args:
        predictor: object whose ``predict(sentence)`` returns a dict with the
            parallel lists ``'words'`` and ``'tags'``.
        sentence: text to analyse.

    Returns:
        A list of cleaned entity strings. Each maximal run of consecutive
        tokens whose tag is not ``'O'`` forms one entity, whatever the tag
        prefix or type. Entities that are empty after cleaning are dropped,
        since they cannot serve as a search query.
    """
    prediction = predictor.predict(sentence)
    entities = []
    current_words = []
    for word, tag in zip(prediction['words'], prediction['tags']):
        if tag != 'O':
            current_words.append(word)
            continue
        if current_words:
            entities.append(_clean_entity(current_words))
            current_words = []
    if current_words:
        # The sentence ended while still inside an entity.
        entities.append(_clean_entity(current_words))
    return [entity for entity in entities if entity]

## Third, Searching for train/dev/test set to narrow down candidate wikis and sentences:

### 1. First round: search the 'content' and 'wiki_name' fields for wiki documents (using NER entities). Keep document recall high so that most relevant documents are included and few are missed for the test set.

In [None]:
# First-round document retrieval for the train set (same procedure as dev).
store_FSD = SimpleFSDirectory(File(index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# GPU mode (cuda_device=0). Alternative model kept for reference:
# https://s3-us-west-2.amazonaws.com/allennlp/models/fine-grained-ner-model-elmo-2018.12.21.tar.gz
NER_archive = load_archive("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz", cuda_device=0)
NER_predictor = Predictor.from_archive(NER_archive)

# claim id -> set of candidate wiki names that may contain the evidence
train_wikis = dict()

t1 = datetime.now()
try:
    for idx, data in train_set.items():
        evident_wikis = set()
        # Query both indexed fields with every entity phrase found in the claim.
        for ner in NER_modeling(NER_predictor, data['claim']):
            evident_wikis.update(doc_search(ner, "content", searcher, analyzer))
            evident_wikis.update(doc_search(ner, "wiki_name", searcher, analyzer))
            # TODO: take the top 100 per query instead, then intersect the
            # results over all queries of the claim?
        train_wikis[idx] = evident_wikis
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

In [None]:
# First-round document retrieval for the dev set.
store_FSD = SimpleFSDirectory(File(index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# GPU mode (cuda_device=0). Alternative model kept for reference:
# https://s3-us-west-2.amazonaws.com/allennlp/models/fine-grained-ner-model-elmo-2018.12.21.tar.gz
NER_archive = load_archive("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz", cuda_device=0)
NER_predictor = Predictor.from_archive(NER_archive)

# claim id -> set of candidate wiki names that may contain the evidence
dev_wikis = dict()

t1 = datetime.now()
try:
    for idx, data in dev_set.items():
        evident_wikis = set()
        # Query both indexed fields with every entity phrase found in the claim.
        for ner in NER_modeling(NER_predictor, data['claim']):
            evident_wikis.update(doc_search(ner, "content", searcher, analyzer))
            evident_wikis.update(doc_search(ner, "wiki_name", searcher, analyzer))
        dev_wikis[idx] = evident_wikis
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

In [None]:
# First-round document retrieval for the test set (same procedure as dev).
store_FSD = SimpleFSDirectory(File(index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# GPU mode (cuda_device=0)
NER_archive = load_archive("https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.12.18.tar.gz", cuda_device=0)
NER_predictor = Predictor.from_archive(NER_archive)

# claim id -> set of candidate wiki names that may contain the evidence
test_wikis = dict()

t1 = datetime.now()
try:
    for idx, data in test_set.items():
        evident_wikis = set()
        # Query both indexed fields with every entity phrase found in the claim.
        for ner in NER_modeling(NER_predictor, data['claim']):
            evident_wikis.update(doc_search(ner, "content", searcher, analyzer))
            evident_wikis.update(doc_search(ner, "wiki_name", searcher, analyzer))
        test_wikis[idx] = evident_wikis
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

#### Calculate recall of correct document:

In [None]:
#recall of correct document
def doc_recall(data_set, wiki_list):
    """Return the share of claims whose evidence pages were all retrieved.

    Args:
        data_set: mapping claim id -> record whose ``'evidence'`` entry is a
            list of items with the page title (underscore-separated) at
            index 0.
        wiki_list: mapping claim id -> collection of retrieved wiki names
            (space-separated).

    Returns:
        A float in [0, 1]; 0.0 for an empty ``data_set``. A claim without any
        evidence counts as fully retrieved.
    """
    if not data_set:
        # Avoid dividing by zero when there is nothing to evaluate.
        return 0.0
    fully_covered = 0
    for idx, data in data_set.items():
        if all(evidence[0].replace("_", " ") in wiki_list[idx]
               for evidence in data['evidence']):
            fully_covered += 1
    return fully_covered / len(data_set)


In [None]:
# First-round document recall on the dev set; the train-set check stays disabled.
# print(doc_recall(train_set, train_wikis))
dev_doc_recall = doc_recall(dev_set, dev_wikis)
print(dev_doc_recall)

### 2. Second round: search the 'content' field for the top-X sentences (using the claim with stopwords removed). Again keep sentence recall high, so that the test set retains most of the correct sentences when this data is fed to BERT in the next stage.

#### 2.1.Construct index of selected wiki_name:

In [None]:
# Build the reduced sentence index for the train set from its first-round wikis.
train_index_dir = "sentence_index_train"
analyzer = StandardAnalyzer()  # may need tuning

start = datetime.now()
make_target_wiki_index(train_wikis, train_index_dir, analyzer)
end = datetime.now()
print(f"Selected-wiki-index used time: {end - start}")

In [None]:
# Build the reduced sentence index for the dev set from its first-round wikis.
dev_index_dir = "sentence_index_dev"
analyzer = StandardAnalyzer()  # may need tuning

start = datetime.now()
make_target_wiki_index(dev_wikis, dev_index_dir, analyzer)
end = datetime.now()
print(f"Selected-wiki-index used time: {end - start}")

In [None]:
# Build the reduced sentence index for the test set from its first-round wikis.
test_index_dir = "sentence_index_test"
analyzer = StandardAnalyzer()  # may need tuning

start = datetime.now()
make_target_wiki_index(test_wikis, test_index_dir, analyzer)
end = datetime.now()
print(f"Selected-wiki-index used time: {end - start}")

#### 2.2.Searching sentences: (using claim removed stopwords)

In [None]:
# Tokens removed from a claim before the sentence query: the NLTK English
# stopwords plus a list of punctuation tokens.
punctuation_tokens = ["{","}","[","]","(",")","/",",", '.', ':', '!', ';', "'", '"', '&', '$', '#', '@', '?']
stop_words = set(stopwords.words('english'))
stop_words.update(punctuation_tokens)

In [None]:
# Second-round sentence retrieval for the train set.
store_FSD = SimpleFSDirectory(File(train_index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# claim id -> list of [wiki_name, sentence_id] candidates
train_sentences = dict()
t1 = datetime.now()

try:
    for idx, data in train_set.items():
        claim_tokens = word_tokenize(data['claim'])
        filtered_sentence = " ".join(w for w in claim_tokens if w not in stop_words)
        # Strip quote, slash and colon characters that survive inside tokens
        # before the text reaches the query parser.
        for unwanted in ('"', "/", ":"):
            filtered_sentence = filtered_sentence.replace(unwanted, "")
        train_sentences[idx] = sentence_search(filtered_sentence, 30, searcher, analyzer)
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

In [None]:
# Second-round sentence retrieval for the dev set.
store_FSD = SimpleFSDirectory(File(dev_index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# claim id -> list of [wiki_name, sentence_id] candidates
dev_sentences = dict()
t1 = datetime.now()

try:
    for idx, data in dev_set.items():
        claim_tokens = word_tokenize(data['claim'])
        filtered_sentence = " ".join(w for w in claim_tokens if w not in stop_words)
        # Strip quote, slash and colon characters that survive inside tokens
        # before the text reaches the query parser.
        for unwanted in ('"', "/", ":"):
            filtered_sentence = filtered_sentence.replace(unwanted, "")
        dev_sentences[idx] = sentence_search(filtered_sentence, 30, searcher, analyzer)
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

In [None]:
# Second-round sentence retrieval for the test set.
store_FSD = SimpleFSDirectory(File(test_index_dir).toPath())
reader = DirectoryReader.open(store_FSD)
searcher = IndexSearcher(reader)
analyzer = StandardAnalyzer()
# claim id -> list of [wiki_name, sentence_id] candidates
test_sentences = dict()
t1 = datetime.now()

try:
    for idx, data in test_set.items():
        claim_tokens = word_tokenize(data['claim'])
        filtered_sentence = " ".join(w for w in claim_tokens if w not in stop_words)
        # Strip quote, slash and colon characters that survive inside tokens
        # before the text reaches the query parser.
        for unwanted in ('"', "/", ":"):
            filtered_sentence = filtered_sentence.replace(unwanted, "")
        # NOTE(review): sentence_search's comment mentions 50 hits for the
        # test set, but 30 is used here -- confirm which is intended.
        test_sentences[idx] = sentence_search(filtered_sentence, 30, searcher, analyzer)
finally:
    # Dropping the searcher alone leaves the index reader open; close it too.
    del searcher
    reader.close()
t2 = datetime.now()
print("Total used time: ",t2-t1)

#### Calculate recall of correct sentences:

In [None]:
# Recall of the correct evidence sentences after the second search round
def second_round_doc_recall(data_set, sentence_list):
    """Return the share of claims whose evidence sentences were all retrieved.

    Args:
        data_set: mapping claim id -> record whose ``'evidence'`` entry is a
            list of items with the page title (underscore-separated) at
            index 0 and the sentence number at index 1.
        sentence_list: mapping claim id -> list of
            ``[wiki_name, sentence_id]`` lists with string sentence ids.

    Returns:
        A float in [0, 1]; 0.0 for an empty ``data_set``. A claim without any
        evidence counts as fully retrieved.
    """
    if not data_set:
        # Avoid dividing by zero when there is nothing to evaluate.
        return 0.0
    fully_covered = 0
    for idx, data in data_set.items():
        if all([evidence[0].replace("_", " "), str(evidence[1])] in sentence_list[idx]
               for evidence in data['evidence']):
            fully_covered += 1
    return fully_covered / len(data_set)

In [None]:
# Second-round sentence recall: train set first, then dev set.
train_sentence_recall = second_round_doc_recall(train_set, train_sentences)
print(train_sentence_recall)
dev_sentence_recall = second_round_doc_recall(dev_set, dev_sentences)
print(dev_sentence_recall)

## Fourth, Train model using BERT:
(see the training and predicting part in "wsta-train-model.ipynb" which is run in Colaboratory):

#### Format input of BERT train set for sentence selection:

In [None]:
# Train-set input items for the BERT model: every claim gets its gold evidence
# sentences marked "Positive" and the retrieved non-gold sentences "Negative".
train = {}
for idx, record in train_set.items():
    labelled = defaultdict(dict)
    for gold in record['evidence']:
        page, sent_id = " ".join(gold[0].split("_")), str(gold[1])
        labelled[page][sent_id] = [wiki_content[page][sent_id], "Positive"]
    for candidate in train_sentences[idx]:
        page, sent_id = candidate[0], candidate[1]
        # A retrieved sentence that is already gold evidence keeps its label.
        if page in labelled and sent_id in labelled[page]:
            continue
        labelled[page][sent_id] = [wiki_content[page][sent_id], "Negative"]
    train[idx] = {'claim': record['claim'], 'evidence': labelled}

# Keep the half-processed data on disk for safety.
with open("trainset_for_train.json","w",encoding = 'utf-8') as f:
    json.dump(train, f, indent=2)

In [None]:
with open("trainset_for_train.json","r",encoding = 'utf-8') as f:
    train = json.load(f)

# At most this many "Negative" sentences are exported per claim.
max_negatives = 5

train_list = []   # output rows: [claim, "<wiki_name> , <sentence>", label]
train_index = []  # matching [claim id, wiki_name (underscored), sentence_id]
for idx, record in train.items():
    negatives = 0
    for wiki_name, sentences in record['evidence'].items():
        for sentence_id, entry in sentences.items():
            is_negative = entry[1] == "Negative"
            if is_negative:
                if negatives == max_negatives:
                    # Skip surplus negatives but keep scanning: breaking out
                    # here would also drop positive sentences that come later.
                    continue
                negatives += 1
            train_list.append([record['claim'], wiki_name + " , " + entry[0], 0 if is_negative else 1])
            train_index.append([idx, wiki_name.replace(" ", "_"), sentence_id])

In [None]:
# Write the sentence-selection train rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('trainset_for_model.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(train_list)

#### Format input of BERT dev set for sentence selection:

In [None]:
# Dev-set input items for the BERT model: every claim gets its gold evidence
# sentences marked "Positive" and the retrieved non-gold sentences "Negative".
dev = {}
for idx, record in dev_set.items():
    labelled = defaultdict(dict)
    for gold in record['evidence']:
        page, sent_id = " ".join(gold[0].split("_")), str(gold[1])
        labelled[page][sent_id] = [wiki_content[page][sent_id], "Positive"]
    for candidate in dev_sentences[idx]:
        page, sent_id = candidate[0], candidate[1]
        # A retrieved sentence that is already gold evidence keeps its label.
        if page in labelled and sent_id in labelled[page]:
            continue
        labelled[page][sent_id] = [wiki_content[page][sent_id], "Negative"]
    dev[idx] = {'claim': record['claim'], 'evidence': labelled}

# Keep the half-processed data on disk for safety.
with open("devset_for_train.json","w",encoding = 'utf-8') as f:
    json.dump(dev, f, indent=2)

In [None]:
with open("devset_for_train.json","r",encoding = 'utf-8') as f:
    dev = json.load(f)

# At most this many "Negative" sentences are exported per claim.
max_negatives = 5

dev_list = []   # output rows: [claim, "<wiki_name> , <sentence>", label]
dev_index = []  # matching [claim id, wiki_name (underscored), sentence_id]
for idx, record in dev.items():
    negatives = 0
    for wiki_name, sentences in record['evidence'].items():
        for sentence_id, entry in sentences.items():
            is_negative = entry[1] == "Negative"
            if is_negative:
                if negatives == max_negatives:
                    # Skip surplus negatives but keep scanning: breaking out
                    # here would also drop positive sentences that come later.
                    continue
                negatives += 1
            dev_list.append([record['claim'], wiki_name + " , " + entry[0], 0 if is_negative else 1])
            dev_index.append([idx, wiki_name.replace(" ", "_"), sentence_id])

In [None]:
# Write the sentence-selection dev rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('devset_for_model.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(dev_list)

#### Format input of BERT test set for sentence selection:

In [None]:
# Test-set input items for the BERT model: every claim is paired with the text
# of its retrieved candidate sentences (no labels are available).
test = {}
for idx, record in test_set.items():
    candidates = defaultdict(dict)
    for hit in test_sentences[idx]:
        page, sent_id = hit[0], hit[1]
        candidates[page][sent_id] = wiki_content[page][sent_id]
    test[idx] = {'claim': record['claim'], 'evidence': candidates}

with open("testset_for_train.json","w",encoding = 'utf-8') as f:
    json.dump(test, f, indent=2)

In [None]:
with open("testset_for_train.json","r",encoding = 'utf-8') as f:
    test = json.load(f)

test_list = []   # output rows: [claim, "<wiki_name> , <sentence>"]
test_index = []  # matching [claim id, wiki_name (underscored), sentence_id]
for idx, record in test.items():
    for wiki_name, sentences in record['evidence'].items():
        underscored_name = "_".join(wiki_name.split(" "))
        for sentence_id, text in sentences.items():
            test_list.append([record['claim'], wiki_name + " , " + text])
            test_index.append([idx, underscored_name, sentence_id])

In [None]:
# Write the sentence-selection test rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('testset_for_model.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(test_list)

In [None]:
# Dump the row -> (claim id, wiki name, sentence id) mapping, one
# space-separated line per exported test row. The file is plain text despite
# its .json name; the with-block closes it.
with open("content_to_index_test.json","w",encoding = 'utf-8') as f:
    for claim_id, wiki_name, sentence_id in test_index:
        f.write(claim_id + " " + wiki_name + " " + sentence_id + "\n")

### Due to delayed submission of codalab, this part of code alternatively uses dev set as test set for alternative evaluation:

In [None]:
with open("devset_for_train.json","r",encoding = 'utf-8') as f:
    dev = json.load(f)

dev_list_for_test = []   # output rows: [claim, "<wiki_name> , <sentence>"]
dev_index_for_test = []  # matching [claim id, wiki_name (underscored), sentence_id]

# Export every stored dev sentence without its label, as for the test set.
for idx, record in dev.items():
    for wiki_name, sentences in record['evidence'].items():
        for sentence_id, entry in sentences.items():
            dev_list_for_test.append([record['claim'], wiki_name + " , " + entry[0]])
            dev_index_for_test.append([idx, wiki_name.replace(" ", "_"), sentence_id])

# newline='' is what the csv module expects from its file object; utf-8 keeps
# non-ASCII wiki text independent of the platform default encoding.
with open('devset_usedfortest_for_model.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(dev_list_for_test)

# Plain-text mapping (despite the .json name): one "id name sentence_id" line
# per exported row. The with-block closes the file.
with open("content_to_index_devastest.json","w",encoding = 'utf-8') as f:
    for row in dev_index_for_test:
        f.write(row[0]+" "+row[1]+" "+row[2]+"\n")

### Classification part preprocess:

#### Deal with the output of sentence selection for test set from colab notebook ：

In [None]:
# Parse the sentence-selection output: each row is stripped of spaces,
# newlines and square brackets, then its first and last values are kept.
test_sentence_probs = []
with open("selection_test_results.txt","r",encoding='utf-8') as f:
    for row in f:
        fields = row.strip(" \n][").split(" ")
        test_sentence_probs.append([float(fields[0]), float(fields[-1])])

# Start every test claim with an empty evidence list ...
for claim in test_set.values():
    claim['evidence'] = []
# ... and keep the candidates whose first score reaches the 0.998 threshold.
for row_no, probs in enumerate(test_sentence_probs):
    if probs[0]>=0.998:
        claim_id, wiki_name, sentence_id = test_index[row_no]
        test_set[claim_id]['evidence'].append([wiki_name, int(sentence_id)])

### Due to delayed submission of codalab, this part of code alternatively uses dev set as test set for alternative evaluation:

In [None]:
# Parse the sentence-selection output for the dev-as-test run: each row is
# stripped of spaces, newlines and square brackets, then its first and last
# values are kept.
dev_sentence_probs = []
with open("selection_devastest_results.txt","r",encoding='utf-8') as f:
    for row in f:
        fields = row.strip(" \n][").split(" ")
        dev_sentence_probs.append([float(fields[0]), float(fields[-1])])

# Fresh copy of the dev set with the label blanked and no evidence yet.
new_dev_set = {
    idx: {'claim': record['claim'], 'label': "", 'evidence': []}
    for idx, record in dev_set.items()
}
# Keep the candidates whose first score reaches the 0.99 threshold.
for row_no, probs in enumerate(dev_sentence_probs):
    if probs[0]>=0.99:
        claim_id, wiki_name, sentence_id = dev_index_for_test[row_no]
        new_dev_set[claim_id]['evidence'].append([wiki_name, int(sentence_id)])

# Store the dev file without labels.
with open("new_dev_set_for_test.json","w",encoding = 'utf-8') as f:
    json.dump(new_dev_set, f, indent=2)

#### Merge all evidence sentences in one evidence：

#### Train set:

In [None]:
# Classification input for the train set: [claim, merged evidence text, label].
final_train_list = []
for record in train_set.values():
    evidence_texts = [
        wiki_content[' '.join(info[0].split('_'))][str(info[1])]
        for info in record['evidence']
    ]
    # Every sentence is preceded by one space, including the first.
    merged = "".join(" " + text for text in evidence_texts)
    final_train_list.append([record['claim'], merged, record['label']])

In [None]:
# Write the classification train rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('train_classification.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(final_train_list)

#### Dev set:

In [None]:
# Classification input for the dev set: [claim, merged evidence text, label].
final_dev_list = []
for record in dev_set.values():
    evidence_texts = [
        wiki_content[' '.join(info[0].split('_'))][str(info[1])]
        for info in record['evidence']
    ]
    # Every sentence is preceded by one space, including the first.
    merged = "".join(" " + text for text in evidence_texts)
    final_dev_list.append([record['claim'], merged, record['label']])

In [None]:
# Write the classification dev rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('dev_classification.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(final_dev_list)

#### Test set:

In [None]:
# Classification input for the test set: [claim, merged evidence text], with
# the claim id of every row kept in a parallel list.
final_test_list = []
final_test_index = []
for idx, record in test_set.items():
    evidence_texts = [
        wiki_content[' '.join(info[0].split('_'))][str(info[1])]
        for info in record['evidence']
    ]
    # Every sentence is preceded by one space, including the first.
    merged = "".join(" " + text for text in evidence_texts)
    final_test_list.append([record['claim'], merged])
    final_test_index.append(idx)

In [None]:
# Write the classification test rows as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('test_classification.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(final_test_list)

### Due to delayed submission of codalab, this part of code alternatively uses dev set as test set for alternative evaluation:

In [None]:
# Classification input for the dev-as-test run: [claim, merged evidence text],
# with the claim id of every row kept in a parallel list.
final_devastest_list = []
final_devastest_index = []
for idx, record in new_dev_set.items():
    evidence_texts = [
        wiki_content[' '.join(info[0].split('_'))][str(info[1])]
        for info in record['evidence']
    ]
    # Every sentence is preceded by one space, including the first.
    merged = "".join(" " + text for text in evidence_texts)
    final_devastest_list.append([record['claim'], merged])
    final_devastest_index.append(idx)

In [None]:
# Write the classification rows of the dev-as-test run as TSV.
# newline='' is what the csv module expects from its file object (otherwise
# line endings are doubled on some platforms); utf-8 keeps non-ASCII wiki
# text independent of the platform default encoding.
with open('devastest_classification.tsv', 'wt', newline='', encoding='utf-8') as fp:
    csv.writer(fp, delimiter='\t').writerows(final_devastest_list)

## Integration and output:

In [None]:
# Read the classifier output for the test set: three whitespace-separated
# scores per row, optionally wrapped in square brackets.
test_label_probs = []
with open("classification_test_results.txt","r",encoding='utf-8') as f:
    for row in f:
        scores = row.strip(" \n][").split()
        test_label_probs.append([float(scores[0]), float(scores[1]), float(scores[2])])

In [None]:
# Label order matching the three score columns of the classifier output.
temp_label = ["SUPPORTS","REFUTES","NOT ENOUGH INFO"]
for row_no, probs in enumerate(test_label_probs):
    # Pick the label whose score is highest (first one on ties).
    best_column = probs.index(max(probs))
    claim_id = final_test_index[row_no]
    test_set[claim_id]['label'] = temp_label[best_column]

with open("testoutput.json","w",encoding = 'utf-8') as f:
    json.dump(test_set, f, indent=2)

### Due to delayed submission of codalab, this part of code alternatively uses dev set as test set for alternative evaluation:

In [None]:
# Read the classifier output for the dev-as-test run: three
# whitespace-separated scores per row, optionally wrapped in square brackets.
dev_label_probs = []
with open("classification_devastest_results.txt","r",encoding='utf-8') as f:
    for row in f:
        scores = row.strip(" \n][").split()
        dev_label_probs.append([float(scores[0]), float(scores[1]), float(scores[2])])

In [None]:
# Label order matching the three score columns of the classifier output.
temp_label = ["SUPPORTS","REFUTES","NOT ENOUGH INFO"]
for idx, probs in enumerate(dev_label_probs):
    claim = new_dev_set[final_devastest_index[idx]]
    if len(claim['evidence']) == 0:
        # Without any selected evidence the claim cannot be verified.
        claim['label'] = "NOT ENOUGH INFO"
    else:
        # Previously the predicted label was computed but never stored, so
        # every claim with evidence kept an empty label.
        claim['label'] = temp_label[probs.index(max(probs))]

with open("devastest_final_result.json","w",encoding = 'utf-8') as f:
    json.dump(new_dev_set, f, indent=2)

#### --------------------------------------------------------- END ------------------------------------------------------