In [1]:
import json

# Evaluation-only overrides handed to the fine-tuning script through
# "hparamseval.json" (see the --hparams argument of the %run call).
# NOTE(review): do_train / do_eval are the *strings* "False" / "True", not JSON
# booleans. A consumer that only checks truthiness would treat "False" as
# true — confirm how run_finetuning.py interprets these values.
hparamseval = dict(
    model_size="small",
    task_names=["squad"],
    do_train="False",
    do_eval="True",
    init_checkpoint="data/models/electra_small/finetuning_models/squad_model_1/",
)

# Serialise first, then write the resulting text in one go.
with open("hparamseval.json", "w") as f:
    f.write(json.dumps(hparamseval))

In [2]:
# Predictions JSON that readPredictions() loads after the evaluation run.
PRED_FILE = 'data/models/electra_small/results/squad_qa/squad_preds.json'
# SQuAD-style JSON that saveInferenceFile() writes and readInferenceData() reads back.
INFERENCE_DATA_FILE = 'data/finetuning_data/squad/dev.json'

def readInferenceData(data_file):
    """Flatten a SQuAD-style JSON file into a list of question records.

    The file is expected to hold ``{"data": [{"paragraphs": [{"context": ...,
    "qas": [{"id": ..., "question": ...}, ...]}, ...]}, ...]}``.

    Args:
        data_file: Path of the JSON file to read.

    Returns:
        A list of ``{'id', 'question', 'context'}`` dicts, one per question,
        in file order. Every question carries the context of its paragraph.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an expected key is missing from the structure.
    """
    # The context manager closes the handle even when parsing fails, which the
    # previous manual open()/close() pair did not.
    with open(data_file) as f:
        data = json.load(f)

    return [
        {'id': qa['id'], 'question': qa['question'], 'context': paragraph['context']}
        for article in data['data']
        for paragraph in article['paragraphs']
        for qa in paragraph['qas']
    ]

def readPredictions(predictions_file):
    """Load the predictions JSON file and return its parsed content.

    Args:
        predictions_file: Path of the JSON file to read.

    Returns:
        The decoded JSON value, unchanged. mapPredictions() indexes it by
        question id, so a mapping of id -> answer is expected.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Using a context manager guarantees the handle is released even when
    # json.load raises; the previous open()/close() pair leaked it then.
    with open(predictions_file) as f:
        return json.load(f)

def mapPredictions(qas, preds):
    """Attach each predicted answer to its question record.

    Args:
        qas: Iterable of question dicts; each is looked up by its 'id' key.
        preds: Mapping of question id -> predicted answer.

    Returns:
        A new list with one dict per question that has a prediction: a copy of
        the question dict plus an 'answer' key. Questions whose id is absent
        from ``preds`` (or that have no 'id' key) are skipped. The input dicts
        are left unmodified.
    """
    mappedQas = []
    for qa in qas:
        try:
            answer = preds[qa['id']]
        except KeyError:
            # No prediction for this question. Only the missing-key case is
            # ignored; a bare ``except`` would also hide real errors and
            # swallow KeyboardInterrupt.
            continue
        # Shallow-copy so the caller's records are not mutated through aliasing.
        obj = dict(qa)
        obj['answer'] = answer
        mappedQas.append(obj)
    return mappedQas

def printAnswers(data, verbose, file_path=''):
    """Emit the answered questions to a file or to stdout.

    Args:
        data: Sequence of dicts with 'question', 'answer' and 'context' keys.
        verbose: When no file path is given, True dumps ``data`` as indented
            JSON; False prints a numbered, human-readable listing.
        file_path: If non-empty, write one ``context,question,answer`` line per
            record to this path and print nothing.
    """
    def fields(record):
        # Same lookup order as always: question, answer, context.
        return record['question'], record['answer'], record['context']

    def as_row(record):
        question, answer, context = fields(record)
        return f'{context},{question},{answer}\n'

    if file_path:
        # File export takes precedence over both console modes.
        with open(file_path, 'w') as result_file:
            result_file.writelines([as_row(record) for record in data])
    elif verbose:
        print(json.dumps(data, indent=4))
    else:
        for number, record in enumerate(data, start=1):
            question, answer, context = fields(record)
            print(f'({number}): \nContext: {context} \nQuestion: {question} \nAnswer: {answer}\n')

In [3]:
class InferenceDataManager:
    """Wrap a list of paragraphs in a single-article, SQuAD-style document.

    The resulting object has a "version" string and a "data" list holding one
    article titled "inference" that contains the given paragraphs.
    """

    # Class-level defaults; every instance replaces both in __init__.
    version = ""
    data = []

    def __init__(self, paragraphs):
        """Store the paragraphs under a single article named "inference"."""
        article = {"title": "inference", "paragraphs": paragraphs}
        self.version = "v2.0"
        self.data = [article]

    def getObject(self):
        """Return the document as a plain dict with "version" and "data" keys."""
        return dict(version=self.version, data=self.data)

In [4]:
#TESTING THE MODEL

import random
import json
import os

# Removed by saveInferenceFile() whenever a new inference file is written.
DATASET_METADATA_FILE = 'data/models/electra_small/finetuning_tfrecords/squad_tfrecords/squad_dev.metadata'
# Input for generateInferenceFileFromTestSet(): one "<context> ,<question>" pair per line.
TEST_SET_FILE = 'inference_test/task_2_test_set_questions.txt'
# Destination for printAnswers() file export (that call is currently commented out in the driver cell).
RESULT_OUTPUT_FILE = 'inference_test/results/1813003_10-epoch.csv'

def generateInferenceFileUserInput():
    """Interactively collect contexts and questions into an inference document.

    Prompts for a context, then for any number of questions about it, and
    repeats until the user types ``x`` at the context prompt. Typing ``x`` at
    the question prompt moves on to the next context. Every question gets a
    random 24-character hexadecimal id.

    Returns:
        The dict produced by ``InferenceDataManager(paragraphs).getObject()``.
    """
    hex_digits = '0123456789abcdef'
    paragraphs = []

    # iter(callable, sentinel) keeps prompting until the user answers 'x'.
    for context in iter(lambda: input("Specify the context of the questions: \n\n TYPE x to end selection"), 'x'):
        question_prompt = f'Specify the question for context: \n\n {context} \n\n TYPE x to choose specify another context'
        qas = [
            {
                "question": question,
                "id": ''.join(random.choice(hex_digits) for _ in range(24)),
                "is_impossible": "",
                "answers": []
            }
            for question in iter(lambda: input(question_prompt), 'x')
        ]
        paragraphs.append({"qas": qas, "context": context})

    return InferenceDataManager(paragraphs).getObject()

def generateInferenceFileFromTestSet():
    """Build an inference document from the test-set file TEST_SET_FILE.

    Each non-blank line must have the form ``<context> ,<question>``. The line
    is split on the LAST " ," so that a context which itself contains " ,"
    no longer breaks parsing (this assumes the question never contains that
    separator). Blank lines, such as a trailing newline at the end of the
    file, are skipped. Every question gets a random 24-character hex id and
    its own single-question paragraph.

    Returns:
        The dict produced by ``InferenceDataManager(paragraphs).getObject()``.

    Raises:
        OSError: If TEST_SET_FILE cannot be opened.
        ValueError: If a non-blank line does not contain the " ," separator.
    """
    paragraphs = []
    with open(TEST_SET_FILE, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.rstrip('\n')
            if not line.strip():
                # Previously a blank line crashed on tuple unpacking.
                continue
            context, separator, question = line.rpartition(' ,')
            if not separator:
                raise ValueError(
                    f"{TEST_SET_FILE}:{line_number}: expected '<context> ,<question>', got {line!r}"
                )
            qas = [{
                "question": question,
                "id": ''.join(random.choice('0123456789abcdef') for _ in range(24)),
                "is_impossible": "",
                "answers": []
            }]
            paragraphs.append({"qas": qas, "context": context})

    return InferenceDataManager(paragraphs).getObject()

def saveInferenceFile(file_path, json_data):
    """Write ``json_data`` to ``file_path`` as indented JSON, then delete
    DATASET_METADATA_FILE if it exists.

    Args:
        file_path: Destination path for the JSON document.
        json_data: JSON-serialisable object to store.
    """
    with open(file_path, 'w') as destination:
        json.dump(json_data, destination, indent=4)

    # NOTE(review): presumably the metadata file is dropped so the fine-tuning
    # script rebuilds its cached features for the new data — confirm.
    if not os.path.exists(DATASET_METADATA_FILE):
        return
    os.remove(DATASET_METADATA_FILE)

In [5]:
# Build the inference set interactively; swap in the commented line below to
# load it from TEST_SET_FILE instead.
data = generateInferenceFileUserInput()
# data = generateInferenceFileFromTestSet()
# Writes the JSON to INFERENCE_DATA_FILE and deletes DATASET_METADATA_FILE if present.
saveInferenceFile(INFERENCE_DATA_FILE, data)

# Run the ELECTRA fine-tuning script with the settings from hparamseval.json
# (-i executes it inside this notebook's namespace).
# NOTE(review): PRED_FILE is presumably produced by this run — confirm.
%run -i electra/run_finetuning.py --data-dir data --model-name electra_small --hparams "hparamseval.json"

# NOTE: `data` is rebound here from the SQuAD-style dict to a flat list of
# {'id', 'question', 'context'} records.
data = readInferenceData(INFERENCE_DATA_FILE)
preds = readPredictions(PRED_FILE)
# Attach each prediction to its question by id; questions without one are dropped.
mappedData = mapPredictions(data, preds)

# printAnswers(mappedData, True)
printAnswers(mappedData, False)
# printAnswers(mappedData, False, RESULT_OUTPUT_FILE)

{'model_size': 'small', 'task_names': ['squad'], 'do_train': 'False', 'do_eval': 'True', 'init_checkpoint': 'data/models/electra_small/finetuning_models/squad_model_1/'}
Config: model=electra_small, trial 1/1
answerable_classifier True
answerable_uses_start_logits True
answerable_weight 0.5
beam_size 20
data_dir data
debug False
do_eval True
do_lower_case True
do_train False
doc_stride 128
double_unordered True
embedding_size 128
eval_batch_size 8
gcp_project None
init_checkpoint data/models/electra_small/finetuning_models/squad_model_1/
iterations_per_loop 1000
joint_prediction True
keep_all_models True
layerwise_lr_decay 0.8
learning_rate 0.0001
log_examples False
max_answer_length 30
max_query_length 64
max_seq_length 512
model_dir data\models\electra_small\finetuning_models\squad_model
model_hparam_overrides {}
model_name electra_small
model_size small
n_best_size 20
n_writes_test 5
num_tpu_cores 1
num_train_epochs 10.0
num_trials 1
predict_batch_size 8
preprocessed_data_dir data\m