In [55]:
import csv
import json
import numpy as np
import os
import uuid
from tqdm import tqdm
import time
from parser import args_parser

class PlatformDataType():
    """Convert a question file plus stored model predictions into one payload.

    The resulting structure has two top-level keys: ``"meta_data"`` (fixed
    descriptive fields plus the subject name) and ``"content"`` (one dict per
    question holding the question, its answer, the human-written distractors
    and the distractors proposed by a model).
    """

    # Model names for which a ``predictions-<name>`` folder may be read.
    _SUPPORTED_MODELS = ("few-shot", "zero-shot", "mt5")

    def __init__(self, data_root, subname, modelname, topk):
        """Load the questions and build the full payload.

        Args:
            data_root: Path of the JSON file holding the list of questions.
            subname: Subject name stored in the metadata.
            modelname: Name of the model whose predictions are attached; see
                ``get_predictions`` for the accepted values.
            topk: Maximum number of predicted distractors kept per question.
        """
        self.topk = topk
        self.final_data = dict()
        self.jsonf = dict()
        self.metadata = dict()
        self.content = list()

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(data_root, "r", encoding="utf-8") as f:
            self.json_data = json.load(f)

        self.create_metadata(subname)
        self.add_predictions(modelname)

        self.final_data["meta_data"] = self.metadata
        self.final_data["content"] = self.content

    def create_metadata(self, subname):
        """Fill ``self.metadata`` with the fixed fields and the subject name."""
        self.metadata["session_id"] = "all"
        self.metadata["subject"] = subname
        self.metadata["level"] = "all"
        self.metadata["estimated_duration"] = "unknown"
        self.metadata["task_type"] = "all"

    def transform_gtruth(self):
        """Rebuild ``self.content`` from the loaded questions.

        Each entry of ``self.json_data`` must provide the keys ``"answer"``,
        ``"question"``, ``"distractors"`` and ``"qid"``. Distractors that are
        empty after stripping whitespace are dropped; the remaining ones are
        numbered from 1 and tagged with the ``"human"`` model id.
        """
        self.content = list()

        for inst in self.json_data:
            one_mcq = {
                "qid": inst["qid"],
                "question": inst["question"].strip("\n"),
                "answers": inst["answer"].strip("\n"),
            }

            ground_truth = []
            for dist in inst["distractors"]:
                dist = dist.strip()
                if not dist:
                    continue
                ground_truth.append({
                    # Numbering counts only the distractors that are kept.
                    "distid": len(ground_truth) + 1,
                    "distractor": dist,
                    "modelid": "human",
                    "score": 1,
                })

            one_mcq["ground_truth"] = ground_truth
            self.content.append(one_mcq)

    def add_predictions(self, modname):
        """Attach the model's distractors to every question in ``self.content``.

        The ground truth is (re)built first, then each question receives a
        ``"proposed_distractors"`` list whose items carry a fresh UUID string
        as ``"distid"``.

        Args:
            modname: Model name passed on to ``get_predictions``.
        """
        self.transform_gtruth()

        for index, q_info in enumerate(self.content):
            self.model_predictions = self.get_predictions(
                q_info["qid"], modname, topk=self.topk
            )

            predictions = []
            for dist, score in self.model_predictions:
                # Progress trace: question index, distractor text, rank token.
                print(index, dist, score)
                predictions.append({
                    "distid": str(uuid.uuid4()),
                    "distractor": dist,
                    "modelid": modname,
                    "score": score,
                })

            q_info["proposed_distractors"] = predictions

    def get_predictions(self, qid, modname, topk):
        """Read the stored predictions of one question.

        The file ``predictions-<modname>/<qid>.json`` (relative to the current
        working directory) is loaded and its ``"response"`` string is split
        into lines. On every non-blank line the first space-separated token is
        taken as the rank and the rest of the line as the distractor text.

        Args:
            qid: Question id used as the file stem.
            modname: One of ``"few-shot"``, ``"zero-shot"`` or ``"mt5"``.
            topk: Number of leading predictions to return.

        Returns:
            A list of ``(distractor, rank)`` tuples, at most ``topk`` long.

        Raises:
            ValueError: If ``modname`` is not a supported model name.
        """
        # Raised explicitly so the check also runs under ``python -O``.
        if modname not in self._SUPPORTED_MODELS:
            raise ValueError(
                f"modname expects few-shot, zero-shot or mt5 model names but, got: {modname}"
            )

        json_fname = os.path.join("predictions-" + str(modname), str(qid) + ".json")
        with open(json_fname, "r", encoding="utf-8") as f:
            result = json.load(f)

        cleaned_distractors = []
        for line in result["response"].split("\n"):
            line = line.strip()
            if not line:
                continue
            # NOTE(review): the rank stays the raw leading token (a string),
            # whereas ground-truth scores are the int 1 - confirm that the
            # consumer of this payload expects that.
            rank, _, body = line.partition(" ")
            cleaned_distractors.append((body, rank))

        return cleaned_distractors[:topk]

    def get_json(self):
        """Return a dict with the ``"meta_data"`` and ``"content"`` entries."""
        self.jsonf["meta_data"] = self.metadata
        self.jsonf["content"] = self.content
        return self.jsonf

    def total_question(self):
        """Return the number of questions held in ``self.content``."""
        return len(self.content)

    @staticmethod
    def get_all_fnames(folder):
        """Return the names of the entries found directly inside ``folder``."""
        files = list()
        for fpath in tqdm([folder], desc="Looping over fpaths"):
            files.extend(os.listdir(fpath))
        return files


In [58]:
# Build the payload for the English test questions with the "few-shot"
# predictions, keeping at most 10 proposed distractors per question.
# The constructor prints one "<question index> <distractor> <rank>" line per
# kept prediction, which is the output shown below.
new_data = PlatformDataType("test-data/english.json", "English", "few-shot", topk=10)

0 theirself 1.
0 theirselves 2.
0 themself 3.
0 themselfs 4.
0 theirs 5.
0 them 6.
0 they 7.
0 them is 8.
0 theyself 9.
0 themselfes 10.
1 the supervisor's questions 1.
1 the supervisor question 2.
1 supervisors question 3.
1 interesting the supervisors' question 4.
1 supervisors question interesting 5.
1 question interesting supervisors 6.
1 interesting supervisors' question 7.
1 question of interesting supervisors 8.
1 supervisor question interesting 9.
1 supervisors' interesting question 10.
2 movie's 1.
2 TV's 2.
2 car's 3.
2 house's 4.
2 cat's 5.
2 dog's 6.
2 restaurant's 7.
2 phone's 8.
2 computer's 9.
2 game's 10.
3 profit 1.
3 expenditure 2.
3 savings 3.
3 donation 4.
3 expense 5.
3 charity 6.
3 gift 7.
3 loss 8.
3 liability 9.
3 debt 10.
4 then 1.
4 also 2.
4 because 3.
4 since 4.
4 therefore 5.
4 except 6.
4 until 7.
4 while 8.
4 whereas 9.
4 despite 10.
5 Nephew 1.
5 Stepfather 2.
5 Cousin 3.
5 Son-in-law 4.
5 Stepbrother 5.
5 Great uncle 6.
5 Father-in-law 7.
5 Half-brother