In [2]:
"""
A script to:
 - read a JSONL (JSON Lines) dataset into objects of class PassageQuestion
 - write the PassageQuestion objects to another JSONL file

"""
import json,argparse

def load_jsonl(input_path) -> list:
    """
    Read a list of objects from a JSON Lines file.

    Every non-blank line of the file is parsed as one JSON document. Blank
    or whitespace-only lines (for example an empty line at the end of the
    file) are skipped instead of aborting the whole load.

    Args:
        input_path: path of the UTF-8 encoded JSON Lines file to read.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    data = []
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Nothing to parse on an empty / whitespace-only line.
            if not line.strip():
                continue
            # json.loads ignores leading and trailing whitespace, so the
            # line terminator does not have to be stripped by hand.
            data.append(json.loads(line))
    print('Loaded {} records from {}'.format(len(data), input_path))
    return data

def dump_jsonl(data, output_path, append=False):
    """
    Serialize every item of `data` as one JSON document per line.

    The target file is opened for appending when `append` is true and is
    overwritten otherwise. Non-ASCII characters are written as-is (the file
    is UTF-8 encoded) rather than as escape sequences. A short summary is
    printed once all records have been written.
    """
    if append:
        mode = 'a+'
    else:
        mode = 'w'
    with open(output_path, mode, encoding='utf-8') as f:
        f.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in data
        )
    print('Wrote {} records to {}'.format(len(data), output_path))

class Answer:
    """One answer record, built from a dict with "text" and "start_char" keys."""

    def __init__(self, dictionary) -> None:
        # Both keys are required; a missing one raises KeyError.
        self.text, self.start_char = dictionary["text"], dictionary["start_char"]

    def to_dict(self) -> dict:
        """Return the answer as a plain dict with the same two keys."""
        return {"text": self.text, "start_char": self.start_char}

class PassageQuestion:
    """One passage/question record together with its list of Answer objects."""

    def __init__(self, dictionary) -> None:
        # Every key read below is required; a missing one raises KeyError.
        self.pq_id = dictionary["pq_id"]
        self.passage = dictionary["passage"]
        self.surah = dictionary["surah"]
        self.verses = dictionary["verses"]
        self.question = dictionary["question"]
        # Wrap each raw answer dict in an Answer object, keeping the order.
        self.answers = [Answer(raw_answer) for raw_answer in dictionary["answers"]]

    def to_dict(self) -> dict:
        """Return the record as a plain dict; answers are converted with to_dict()."""
        return dict(
            pq_id=self.pq_id,
            passage=self.passage,
            surah=self.surah,
            verses=self.verses,
            question=self.question,
            answers=[answer.to_dict() for answer in self.answers],
        )

def read_JSONL_file(file_path) -> list:
    """
    Load a JSON Lines file and wrap every record in a PassageQuestion.

    The pq_id of each record is printed as it is converted, followed by a
    final line with the number of objects collected.

    Returns:
        The PassageQuestion objects, in file order.
    """
    passage_question_objects = []
    for record in load_jsonl(file_path):
        pq_object = PassageQuestion(record)
        print (f"pq_id: {pq_object.pq_id}")
        passage_question_objects.append(pq_object)

    print(f"Collected {len(passage_question_objects)} Object from {file_path}")
    return passage_question_objects

def write_to_JSONL_file(passage_question_objects,output_path) -> None:
    """
    Write the given objects to `output_path` as JSON Lines.

    Each object is first converted to a plain dict through its to_dict()
    method; the resulting list is then handed to dump_jsonl.
    """
    dump_jsonl([pq.to_dict() for pq in passage_question_objects], output_path)



In [10]:
import json

import pandas as pd

train_file = '../quranqa/datasets/qrcd_v1.1_train.jsonl'

# Read the training set one physical line at a time.
#  - The encoding is explicit (UTF-8, the same encoding load_jsonl uses for
#    JSONL files) because the platform default is not guaranteed to decode
#    the Arabic text correctly.
#  - Iterating the file splits only on real newlines, whereas
#    read().splitlines() also splits on characters such as U+2028 that may
#    appear unescaped inside a JSON string and would cut a record in half.
#  - Blank lines are dropped so they are never handed to the JSON parser.
with open(train_file, encoding='utf-8') as f:
    lines = [line.rstrip('\n') for line in f if line.strip()]

# Intermediate one-column frame holding the raw JSON text of each record.
# Naming the column at construction time also works when there are no lines.
df_inter = pd.DataFrame({'json_element': lines})

# Parse every record exactly once and flatten the resulting dicts into columns.
df = pd.json_normalize(df_inter['json_element'].apply(json.loads).tolist())

In [11]:
# Preview the first rows of the normalized training DataFrame.
df.head()

Unnamed: 0,pq_id,passage,surah,verses,question,answers
0,2:8-16_364,ومن الناس من يقول آمنا بالله وباليوم الآخر وما...,2,8-16,لماذا سيُحاسب ويُعذب الضال يوم القيامة ان كان ...,"[{'text': 'أولئك الذين اشتروا الضلالة بالهدى',..."
1,2:174-176_364,إن الذين يكتمون ما أنزل الله من الكتاب ويشترون...,2,174-176,لماذا سيُحاسب ويُعذب الضال يوم القيامة ان كان ...,[{'text': 'أولئك الذين اشتروا الضلالة بالهدى و...
2,14:47-52_364,فلا تحسبن الله مخلف وعده رسله إن الله عزيز ذو ...,14,47-52,لماذا سيُحاسب ويُعذب الضال يوم القيامة ان كان ...,"[{'text': 'ليجزي الله كل نفس ما كسبت', 'start_..."
3,17:12-17_364,وجعلنا الليل والنهار آيتين فمحونا آية الليل وج...,17,12-17,لماذا سيُحاسب ويُعذب الضال يوم القيامة ان كان ...,"[{'text': 'كل إنسان ألزمناه طائره في عنقه', 's..."
4,18:29-31_364,وقل الحق من ربكم فمن شاء فليؤمن ومن شاء فليكفر...,18,29-31,لماذا سيُحاسب ويُعذب الضال يوم القيامة ان كان ...,"[{'text': 'من شاء فليؤمن ومن شاء فليكفر', 'sta..."
