# Prepare dataset (JSON files)

- Prepare AbstRCT datasets for llama factory.

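- Joint (ACC, ARIC) task: argument classification and argument relation identification/classification are requested in a single prompt.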

- We create the data files: `abstRCT_joint_train_neo.json`, `abstRCT_joint_test_neo.json`, `abstRCT_joint_test_gla.json`, `abstRCT_joint_test_mix.json`

## Libraries

In [10]:
import os
import re
import ast
import json
import random
import pickle
import pandas as pd

from pathlib import Path

### Helper Functions

In [119]:
def get_triplet(x):
    """Collect the relation triplet stored on one row.

    Args:
        x: Row-like object exposing ``adu_pos``, ``parent_pos`` and ``afu``
            attributes.

    Returns:
        list: ``[adu_pos, parent_pos, afu]``, in that order.
    """
    triplet = [getattr(x, field) for field in ("adu_pos", "parent_pos", "afu")]
    return triplet

In [120]:
def process_aty(x):
    """Split the space-joined ``aty`` string of a row into a list of labels.

    Args:
        x: Row-like object whose ``aty`` attribute is a string.

    Returns:
        list[str]: The pieces of ``x.aty`` separated on single spaces
        (consecutive spaces therefore yield empty strings).
    """
    return x.aty.split(" ")

In [121]:
def filter_nones(x):
    """Drop the triplets whose relation label is ``'none'``.

    Args:
        x: Row-like object whose ``relation_type`` attribute is an iterable
            of triplets; the label is read from index 2 of each triplet.

    Returns:
        list: The triplets with a label other than ``'none'``, in their
        original order.
    """
    kept = []
    for triplet in x.relation_type:
        if triplet[2] != 'none':
            kept.append(triplet)
    return kept

In [122]:
def insert_ac_tags(x):
    """Wrap each argument component of an abstract in numbered tags.

    Args:
        x: Row-like object exposing ``abstract_text`` (str), ``acs_list``
            (component strings) and ``ac_types`` (one label per component).

    Returns:
        str: The abstract text in which every non-empty component whose
        label is not ``'none'`` is rewritten as ``<ACk> component </ACk>``,
        with ``k`` counting the tagged components from 1.
    """
    tagged_text = x.abstract_text
    next_tag = 1

    for component, component_type in zip(x.acs_list, x.ac_types):
        # Untyped ('none') or empty spans are left as plain text.
        if component == '' or component_type == 'none':
            continue

        open_tag = f"<AC{next_tag}> "
        close_tag = f" </AC{next_tag}>"
        # NOTE: str.replace rewrites every occurrence of the component
        # string in the (already partially tagged) text, not just the first.
        tagged_text = tagged_text.replace(component, open_tag + component + close_tag)
        next_tag += 1

    return tagged_text

In [123]:
def get_dataframe(dataset):
    """Load one CSV split and aggregate it to one row per ``doc_id``.

    Args:
        dataset: Path of the CSV file relative to the module-level
            ``data_dir`` (e.g. ``"neo/train.csv"``).

    Returns:
        pandas.DataFrame: One row per ``doc_id`` with the columns
        ``abstract_text`` (concatenated ``text``), ``ac_types`` (split
        ``aty`` labels), ``acs_list`` (list of ``text`` values),
        ``relation_type`` (all triplets), ``tagged_abstract_text`` and
        ``relations_list`` (triplets whose label is not ``'none'``).
    """
    raw_df = pd.read_csv(os.path.join(data_dir, dataset))
    raw_df['relation_type'] = raw_df.apply(get_triplet, axis=1)
    # NOTE: duplicates are dropped on ``text`` over the whole file, before
    # the per-document grouping below.
    raw_df = raw_df.drop_duplicates(subset=["text"], keep="first")

    # Missing values become empty strings so the string joins cannot fail.
    filled_df = raw_df.fillna('')

    texts_df = (
        filled_df.groupby(["doc_id"])
        .agg({"text": "".join, "aty": " ".join})
        .reset_index()
    )
    texts_df["ac_types"] = texts_df.apply(process_aty, axis=1)
    texts_df = texts_df.rename(columns={'text': 'abstract_text'}).drop(columns=['aty'])

    acs_df = (
        filled_df.groupby('doc_id')['text']
        .agg(list)
        .reset_index()
        .rename(columns={'text': 'acs_list'})
    )
    relations_df = filled_df.groupby('doc_id')['relation_type'].agg(list).reset_index()

    merged = texts_df.merge(acs_df, on='doc_id').merge(relations_df, on='doc_id')

    merged["tagged_abstract_text"] = merged.apply(insert_ac_tags, axis=1)
    merged['relations_list'] = merged.apply(filter_nones, axis=1)

    return merged

## Load Data

In [124]:
# Dataset root: <parent of the current working directory>/am_reasoning/abstRCT.
abstRCT_dir = Path.cwd().parent / "am_reasoning" / "abstRCT"

In [125]:
abstRCT_dir
# /Utilisateurs/umushtaq/am_reasoning/abstRCT

PosixPath('/Utilisateurs/umushtaq/am_reasoning/abstRCT')

In [126]:
# Directory that get_dataframe() joins its relative CSV path onto.
data_dir = abstRCT_dir / "data_files"

In [127]:
# Per-abstract frame built from "neo/train.csv" under data_dir.
neo_train_df = get_dataframe("neo/train.csv")

In [128]:
# Per-abstract frame built from "neo/test.csv" under data_dir.
neo_test_df = get_dataframe("neo/test.csv")

In [129]:
# Per-abstract frame built from "gla/test.csv" under data_dir.
gla_test_df = get_dataframe("gla/test.csv")

In [130]:
# Per-abstract frame built from "mix/test.csv" under data_dir.
mix_test_df = get_dataframe("mix/test.csv")

In [131]:
neo_train_df

Unnamed: 0,doc_id,abstract_text,ac_types,acs_list,relation_type,tagged_abstract_text,relations_list
0,7502428,"Single-agent therapy with bicalutamide, a non...","[none, Premise, Premise, Premise, Premise, Pre...","[ Single-agent therapy with bicalutamide, a no...","[[0, 0, none], [1, 8, attack], [2, 8, support]...","Single-agent therapy with bicalutamide, a non...","[[1, 8, attack], [2, 8, support], [3, 2, attac..."
1,7680374,"An open, randomized study was performed to as...","[none, Premise, Premise, Claim, Premise, Premi...","[ An open, randomized study was performed to a...","[[0, 0, none], [1, 8, support], [2, 0, none], ...","An open, randomized study was performed to as...","[[1, 8, support], [3, 9, support], [4, 8, supp..."
2,7786823,"From 1984 to 1989, the Swiss Group for Clinic...","[none, Premise, Premise, Premise, Premise, Pre...","[ From 1984 to 1989, the Swiss Group for Clini...","[[0, 0, none], [1, 8, support], [2, 8, support...","From 1984 to 1989, the Swiss Group for Clinic...","[[1, 8, support], [2, 8, support], [3, 8, supp..."
3,7989941,"Taxol (paclitaxel; Bristol-Myers Squibb, Walli...","[Premise, none, Premise, Premise, Premise, Pre...","[Taxol (paclitaxel; Bristol-Myers Squibb, Wall...","[[1, 0, none], [0, 0, none], [2, 9, support], ...","<AC1> Taxol (paclitaxel; Bristol-Myers Squibb,...","[[2, 9, support], [3, 7, attack], [4, 3, attac..."
4,8229122,The impact of the side effects of megestrol a...,"[none, Premise, Premise, Premise, Claim]",[ The impact of the side effects of megestrol ...,"[[0, 0, none], [1, 4, support], [2, 4, attack]...",The impact of the side effects of megestrol a...,"[[1, 4, support], [2, 4, attack], [3, 2, attack]]"
...,...,...,...,...,...,...,...
345,23989945,Care closer to home is being explored as a me...,"[none, Premise, Premise, none, Premise, Premis...",[ Care closer to home is being explored as a m...,"[[0, 0, none], [1, 7, support], [2, 6, support...",Care closer to home is being explored as a me...,"[[1, 7, support], [2, 6, support], [3, 7, supp..."
346,23993401,The optimal duration over which lung SBRT sho...,"[none, Premise, Premise, Premise, Premise, Cla...",[ The optimal duration over which lung SBRT sh...,"[[0, 0, none], [1, 5, support], [2, 6, support...",The optimal duration over which lung SBRT sho...,"[[1, 5, support], [2, 6, support], [3, 6, supp..."
347,24001635,Patients experience reductions in quality of ...,"[none, Premise, Premise, Claim, none]",[ Patients experience reductions in quality of...,"[[0, 0, none], [1, 3, support], [2, 3, support...",Patients experience reductions in quality of ...,"[[1, 3, support], [2, 3, support]]"
348,24067488,Chronic gastrointestinal symptoms after pelvi...,"[none, Premise, Premise, Claim, Claim]",[ Chronic gastrointestinal symptoms after pelv...,"[[0, 0, none], [1, 3, support], [2, 4, support...",Chronic gastrointestinal symptoms after pelvi...,"[[1, 3, support], [2, 4, support]]"


In [175]:
neo_train_df.iloc[10]['acs_list']

[' We conducted a prospective, randomized, multicentre clinical trial comparing the effects and costs of GM-CSF as an adjunct to intensive chemotherapy in elderly patients with acute myeloid leukaemia (AML). The patients were randomized to either daunomycin-cytosine arabinoside (control arm: n = 161) or daunomycin-cytosine arabinoside with GM-CSF (GM-CSF arm: n = 157). The primary end-point was the effect of GM-CSF on the percentage of complete remissions (CR). Survival duration, disease-free survival, quality of life and costs were evaluated separately.',
 'CR after remission induction treatment was achieved in 55% of the patients in the control group and in 56% of the patients in the GM-CSF group (P = NS).',
 'The duration of survival and disease-free survival at 2 years after randomization were estimated at 22% and 19% for the control group and 22% and 14% for the GM-CSF group (P = NS).',
 'Considering the short-term quality of life, the administration of GM-CSF resulted in more pro

In [176]:
neo_train_df.iloc[10]['ac_types'], len(neo_train_df.iloc[10]['ac_types'])

(['none',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Claim'],
 8)

In [177]:
neo_train_df.iloc[10]['relation_type']

[[0, 0, 'none'],
 [1, 0, 'none'],
 [2, 0, 'none'],
 [3, 0, 'none'],
 [4, 0, 'none'],
 [5, 7, 'support'],
 [6, 7, 'attack'],
 [7, 0, 'none']]

In [178]:
neo_train_df.iloc[10]['relations_list']

[[5, 7, 'support'], [6, 7, 'attack']]

In [168]:
def adjust_ac_idxes(row):
    """Drop ``'none'`` components and renumber relations accordingly.

    Args:
        row: Mapping-like object with ``'ac_types'`` (list of labels) and
            ``'relations_list'`` (iterable of ``(src, tgt, rel_type)``).

    Returns:
        tuple: ``(new_labels, new_relations)`` where ``new_labels`` are the
        labels other than ``'none'`` and ``new_relations`` are
        ``[src, tgt, rel_type]`` lists renumbered from 1 over the kept
        labels. ``src``/``tgt`` are looked up as 0-based positions in
        ``ac_types``; relations touching a dropped or unknown position are
        discarded.
    """
    kept = [(pos, label) for pos, label in enumerate(row['ac_types']) if label != 'none']
    new_labels = [label for _, label in kept]
    # Original position -> 0-based rank among the kept components.
    index_map = {pos: rank for rank, (pos, _) in enumerate(kept)}

    new_relations = [
        [index_map[src] + 1, index_map[tgt] + 1, rel_type]
        for src, tgt, rel_type in row['relations_list']
        if src in index_map and tgt in index_map
    ]

    return new_labels, new_relations

In [174]:
adjust_ac_idxes(neo_train_df.iloc[0])

(['Premise',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Premise',
  'Claim'],
 [[1, 8, 'attack'],
  [2, 8, 'support'],
  [3, 2, 'attack'],
  [4, 8, 'support'],
  [5, 8, 'support'],
  [6, 8, 'support'],
  [7, 8, 'attack']])

In [139]:
# Cache every prepared frame as a pickle directly under data_dir.
for pkl_name, frame in (
    ("neo_train_df.pkl", neo_train_df),
    ("neo_test_df.pkl", neo_test_df),
    ("gla_test_df.pkl", gla_test_df),
    ("mix_test_df.pkl", mix_test_df),
):
    frame.to_pickle(data_dir / pkl_name)

## Prepare prompt

In [140]:
def formatting_fct(instruction="", input="", output=""):
    """Bundle one example into an instruction/input/output record.

    Args:
        instruction: Task description shared by the examples.
        input: Example-specific prompt text.
        output: Expected answer text.

    Returns:
        dict: ``{"instruction": ..., "input": ..., "output": ...}`` with each
        value formatted to a string.
    """
    # presumably the record layout consumed by LLaMA-Factory (see notebook
    # intro) — confirm against the dataset registration.
    fields = (("instruction", instruction), ("input", input), ("output", output))
    return {key: f"{value}" for key, value in fields}

In [141]:
# def write_instruction():

#     # results = json.dumps(["component_type (str)"] * nr_acs)

#     instruction = """### You are an expert in Argument Mining. You are given a biomedical abstract text which contains numbered argument components enclosed by <AC></AC> tags. Your task is to identify argument relations between argument components in the abstract text and classify their relation type as either "support" or "attack". You must return a list of triplets in the following JSON format: {"list_argument_relation_types": [[source AC (int), target AC (int), relation_type (str)], ..., [source AC (int), target AC (int), relation_type (str)]]} where each element "relation_type (str)" is replaced by either "support" or "attack".
# """
    
#     return instruction

In [142]:
def build_instruction():
    """Return the fixed instruction text used for every example.

    The text describes the two tasks (argument classification, argument
    relation identification/classification), the expected output format and
    one worked example.

    The relation format line and the example use the key
    ``"relation_types"`` and the order ``(source_index, target_index,
    relation_type)`` so that they agree with what ``build_answer`` emits;
    the previous wording (``"argument_relations"``, target before source)
    contradicted the gold outputs.

    Returns:
        str: The instruction prompt.
    """
    instruction = """### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identification and Classification
- Identify relationships between arguments by determining which arguments support or attack other arguments.
- For each related argument pair, classify the relationship as either: "Support"(S) or "Attack"(A).
- You must return a list of triplets in the following JSON format: {"relation_types": [(source_index (int), target_index (int), relation_type (str)), (source_index (int), target_index (int), relation_type (str)), ...]}
- Note: Indices are 1-based, referring to the position in the provided arguments list.

Example:

### Output:
{"argument_types": ['P', 'P', 'C', 'P', 'P', 'P', 'C', 'C', 'C', 'C']}
{"relation_types": [(1, 8, 'S'), (3, 9, 'S'), (4, 8, 'S'), (5, 8, 'S'), (7, 6, 'A'), (9, 8, 'A'), (10, 8, 'A')]}
"""

    return instruction

In [143]:
print(build_instruction())

### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identificat

In [144]:
def build_input(abstract_text, acs, ac_types):
    """Render the example-specific prompt: abstract plus numbered arguments.

    Args:
        abstract_text: Full abstract text.
        acs: Component strings of the abstract.
        ac_types: One label per component; components labelled ``"none"``
            are left out of the numbered list.

    Returns:
        str: ``"### Abstract text:"`` section followed by the
        ``"### List of arguments in the abstract:"`` section, in which the
        kept components are numbered from 1, one per line.
    """
    header = f"### Abstract text:\n{abstract_text}\n"

    kept = (ac for ac, ac_type in zip(acs, ac_types) if ac_type != "none")
    numbered = [f"{rank}. {ac}" for rank, ac in enumerate(kept, start=1)]
    listing = "### List of arguments in the abstract:\n" + "\n".join(numbered)

    return header + listing

In [145]:
def build_answer(relation_types, ac_types):
    """Render the gold answer for one example.

    Args:
        relation_types: Iterable of ``(i, j, label)`` triplets whose label
            is ``"support"`` or ``"attack"``.
        ac_types: Component labels; ``'none'`` entries are skipped, the
            others must be ``"Premise"``, ``"Claim"`` or ``"MajorClaim"``.

    Returns:
        str: ``"### Output:"`` followed by one ``"argument_types"`` line and
        one ``"relation_types"`` line, using the one-letter codes.

    Raises:
        KeyError: If a label is not covered by the code tables.
    """
    type_codes = {"Premise": "P", "Claim": "C", "MajorClaim": "M"}
    relation_codes = {"support": "S", "attack": "A"}

    coded_types = []
    for label in ac_types:
        if label != 'none':
            coded_types.append(type_codes[label])

    coded_relations = []
    for src, tgt, relation in relation_types:
        coded_relations.append((src, tgt, relation_codes[relation]))

    # The collections are interpolated with their Python repr (single-quoted
    # strings, tuples), not serialised with json.dumps.
    parts = [
        "### Output:\n",
        f'{{"argument_types": {coded_types}}}\n',
        f'{{"relation_types": {coded_relations}}}\n',
    ]
    return "".join(parts)

## Prepare data files

### train file (neo)

In [146]:
# One instruction/input/output record per abstract of the neo training split.
data_file_train = [
    formatting_fct(
        build_instruction(),
        build_input(row.abstract_text, row.acs_list, row.ac_types),
        build_answer(row.relations_list, row.ac_types),
    )
    for _, row in neo_train_df.iterrows()
]

In [147]:
len(data_file_train)

350

In [148]:
print(data_file_train[0]['instruction'])

### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identificat

In [149]:
print(data_file_train[0]['input'])

### Abstract text:
 Single-agent therapy with bicalutamide, a nonsteroidal antiandrogen, was compared with castration, either surgical or medical, in patients with untreated Stage D2 prostate cancer. In an open, randomized, multicenter trial, patients were randomized to treatment with 50 mg bicalutamide (n = 243) once daily or to castration (n = 243), either orchiectomy or depot injection of goserelin acetate every 28 days. Primary efficacy endpoints were times to treatment failure and objective disease progression and survival. Assessments included review of measurable metastases, prostate dimensions, Eastern Cooperative Oncology Group performance status, pain, analgesic requirements, and quality of life responses. The median duration of therapy was 39 weeks for bicalutamide-treated patients and 42 weeks for castrated patients; treatment failure occurred in 53% and 42% and disease progression in 43% and 33%, respectively.Treatment effects favored castration for both endpoints (P < or 

In [150]:
print(data_file_train[0]['output'])

### Output:
{"argument_types": ['P', 'P', 'P', 'P', 'P', 'P', 'P', 'C']}
{"relation_types": [(1, 8, 'A'), (2, 8, 'S'), (3, 2, 'A'), (4, 8, 'S'), (5, 8, 'S'), (6, 8, 'S'), (7, 8, 'A')]}



### test file (neo)

In [151]:
# One instruction/input/output record per abstract of the neo test split.
data_file_test_neo = [
    formatting_fct(
        build_instruction(),
        build_input(row.abstract_text, row.acs_list, row.ac_types),
        build_answer(row.relations_list, row.ac_types),
    )
    for _, row in neo_test_df.iterrows()
]

In [152]:
len(data_file_test_neo)

100

In [153]:
print(data_file_test_neo[0]['instruction'])

### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identificat

In [154]:
print(data_file_test_neo[0]['input'])

### Abstract text:
 To investigate the effects of medroxyprogesterone acetate (MPA) on appetite, weight, and quality of life (QL) in patients with advanced-stage, incurable, non-hormone-sensitive cancer. Two hundred six eligible patients were randomized between double-blind MPA 500 mg twice daily or placebo. Appetite (0 to 10 numerical rating scale), weight, and QL (European Organization for Research and Treatment of Cancer Quality of Life Questionnaire [EORTC-QLQ-C30]) were assessed before the start of treatment (t = 0), and 6 weeks (t = 6) and 12 weeks (t = 12) thereafter. One hundred thirty-four patients (68 MPA and 66 placebo) were assessable at t = 6 and 99 patients (53 MPA and 46 placebo) at t = 12.A beneficial effect of MPA on appetite was observed after both 6 weeks (P = .008) and 12 weeks (P = .01) of treatment.After 12 weeks, a mean weight gain of 0.6 +/- 4.4 kg was seen in the MPA, versus an ongoing mean weight loss of 1.4 +/- 4.6 kg in the placebo group.This difference of 2

In [155]:
print(data_file_test_neo[0]['output'])

### Output:
{"argument_types": ['P', 'P', 'P', 'P', 'C', 'P', 'C', 'C']}
{"relation_types": [(1, 7, 'S'), (2, 7, 'S'), (3, 2, 'S'), (4, 5, 'S'), (5, 8, 'S'), (6, 7, 'S'), (8, 7, 'A')]}



In [156]:
# Error!
# Error corrected !

### test file (gla)

In [157]:
# One instruction/input/output record per abstract of the gla test split.
data_file_test_gla = [
    formatting_fct(
        build_instruction(),
        build_input(row.abstract_text, row.acs_list, row.ac_types),
        build_answer(row.relations_list, row.ac_types),
    )
    for _, row in gla_test_df.iterrows()
]

In [158]:
len(data_file_test_gla)

100

In [159]:
print(data_file_test_gla[0]['instruction'])

### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identificat

In [160]:
print(data_file_test_gla[0]['input'])

### Abstract text:


In a randomized clinical trial, the authors compared the use of postoperative subconjunctival injections of 5-fluorouracil (5-FU) in 19 eyes with a single intraoperative application of subconjunctival mitomycin (MMC) at the filtering site in 20 eyes at high risk for failure of glaucoma filtering surgery.Six months after surgery, intraocular pressures averaged 10.9 +/- 5.3 mmHg (mean +/- standard deviation) in the MMC-treated eyes versus 14.2 +/- 5.5 mmHg in the 5-FU-treated eyes (P = 0.08) and were less than or equal to 12 mmHg in 60.0% of MMC-treated eyes and 21.1% of 5-FU-treated eyes (P = 0.03).Mitomycin-treated eyes were receiving an average of 0.3 +/- 0.5 medications for intraocular pressure control, and 5-FU-treated eyes were receiving an average of 1.1 +/- 1.1 medications (P = 0.01).Drug-induced corneal epithelial defects were seen in nine 5-FU-treated eyes and in no MMC-treated eyes (P = 0.0004).These results suggest that intraoperative MMC may be a viable 

In [161]:
print(data_file_test_gla[0]['output'])

### Output:
{"argument_types": ['P', 'P', 'C']}
{"relation_types": [(1, 3, 'S'), (2, 3, 'S')]}



In [162]:
# Error!
# Error corrected!

### test file (mix)

In [163]:
# One instruction/input/output record per abstract of the mix test split.
data_file_test_mix = [
    formatting_fct(
        build_instruction(),
        build_input(row.abstract_text, row.acs_list, row.ac_types),
        build_answer(row.relations_list, row.ac_types),
    )
    for _, row in mix_test_df.iterrows()
]

In [164]:
len(data_file_test_mix)

100

In [165]:
print(data_file_test_mix[0]['instruction'])

### You are an expert in Argument Mining tasked with analyzing biomedical abstracts. 

INPUT:
You will receive:
- A biomedical abstract text.
- An enumerated list of identified arguments in the biomedical abstract.

TASK 1: Argument Classification
- Classify each argument in the biomedical abstract into one of the following categories: "MajorClaim"(M), "Claim"(C) or "Premise"(P).
- MajorClaim (M): Major claims are broad conclusions supported by specific claims or general statements about treatments or diseases.
- Claim (C): A claim is the author’s concluding statement, summarizing how the new treatment compares to existing treatments based on the study’s results.
- Premise (P): A premise is an observed or measured fact—such as side effects or outcomes—that supports or challenges a claim. 
- You must return a list of argument types in following JSON format: {"argument_types": [argument_types (str), argument_types (str), ..., argument_types (str)]}

TASK 2: Argument Relations Identificat

In [166]:
print(data_file_test_mix[0]['input'])

### Abstract text:
 To evaluate the efficacy and safety of a slow-release formulation of cytarabine (DepoCyt; Chiron Corp, Emeryville, CA, and Skye Pharma, Inc, San Diego, CA) that maintains cytotoxic concentrations of cytarabine (ara-C) in the CSF of most patients for more than 14 days. Twenty-eight patients with lymphoma and a positive CSF cytology were randomized to receive DepoCyt 50 mg once every 2 weeks or free ara-C 50 mg twice a week for 1 month. Patients whose CSF cytology converted to negative and who did not have neurologic progression received an additional 3 months of consolidation therapy and then 4 months of maintenance therapy. All patients received dexamethasone 4 mg orally bid on days 1 through 5 of each 2-week cycle.The response rate was 71% for DepoCyt and 15% for ara-C on an intent-to-treat basis (P =.006).All of the patients on the DepoCyt arm but only 53% of those on the ara-C arm were able to complete the planned 1-month induction therapy regimen.Time to neurolo

In [167]:
print(data_file_test_mix[0]['output'])

### Output:
{"argument_types": ['P', 'P', 'P', 'P', 'P', 'C']}
{"relation_types": [(1, 6, 'S'), (4, 6, 'S')]}



In [34]:
# Error!
# Error corrected!

## Save `json` files

In [35]:
# Training records are written as one JSON array, relative to the working directory.
file_path = os.path.join(os.getcwd(), "../datasets/abstRCT_joint_train_neo.json")

with open(file_path, mode='w') as out_file:
    json.dump(data_file_train, out_file)

In [36]:
# neo test records are written as one JSON array, relative to the working directory.
file_path = os.path.join(os.getcwd(), "../datasets/abstRCT_joint_test_neo.json")

with open(file_path, mode='w') as out_file:
    json.dump(data_file_test_neo, out_file)

In [37]:
# gla test records are written as one JSON array, relative to the working directory.
file_path = os.path.join(os.getcwd(), "../datasets/abstRCT_joint_test_gla.json")

with open(file_path, mode='w') as out_file:
    json.dump(data_file_test_gla, out_file)

In [38]:
# mix test records are written as one JSON array, relative to the working directory.
file_path = os.path.join(os.getcwd(), "../datasets/abstRCT_joint_test_mix.json")

with open(file_path, mode='w') as out_file:
    json.dump(data_file_test_mix, out_file)