# Run once on a new server

In [None]:
from scripts.qasrl_gs_utils import combine_files_tag_with_sentences

# Build the "combined" CSV for each Wikinews split. The three arguments are the
# gold CSV, the sentences CSV and the combined CSV path (roles inferred from the
# file names -- confirm against scripts.qasrl_gs_utils).
for split in ("dev", "test"):
    combine_files_tag_with_sentences(
        f"qasrl_gs/data/gold/wikinews.{split}.gold.csv",
        f"qasrl_gs/data/sentences/wikinews.{split}.full.csv",
        f"qasrl_gs/data/gold/wikinews.{split}.combined.csv",
    )

In [None]:
!pip install -r requirements.txt
!python -m spacy download en_core_web_sm

# Run on every notebook restart

In [None]:
# Imports

from run_summarization import main
from run_evaluation import evaluate
import os
import sys
import json

# Shared state

run = None  # wandb run; stays None until the training cell rebinds it from main()
tmp_dir = os.environ.get("TMPDIR", "/tmp")  # falls back to "/tmp" when TMPDIR is unset

# Argument fragments (later cells append these lists to sys.argv)

### Dataset arguments

_gold_dir = "qasrl_gs/data/gold"

qasrl_2015_params = ['--dataset_name', 'qa_srl']
# NOTE: the dev split is passed as both the train file and the validation file.
qasrl_2020_params = [
    "--train_file", _gold_dir + "/wikinews.dev.combined.csv",
    "--validation_file", _gold_dir + "/wikinews.dev.combined.csv",
    "--test_file", _gold_dir + "/wikinews.test.combined.csv",
    "--text_column", "sentence",
    "--summary_column", "answer",
]

### Model arguments

# T5: the directory under tmp_dir is used both as --output_dir and, at predict
# time, as --model_name_or_path. The same path is exported as T5_MODEL_DIR.
t5_model_dir = tmp_dir + '/t5-tst-summarization'
os.environ["T5_MODEL_DIR"] = t5_model_dir
t5_small_model_train_params = ['--model_name_or_path', 't5-small']
t5_model_predict_params = ['--model_name_or_path', t5_model_dir]
t5_extra_params = [
    '--model_type', 't5',
    '--source_prefix', 'summarize: ',
    '--output_dir', t5_model_dir,
]

# BART: same layout as T5, without a source prefix; exported as BART_MODEL_DIR.
bart_model_dir = tmp_dir + '/bart-tst-summarization'
os.environ["BART_MODEL_DIR"] = bart_model_dir
bart_base_model_train_params = ['--model_name_or_path', 'facebook/bart-base']
bart_model_predict_params = ['--model_name_or_path', bart_model_dir]
bart_extra_params = [
    '--model_type', 'bart',
    '--output_dir', bart_model_dir,
]



# Train, predict and evaluate

### (0) Run config

In [None]:
# Architecture to train and predict with. Options: "bart" | "t5".
model_type = "bart"

# Dataset used for training. Options: "2015"
# ("2018" is recognised by the training cell but raises "not supported yet").
qasrl_train_dataset = "2015"

# Dataset used for prediction. Options: "2015" | "2020".
qasrl_test_dataset = "2020"

### (1) Train

In [None]:
# run_summarization.main() is driven through sys.argv: start from the flags
# shared by every training run, then append the model and dataset fragments.
sys.argv = [
    'run_summarization.py',
    '--do_train',
    '--do_eval',
    '--per_device_train_batch_size', '4',
    '--per_device_eval_batch_size', '4',
    '--logging_steps', '100',
    '--num_train_epochs', '3.0',
    '--overwrite_output_dir',
    '--report_to', 'wandb',
]

# Model-specific arguments (base checkpoint + model type / output directory).
if model_type not in ("t5", "bart"):
    raise ValueError(f"model_type doesn't exist ; model_type {model_type}")
sys.argv += (
    t5_small_model_train_params + t5_extra_params
    if model_type == "t5"
    else bart_base_model_train_params + bart_extra_params
)

# Dataset-specific arguments; only the 2015 dataset can be trained on for now.
if qasrl_train_dataset == "2018":
    raise ValueError("qasrl_train_dataset 2018 not supported yet")
if qasrl_train_dataset != "2015":
    raise ValueError(f"qasrl_train_dataset doesn't exist ; qasrl_train_dataset {qasrl_train_dataset}")
sys.argv.extend(qasrl_2015_params)

# Keep the second element returned by main() as the wandb run for the predict cell.
_, run = main()

### (2) Predict

In [None]:
# !python run_summarization.py --model_name_or_path $TMPDIR/tst-summarization --do_predict --dataset_name qa_srl --output_dir $TMPDIR/tst-summarization --source_prefix "summarize: " --predict_with_generate
sys.argv = [
    'run_summarization.py',
    '--do_predict',
    '--predict_with_generate',
    '--eval_accumulation_steps', '10',  # Necessary to avoid OOM where all predictions are kept on one GPU
    '--report_to', 'wandb',
]

# Reuse the wandb run created by the training cell when there is one. `run` is
# still None if training was not executed in this kernel session, and sys.argv
# may only contain strings, so the flag is left out instead of being passed
# with a None value.
wandb_run_name = run.name if run else None
if wandb_run_name:
    sys.argv.extend(['--wandb_run_name', wandb_run_name])

# Model-specific arguments: model type / output directory, then the fine-tuned
# checkpoint directory as --model_name_or_path.
if model_type == "t5":
    sys.argv.extend(t5_extra_params)
    sys.argv.extend(t5_model_predict_params)
elif model_type == "bart":
    sys.argv.extend(bart_extra_params)
    sys.argv.extend(bart_model_predict_params)
else:
    raise ValueError(f"model_type doesn't exist ; model_type {model_type}")

# Dataset to predict on.
if qasrl_test_dataset == "2015":
    sys.argv.extend(qasrl_2015_params)
elif qasrl_test_dataset == "2020":
    sys.argv.extend(qasrl_2020_params)
else:
    raise ValueError(f"qasrl_test_dataset doesn't exist ; qasrl_test_dataset {qasrl_test_dataset}")

main()

### (3) Run the state machine in Docker to parse the predicted questions into the 7-slot format

In [None]:
if model_type == "t5":
    os.environ["MODEL_DIR"] = t5_model_dir
elif model_type == "bart":
    os.environ["MODEL_DIR"] = bart_model_dir
else:
    raise ValueError(f"model_type doesn't exist ; model_type {model_type}")    

!docker run -it -v "${MODEL_DIR}:/data" -v "$(pwd)/../qasrl_bart/qasrl_gs/data/sentences/:/sentences_data" --rm --name qasrl hirscheran/qasrl_state_machine_example "file" "/data/generated_predictions.csv" "/sentences_data/wikinews.test.full.csv" "/data/output_file.csv"

### (4) Evaluate

In [None]:
# Resolve the directory of the model selected in the run config.
model_dirs = {"t5": t5_model_dir, "bart": bart_model_dir}
if model_type not in model_dirs:
    raise ValueError(f"model_type doesn't exist ; model_type {model_type}")
model_dir = model_dirs[model_type]

# Evaluate the state-machine output file against the Wikinews test gold file.
# The last two positional arguments are left as None; their meaning is defined
# by run_evaluation.evaluate.
gold_path = "qasrl_gs/data/gold/wikinews.test.gold.csv"
parsed_predictions_path = f"{model_dir}/output_file.csv"
evaluate(gold_path, parsed_predictions_path, None, None)

# Run multiple experiments of train, predict and evaluate

In [None]:

# batch_size = 4
# model_name_or_path = 't5-small'

# # for batch_size in [4, 16]:
# for model_params in [t5_small_model_params, bart_model_params]:
# # for preprocess_output_func in ['all', 'first_two_question_answer']:
#     sys.argv = [
#         'run_summarization.py',
#         '--do_train',
#         '--do_eval',
#         '--do_predict',
#         '--predict_with_generate',
#         '--do_predict_based_on_predictions_file',
#         '--dataset_name', 'qa_srl',
#         '--output_dir', f'{tmp_dir}/tst-summarization',
#         '--per_device_train_batch_size', str(batch_size),
#         '--per_device_eval_batch_size', str(batch_size),
#         '--num_train_epochs', '3.0',
#         '--overwrite_output_dir',
#         '--eval_accumulation_steps', '10',  # Necessary to avoid OOM where all predictions are kept on one GPU
#         '--report_to', 'wandb'    
#     ]
    
#     sys.argv.extend(model_params)

#     main()

# Debugging (debug-mode flag and more)

In [None]:
# !python run_summarization.py --model_name_or_path $TMPDIR/tst-summarization --do_predict --dataset_name qa_srl --output_dir $TMPDIR/tst-summarization --source_prefix "summarize: " --predict_with_generate --debug_mode
# Predict-only run with --debug_mode; the same directory is used as both the
# model to load and the output directory.
debug_model_dir = tmp_dir + '/tst-summarization'
sys.argv = ['run_summarization.py']
sys.argv += ['--model_name_or_path', debug_model_dir]
sys.argv += ['--do_predict']
sys.argv += ['--dataset_name', 'qa_srl']
sys.argv += ['--output_dir', debug_model_dir]
sys.argv += ['--source_prefix', 'summarize: ']
sys.argv += ['--predict_with_generate']
sys.argv += ['--eval_accumulation_steps', '10']  # Necessary to avoid OOM where all predictions are kept on one GPU
sys.argv += ['--debug_mode']
main()

In [None]:
# Read the predictions from the debug output directory (<tmp_dir>/tst-summarization,
# the --output_dir of the debug cells) rather than from a path hard-coded to one
# user's home directory. Assumes the predict run writes generated_predictions.json
# into its --output_dir -- confirm against run_summarization.
predictions_path = f"{tmp_dir}/tst-summarization/generated_predictions.json"
with open(predictions_path) as f:
    predictions = json.load(f)

# Display the first (input, label, prediction) triple.
list(zip(predictions['inputs'], predictions['labels'], predictions['predictions']))[0]

In [None]:
# !python run_summarization.py --model_name_or_path $TMPDIR/tst-summarization --do_predict_based_on_predictions_file --dataset_name qa_srl --output_dir $TMPDIR/tst-summarization --source_prefix "summarize: " --debug_mode --report_to "wandb"
# Debug run with --do_predict_based_on_predictions_file instead of --do_predict;
# the same directory is used as both the model to load and the output directory.
predictions_model_dir = tmp_dir + '/tst-summarization'
sys.argv = ['run_summarization.py']
sys.argv += ['--model_name_or_path', predictions_model_dir]
sys.argv += ['--do_predict_based_on_predictions_file']
sys.argv += ['--dataset_name', 'qa_srl']
sys.argv += ['--output_dir', predictions_model_dir]
sys.argv += ['--source_prefix', 'summarize: ']
sys.argv += ['--debug_mode']
main()