# Stage 2 Screening - Title & Abstract  
The following notebook retrieves the data-frame of papers that have passed duplication removal. The notebook loops through papers, presenting the title and abstract to the reviewer. The reviewer has the option to:

1. accept,  
2. reject,  
3. don't know,     
4. return to later
5. save and stop  

For `return to later`, the script will go to the next result but leave the previous result as unviewed. For `save and stop`, the script will save the data-frame to .csv and exit the loop. The script will also always ask you if you want to add comments and, if so, will prompt an input bar. The reviewer can then re-run the notebook and pick up from where they left off. Note, there are two versions of this notebook and of the data-frame it reads from: one for each reviewer (L.B. & L.D.).  

During each reviewing session, the data-frame is read in and the row where the reviewer last finished their work is located - this is row 0 if the reviewer has not started. New rows are created to a.) note whether the reviewer has viewed the paper yet, b.) record what their decision is, and c.) store any additional comments they have.

In [3]:
# import modules
import os
import pandas as pd
from IPython.display import display, Markdown, clear_output
from tqdm.notebook import tqdm

In [4]:
def load_dataframes(
    main_df_path='../data/tiab/all_results_deduplicated.csv',
    reviewers_resp_df_path='../data/responses/LB_response_complete.csv',
):
    """Load the papers to screen and the reviewer's saved responses.

    Args:
        main_df_path: Path to the CSV of de-duplicated search results.
            Defaults to the location used by this notebook.
        reviewers_resp_df_path: Path to the CSV holding this reviewer's
            earlier responses. Defaults to the location used by this
            notebook.

    Returns:
        A ``(main_df, reviewers_resp_df)`` tuple. When the responses file
        does not exist, ``reviewers_resp_df`` is an empty frame with the
        columns ``title``, ``abstract``, ``viewed``, ``response`` and
        ``comments``.

    Raises:
        FileNotFoundError: If ``main_df_path`` does not exist.
    """
    main_df = pd.read_csv(main_df_path)

    try:
        reviewers_resp_df = pd.read_csv(reviewers_resp_df_path)
    except FileNotFoundError:
        # No responses saved yet: start from an empty frame with the
        # expected columns.
        reviewers_resp_df = pd.DataFrame({
            'title': [],
            'abstract': [],
            'viewed': [],
            'response': [],
            'comments': [],
        })

    return main_df, reviewers_resp_df

def adjust_doi(doi):
    """Turn a raw DOI or arXiv identifier into a clickable URL.

    Args:
        doi: Raw value taken from the record's ``doi`` field. May be a full
            URL, a bare DOI starting with ``10``, an ``arXiv``-prefixed
            identifier, or a non-string value when no identifier is present.

    Returns:
        The URL as a string, or ``None`` when the value is not a string or
        has no recognised prefix.
    """
    # Non-string values (e.g. a float NaN for a missing cell) carry no
    # usable identifier.
    if not isinstance(doi, str):
        return None

    doi = doi.strip()
    # Already a link: keep it unchanged (plain http links included).
    if doi.startswith(('https://', 'http://')):
        return doi
    if doi.startswith('10'):
        return f'https://dx.doi.org/{doi}'
    if doi.startswith('arXiv'):
        # Keep only what follows the last colon, e.g. 'arXiv:2101.00001'.
        return f'https://arxiv.org/abs/{doi.split(":")[-1]}'

    return None

def display_record(title, abstract, doi):
    """Render one paper (title, abstract, optional DOI link) with the decision menu."""
    # The link line is only added when a URL is available.
    if doi:
        link_part = f' \n[DOI]({doi})'
    else:
        link_part = ''

    heading_part = f'# {title} \n## Abstract \n{abstract}\n'
    menu_part = '\n1. accept,\n2. reject,\n3. do not know,\n4. return to later\n5. save and stop'

    display(Markdown(heading_part + link_part + menu_part))
    
def get_user_input():
    """Prompt the reviewer for a decision and optional comments.

    The decision prompt is repeated until one of the recognised choices is
    entered, so a typo is never returned as if it were a decision. Leading
    and trailing whitespace and letter case are ignored.

    Returns:
        A ``(decision, comments)`` tuple. ``decision`` is one of ``'1'``,
        ``'2'``, ``'3'``, ``'4'``, ``'5'`` or ``'q'``. ``comments`` is
        ``None`` for ``'5'``/``'q'`` (save and stop), otherwise the text the
        reviewer typed, or ``''`` if they declined to comment.
    """
    valid_decisions = ('1', '2', '3', '4', '5', 'q')

    while True:
        decision = input('Please input your decision: ').strip().lower()
        if decision in valid_decisions:
            break
        print(f'Unrecognised decision {decision!r}; please enter 1, 2, 3, 4 or 5.')

    if decision in ('5', 'q'):
        return decision, None  # Signal to save and exit

    comments = ''
    wants_comment = input('Last check, do you want to add comments? (y/n): ').strip().lower()
    if wants_comment in ('y', 'yes'):
        comments = input('Please type in your comments: ')

    return decision, comments

def update_reviewers_df(reviewers_resp_df, title, abstract, decision, comments):
    """Append one screening decision to the reviewer's response frame.

    Args:
        reviewers_resp_df: Frame of responses collected so far.
        title: Title of the paper that was shown.
        abstract: Abstract of the paper that was shown.
        decision: The reviewer's choice as entered at the prompt.
        comments: Free-text comments, or ``''`` when none were given.

    Returns:
        A new frame containing the previous rows plus one row for this
        paper. ``viewed`` is ``True`` only for decisions ``'1'``, ``'2'`` and
        ``'3'``; any other decision (e.g. ``'4'``, return to later) is stored
        as not viewed.
    """
    viewed = decision in ('1', '2', '3')
    new_row = pd.DataFrame({
        'title': [title],
        'abstract': [abstract],
        'viewed': [viewed],
        'response': [decision],
        'comments': [comments],
    })
    # ignore_index renumbers the rows 0..n-1; without it every appended row
    # keeps index label 0 and the frame ends up with duplicate labels.
    return pd.concat([reviewers_resp_df, new_row], ignore_index=True)

def main():
    """Run the interactive title/abstract screening loop and save the results.

    Papers are shown one at a time; each decision is appended to the
    reviewer's response frame. The loop stops when the reviewer chooses
    ``'5'``/``'q'`` or when all papers have been shown, and the responses are
    then written to CSV.

    A re-run resumes after the last recorded row. This assumes the saved
    responses were produced from the same paper list in the same order.
    """
    # Must point at the same file that load_dataframes() reads the
    # responses from, so a later session can pick the saved rows up again.
    reviewers_resp_df_path = '../data/responses/LB_response_complete.csv'  # update as necessary

    main_df, reviewers_resp_df = load_dataframes()

    # Exactly one response row is appended per paper shown, so the number of
    # saved rows is the position where the previous session stopped (0 on a
    # first run).
    start = len(reviewers_resp_df)

    try:
        for i in tqdm(range(start, len(main_df))):
            clear_output(wait=True)

            record = main_df.iloc[i]
            title, abstract = record['title'], record['abstract']
            doi = adjust_doi(record['doi'])

            display_record(title, abstract, doi)

            decision, comments = get_user_input()
            if decision in ('5', 'q'):
                break

            reviewers_resp_df = update_reviewers_df(reviewers_resp_df, title, abstract, decision, comments)
    finally:
        # Save whatever has been collected, even if the session is
        # interrupted part-way through.
        out_dir = os.path.dirname(reviewers_resp_df_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        reviewers_resp_df.to_csv(reviewers_resp_df_path, index=False)

    clear_output(wait=True)

In [None]:
# Start the interactive screening session when this code is run as the main program.
if __name__ == "__main__":
    main()

## Outputs

In [None]:
# Read in and show the data frame holding all de-duplicated search results.
main_df = pd.read_csv('../data/tiab/all_results_deduplicated.csv') # alter path as necessary
# Bare expression: as the last line of the cell it is shown as the cell output.
main_df

In [None]:
# Read in and show the data frame of completed title/abstract reviews (if it exists).
try:
    reviewers_resp_df = pd.read_csv('../data/responses/LB_response_complete.csv') # alter path as necessary
except FileNotFoundError:
    # No responses file has been saved yet, so there is nothing to show.
    reviewers_resp_df = None
reviewers_resp_df