In [None]:
# Directory that is handed to Hydra's `initialize_config_dir` in the next cell.
# Replace the placeholder with the real location before running the notebook.
config_dir = '/PATH_TO_CONFIG_DIR'

## Imports

In [None]:
import os

from hydra import compose, initialize_config_dir

# Compose the notebook configuration from the `config` file found in `config_dir`.
with initialize_config_dir(config_dir=config_dir, version_base='1.3'):
    config_dict = compose(config_name='config')

# Every later cell runs relative to the working directory named in the config.
os.chdir(config_dict.cwd)

In [None]:
import json, pandas as pd, os
from typing import Dict, List
from proconsul.datasets_extraction.dataset_scheme import DatasetScheme

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, Markdown

In [None]:
from time import gmtime, strftime

In [None]:
def pretty_print(df):
    """Show a DataFrame as a wide, large-font HTML table in the cell output.

    The frame is converted with ``DataFrame.to_html()``; escaped newlines in
    cell values are turned into ``<br>`` tags, each data cell is left-aligned
    with a 26px font, and the table is stretched to 85% of the output width.

    Args:
        df: the pandas DataFrame to render.

    Returns:
        Whatever ``display`` returns for the rendered ``Markdown`` object.
    """
    # to_html() honours pandas' `display.max_colwidth` option (50 characters by
    # default) and would cut long cell texts off with "...". Lift the limit for
    # this call only so the full text is shown.
    with pd.option_context("display.max_colwidth", None):
        table_html = df.to_html()

    styled_html = (
        table_html
        # pandas writes a newline inside a cell as the two characters `\n`.
        .replace("\\n", "<br>")
        .replace("<td>", '<td align="left" style="width:50%; font-size:26px;">')
        .replace('<table border="1" class="dataframe">', '<table border="1" class="dataframe" style="width:85%">')
    )
    return display(Markdown(styled_html))

## Data

In [None]:
# Paths to the two summary files that are compared side by side.
left_data_path = "/PATH/SUMMARIES_A.json"
right_data_path = "/PATH/SUMMARIES_B.json"

# Only this split of each file is annotated.
split = "test"
with open(left_data_path, "r") as file:
    left_dataset = json.load(file)[split]

with open(right_data_path, "r") as file:
    right_dataset = json.load(file)[split]

# One record per id of the left file; the same id must exist in the right file.
points = [{DatasetScheme.ID_KEY: k, "left": left_dataset[k], "right": right_dataset[k]} for k in left_dataset.keys()]
points = points[:1000]

# Both sides of a pair must describe the same code and, when a seen-context key
# is configured, the same context. The key may be None (the display code handles
# that case), so the context comparison is skipped then instead of raising
# KeyError on a None lookup.
assert all(
    p["left"][DatasetScheme.CODE_KEY] == p["right"][DatasetScheme.CODE_KEY]
    and (
        config_dict.evaluation.seen_context_key is None
        or p["left"][config_dict.evaluation.seen_context_key] == p["right"][config_dict.evaluation.seen_context_key]
    )
    for p in points
), "left and right datasets disagree on code or seen context"

## Labeling setup

In [None]:
# Question shown to the annotator -> key under which the answer is stored.
q_to_labels = {'Which summary is more sufficient?': "Sufficiency"}

# For every question, one ToggleButtons widget per point (indexed like `points`).
all_widgets = {}
for q in q_to_labels:
    widget_axis = tuple(
        widgets.ToggleButtons(
            description=q,
            options=[('Unspecified', 'Unspecified'), ('Left', 0), ('Right', 1), ('Draw', 2)],
        )
        for _ in points
    )
    all_widgets[q] = widget_axis


def screen_cur_labels(save_with_name,
                      all_widgets: Dict,
                      points: List) -> None:
    """Copy the current widget values into `points` and write them to disk.

    Every point gets one entry per question, stored under the label from
    `q_to_labels`. The points are then saved as JSON under the current `split`
    key; data already stored in the target file for other keys is kept.

    Args:
        save_with_name: if truthy, the file name is taken from the `filename_`
            text widget; otherwise a dated checkpoint name
            (`annotation_chkpt_YYYY-MM-DD`, UTC) is used.
        all_widgets: mapping question -> sequence of widgets, one per point.
        points: list of point dicts; updated in place with the chosen labels.
    """
    for i, point in enumerate(points):
        for q, widget_axis in all_widgets.items():
            point[q_to_labels[q]] = widget_axis[i].value

    if save_with_name:
        filename = filename_.value
    else:
        filename = 'annotation_chkpt_' + strftime("%Y-%m-%d", gmtime())

    # NOTE(review): the `name_` widget is described as "used as dir name" but is
    # not read here; the directory always comes from the config path. Confirm
    # whether that is intended.
    # os.path.dirname also copes with a bare file name (no directory part),
    # which previously produced a path at the filesystem root.
    save_dir = os.path.dirname(config_dict.path_after_annotation)
    full_path = os.path.join(save_dir, f'{filename}.json')

    old_data = dict()
    if os.path.exists(full_path):
        with open(full_path, 'r') as file:
            old_data = json.load(file)
    old_data[split] = {p[DatasetScheme.ID_KEY]: p for p in points}

    # An empty directory part means "current directory"; makedirs('') would fail.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(full_path, 'w') as file:
        json.dump(old_data, file, indent=2)


# Last seen values of the two toggle buttons; a change tells `get_studio`
# which button was clicked.
save_state = next_state = False
# Selects the index of the point currently being annotated.
slider = widgets.IntSlider(value=0, min=0, max=len(points) - 1, step=1)
def get_studio(save_click: bool, 
               point_n: int,
               next_click: bool,
               all_widgets: Dict, 
               points: List) -> None:
    """Callback behind the annotation UI: advance, save and render one point.

    A change of `next_click` relative to the remembered `next_state` is read as
    a click on the "next" button: the slider is advanced by one and nothing is
    rendered by this call. Otherwise the current labels are written out via
    `screen_cur_labels` (under the user-chosen file name when `save_click`
    changed, under the checkpoint name if not) and the point `point_n` is shown.

    Args:
        save_click: current value of the save toggle button.
        point_n: index of the point to show.
        next_click: current value of the "next point" toggle button.
        all_widgets: mapping question -> sequence of widgets, one per point.
        points: list of point dicts with "left" and "right" entries.
    """
    global save_state, next_state, slider

    # "Next" was toggled: remember the new state and move the slider on.
    if next_state != next_click:
        next_state = next_click
        slider.value += 1
        return

    # A flipped save toggle means an explicit, named save was requested.
    save_with_name = save_state != save_click
    save_state = save_click
    screen_cur_labels(save_with_name, all_widgets, points)

    # Answer widgets belonging to the selected point.
    for widget_axis in all_widgets.values():
        display(widget_axis[point_n])
    print()

    # The two candidate texts, side by side.
    doc_key = config_dict.evaluation.doc_to_eval_key
    comparison = pd.DataFrame(
        {
            "left": [points[point_n]["left"][doc_key]],
            "right": [points[point_n]["right"][doc_key]],
        }
    )
    pretty_print(comparison)

    # The code, followed by the context (empty when no key is configured).
    left_point = points[point_n]["left"]
    print(f'______\n{left_point[DatasetScheme.CODE_KEY]}')
    context_key = config_dict.evaluation.seen_context_key
    seen_cxt = '' if context_key is None else left_point[context_key]
    print(f'\n______\nCONTEXT SEEN:\n\n{seen_cxt}')

In [None]:
# Defaults are derived from the configured output path so that an untouched
# form reproduces it: file name without its extension, and the name of the
# directory that contains the file.
# os.path.splitext strips only the final extension, so a dotted name such as
# "ann.v2.json" is kept as "ann.v2"; a path without a directory part yields an
# empty directory name instead of raising IndexError.
default_filename = os.path.splitext(os.path.basename(config_dict.path_after_annotation))[0]
default_dirname = os.path.basename(os.path.dirname(config_dict.path_after_annotation))
name_ = widgets.Text(placeholder='used as dir name', 
                     description='Your name:',
                     value=default_dirname,
                    )
filename_ = widgets.Text(placeholder='filename to save', 
                         description='Filename:',
                         value=default_filename,
                        )

## Labeling itself

In [None]:
# Name and file-name inputs, preceded by a hint for the annotator.
print('Don\'t change 2 filds below if you want to use current evaluation config downstream.')
display(name_, filename_)
print('\n')

# Toggle buttons act as click sources: `get_studio` reacts to a change of value.
save_button = widgets.ToggleButton(value=False,
                                   description='Click to Save',
                                   button_style='info')
next_button = widgets.ToggleButton(value=False,
                                   description='Next point',
                                   button_style='warning')

# Wire the controls to the callback; widgets and points are passed through as-is.
interact(
    get_studio,
    save_click=save_button,
    point_n=slider,
    next_click=next_button,
    all_widgets=fixed(all_widgets),
    points=fixed(points),
);