In [13]:
import json
from ipywidgets import widgets, Button, VBox, HBox, Textarea, Checkbox, Label
from IPython.display import display, clear_output
import os

# File paths
original_file = '/home/azureuser/localfiles/code/slm-finetuning/fc-finetune-hf/src/data/model_data/commands_data-train_incomplete.json'
annotated_file = original_file.replace(".json", "_annotated.json")

# Load previously saved annotations if a progress file exists
annotated_file_found = os.path.exists(annotated_file)
if annotated_file_found:
    print(f"Loading progress from {annotated_file}")
    with open(annotated_file, "r") as f:
        annotated_data = json.load(f)
else:
    print(f"No progress file found at {annotated_file}. Starting from scratch.")

# The original data is needed on every run, not only the first one: `entries`
# is built from it. Loading it only in the "from scratch" branch left `data`
# undefined (NameError) whenever a progress file was present.
with open(original_file, "r") as f:
    data = json.load(f)

# Determine if data is list or dict
entries = data if isinstance(data, list) else list(data.values())

if not annotated_file_found:
    annotated_data = [{} for _ in range(len(entries))]  # Placeholder for annotations
elif len(annotated_data) < len(entries):
    # A progress file shorter than the data would make annotated_data[entry_index]
    # go out of range later; pad it with empty (unannotated) placeholders.
    annotated_data.extend({} for _ in range(len(entries) - len(annotated_data)))

# Initialize global tracker for current entry: the first entry that has no
# annotation yet, or 0 when every entry is already annotated.
entry_index = next((i for i, entry in enumerate(annotated_data) if not entry), 0)
        
# Build the widgets that make up the annotation UI
editor_layout = widgets.Layout(width="80%", height="150px")  # compact text area
key_label = Label("Current Entry:")
entry_editor = Textarea(layout=editor_layout, value="")
bool_selector = Checkbox(description="is_correct", value=True)  # checked by default

# Navigation / persistence buttons
save_button = Button(button_style="success", description="Save Changes and Next")
skip_button = Button(button_style="warning", description="Skip Entry")
prev_button = Button(button_style="primary", description="Go Back")
save_progress_button = Button(button_style="info", description="Save Progress")

# One-line message area used by the handlers to report position and errors
status_label = Label("")

# Function to save the current state of the entry
def save_current_entry():
    """Store the editor contents as the annotation for the current entry.

    The editor text must parse to a JSON object. The checkbox state is written
    under its 'is_correct' key and the result replaces
    ``annotated_data[entry_index]``. Nothing is written to disk here.

    Returns:
        bool: True if the entry was stored, False otherwise. On failure the
        reason is shown in ``status_label`` (prefixed with
        "Error saving entry: ") and ``annotated_data`` is not modified.
    """
    try:
        current_entry = json.loads(entry_editor.value)  # Parse JSON from text area
        # Valid JSON is not necessarily an object (it may be a list, string or
        # number); only an object can carry the 'is_correct' key.
        if not isinstance(current_entry, dict):
            raise TypeError(
                f"expected a JSON object, got {type(current_entry).__name__}"
            )
        current_entry['is_correct'] = bool_selector.value  # Add/update 'is_correct'
        annotated_data[entry_index] = current_entry
    except Exception as e:
        # Best-effort by design: report in the UI instead of raising inside a
        # widget callback.
        status_label.value = f"Error saving entry: {e}"
        return False
    return True

# Function to load an entry
def load_entry():
    """Show the entry at ``entry_index`` in the editor, or finish the session.

    When the index is within ``entries``, the saved annotation is shown if it
    is non-empty, otherwise the original entry; the checkbox follows the
    entry's 'is_correct' value (True when absent). When the index is out of
    range, the output is cleared, progress is saved and a completion message
    is printed.
    """
    if not (0 <= entry_index < len(entries)):
        # Nothing left to show: persist what we have and report completion
        clear_output()
        save_progress()
        print(f"Annotation complete! Saved to {annotated_file}")
        return

    # Prefer the saved annotation; fall back to the untouched original
    shown = annotated_data[entry_index] or entries[entry_index]
    entry_editor.value = json.dumps(shown, indent=4)
    bool_selector.value = shown.get('is_correct', True)
    status_label.value = f"Entry {entry_index + 1} of {len(entries)}"

# Function to save progress to file
def save_progress(b=None):
    """Write ``annotated_data`` to ``annotated_file`` as indented JSON.

    Args:
        b: The clicked button when used as an ``on_click`` handler; unused.

    The data is first written to a sibling temporary file and then moved over
    the real progress file. Opening the progress file directly with "w"
    truncates it immediately, so a failure while serializing or writing would
    destroy the previously saved progress.
    """
    tmp_path = f"{annotated_file}.tmp"  # same directory, so the move stays on one filesystem
    with open(tmp_path, "w") as f:
        json.dump(annotated_data, f, indent=4)
    os.replace(tmp_path, annotated_file)
    status_label.value = f"Progress saved to {annotated_file}"

# Event handler for "Save Changes and Next"
def save_changes_and_next(b):
    """Save the edited entry and, only if that succeeded, move to the next one.

    Args:
        b: The clicked button; unused.

    ``save_current_entry`` reports a failure by writing a message starting
    with "Error saving entry" to ``status_label``. Advancing regardless would
    drop the user's edit and let ``load_entry`` overwrite the error message,
    so on failure the current entry stays in the editor.
    """
    global entry_index
    status_label.value = ""  # clear any earlier message so only a fresh error is detected
    save_current_entry()
    if status_label.value.startswith("Error saving entry"):
        return  # keep the editor contents so the user can fix them
    entry_index += 1  # Move to the next entry
    load_entry()

# Event handler for "Skip Entry"
def skip_entry(b):
    """Move to the next entry without recording an annotation for this one.

    Args:
        b: The clicked button; unused.

    Skipping must not call ``save_current_entry``: doing so stored the entry
    as annotated, which made this button identical to "Save Changes and Next".
    The entry's slot in ``annotated_data`` is left as it was.
    """
    global entry_index
    entry_index += 1
    load_entry()

# Event handler for "Go Back"
def go_back(b):
    """Save the current entry, then step back to the previous one.

    Args:
        b: The clicked button; unused.

    At the first entry there is nothing to go back to; an error message is
    shown in ``status_label`` instead.
    """
    global entry_index
    save_current_entry()  # keep whatever is in the editor before leaving it
    if entry_index <= 0:
        status_label.value = "Error: You are already at the first entry."
        return
    entry_index -= 1
    load_entry()

# Wire each button to its click handler
for button, handler in (
    (save_button, save_changes_and_next),
    (skip_button, skip_entry),
    (prev_button, go_back),
    (save_progress_button, save_progress),
):
    button.on_click(handler)

# Fill the editor with the current entry before the UI is shown
load_entry()

# Layout: label, editor, a row with the checkbox and navigation buttons,
# then the save-progress button and the status line
action_row = HBox([bool_selector, save_button, skip_button, prev_button])
ui = VBox([key_label, entry_editor, action_row, save_progress_button, status_label])
display(ui)

No progress file found at /home/azureuser/localfiles/code/slm-finetuning/fc-finetune-hf/src/data/model_data/commands_data-train_incomplete_annotated.json. Starting from scratch.


VBox(children=(Label(value='Current Entry:'), Textarea(value='{\n    "system": "<|im_start|>system\\nYou are a…