Let's make this interface really simple for now and extensible for later. The main purpose of the interface is to make it easy to manually label our data. To this end, I will read in our JSON file continuously, record user information, and save it. There will be minimal error handling or edge case testing. We assume the user will select the proper results each time. User progress will not be saved. If overrides are needed, please override through the interface code itself. This will be very jank, but should be fast, simple, and work for our very specific use case.

Make a continuous annotation_ui file stream. We will assume that the user is keeping track of which annotations they've already made on files.

In [1]:
import gradio as gr
import re
import json
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_speaker(opening_statement):
    """Return the contents of the first ``<speaker>...</speaker>`` tag.

    Args:
        opening_statement: String containing at least one
            ``<speaker>...</speaker>`` pair on a single line.

    Returns:
        The text between the first pair of speaker tags.

    Raises:
        IndexError: If no speaker tag pair is found.
    """
    speaker_pattern = re.compile(r"<speaker>(.*?)</speaker>")
    speakers_found = speaker_pattern.findall(opening_statement)
    return speakers_found[0]

def get_opening_text(opening_statement):
    """Return the contents of the first ``<text>...</text>`` tag.

    Args:
        opening_statement: String containing a ``<text>...</text>`` section.

    Returns:
        The text between the first pair of text tags, newlines included.

    Raises:
        IndexError: If no ``<text>...</text>`` section is present.
    """
    # re.DOTALL lets "." cross line breaks, so a statement whose text spans
    # several lines is still matched instead of failing the lookup.
    found = re.search(r"<text>(.*?)</text>", opening_statement, flags=re.DOTALL)
    if found is None:
        # Same exception type as indexing an empty findall() result, but with
        # a message that says what was missing.
        raise IndexError("opening statement has no <text>...</text> section")
    return found.group(1)

def get_text(case_metadata):
    """Strip paragraph tags from a string and flatten it onto one line.

    ``<p>`` and ``</p>`` are removed and every newline becomes a single
    space. The replacements are applied in that order.
    """
    flattened = case_metadata
    for needle, substitute in (("<p>", ""), ("</p>", ""), ("\n", " ")):
        flattened = flattened.replace(needle, substitute)
    return flattened

def format_opening_statement(opening_statement):
    """Render an opening statement as ``**<speaker>** <text>``.

    The speaker and text are extracted with ``get_speaker`` and
    ``get_opening_text``; the speaker is wrapped in markdown bold markers.
    """
    speaker_name = get_speaker(opening_statement)
    statement_body = get_opening_text(opening_statement)
    return "**{}** {}".format(speaker_name, statement_body)

def get_year(transcript_id):
    """Return the first four characters of ``transcript_id``."""
    leading_four = slice(None, 4)
    return transcript_id[leading_four]

def get_docket(transcript_id):
    """Return characters 5 through 10 (zero-based) of ``transcript_id``."""
    docket_span = slice(5, 11)
    return transcript_id[docket_span]

In [3]:
# Directory that get_brief_json reads "<first 11 chars of transcript id>.json"
# files from.
BRIEF_TRANSCRIPTS_DIR = "2023-2024_case_briefs/"
# Read the annotation dataset once at start-up. The encoding is given
# explicitly so the load does not depend on the platform's default encoding.
with open('datasets/2024_questions_for_eval.json', encoding="utf-8") as f:
    all_data_to_annotate = json.load(f)

def get_brief_json(transcript_id):
    """Load and return the parsed case-brief JSON for a transcript.

    The brief file is named after the first 11 characters of
    ``transcript_id`` plus ``.json`` and is read from
    ``BRIEF_TRANSCRIPTS_DIR``.

    Args:
        transcript_id: Transcript identifier string.

    Returns:
        The object produced by ``json.load`` for the brief file.

    Raises:
        FileNotFoundError: If no brief file exists for the transcript.
    """
    brief_name = transcript_id[:11] + ".json"
    brief_path = os.path.join(BRIEF_TRANSCRIPTS_DIR, brief_name)
    # Explicit encoding keeps the read independent of the platform default.
    with open(brief_path, encoding="utf-8") as json_file:
        return json.load(json_file)

def generate_markdown(transcript_id, petitioner_or_respondent, justice_name):
    """Build the markdown shown to the annotator for one transcript.

    Args:
        transcript_id: Value of the ``"transcript_id"`` key of the entry in
            ``all_data_to_annotate`` to display.
        petitioner_or_respondent: Side label; its lower-cased form is used as
            the key into the transcript entry.
        justice_name: ``"Samuel Alito"`` selects the ``"alito"`` questions;
            any other value selects ``"sotomayor"``.

    Returns:
        A markdown string with the case name, docket and year, the facts of
        the case, the legal question, the advocate's opening statement, and a
        bullet list of the justice's coherent questions.

    Raises:
        IndexError: If no entry matches ``transcript_id``.
    """
    matching_transcripts = [
        x for x in all_data_to_annotate if x["transcript_id"] == transcript_id
    ]
    selected_transcript = matching_transcripts[0]
    advocate_side = selected_transcript[petitioner_or_respondent.lower()]
    justice_key = "alito" if justice_name == "Samuel Alito" else "sotomayor"
    true_questions_to_advocate = advocate_side[justice_key]["true_questions"]

    opening_statement = advocate_side["opening_statement"]
    coherent_questions = true_questions_to_advocate[0]["coherent"]
    brief_json = get_brief_json(transcript_id)

    # Build the bullet list outside the f-strings: a backslash inside an
    # f-string expression is a syntax error before Python 3.12. The stray
    # apostrophe that used to trail every bullet is dropped.
    question_bullets = "\n".join(f"* {get_text(x)}" for x in coherent_questions)

    # Assemble the document line by line with no leading indentation. The
    # previous indented template mixed indented lines with un-indented bullet
    # lines, which can make the indented headings render as a code block.
    markdown_lines = [
        f"# {brief_json['name']} (Docket {get_docket(transcript_id)}, {get_year(transcript_id)})",
        "### Facts of the Case",
        get_text(brief_json["facts_of_the_case"]),
        "### Legal Question",
        get_text(brief_json["question"]),
        f"## {petitioner_or_respondent} {get_speaker(opening_statement)}'s Opening Statement",
        get_opening_text(opening_statement),
        f"## {justice_name}'s Questions (from the Oral Argument Transcripts)",
        question_bullets,
    ]
    return "\n".join(markdown_lines)

# Build the annotation UI: a configuration column (transcript, side, justice,
# start button) and a listing column that shows the generated markdown.
with gr.Blocks(theme=gr.themes.Ocean()) as annotation_ui:
    with gr.Column() as config_setup_row:
        # initial config set-up
        # Choices are passed as a list rather than a one-shot generator.
        transcript_select_dropdown = gr.Dropdown([x["transcript_id"] for x in all_data_to_annotate],
                                                label="Select Transcript to Annotate", interactive=True)
        petitioner_or_respondent_select = gr.Radio(["Petitioner", "Respondent"], label="Select Side", interactive=True)
        # Hidden until a side has been selected (see the select handler below).
        justice_select = gr.Radio(["Sonia Sotomayor", "Samuel Alito"], label="Select Justice", interactive=True, visible=False)
        question_select_button = gr.Button(value="Annotate!")

    with gr.Column() as listing:
        transcript_info = gr.Markdown()

        input_text = gr.Textbox(label="input")

        # NOTE(review): the "Lion" tabs below look like leftover demo
        # scaffolding unrelated to annotation -- confirm before relying on it.
        @gr.render(inputs=input_text)
        def show_split(text):
            # `not text` also covers a None value, which len() would reject.
            if not text:
                gr.Markdown("## No Input Provided")
            else:
                for letter in text:
                    with gr.Tab("Lion"):
                        gr.Image("lion.jpg")
                        gr.Button("New Lion")

    # config set-up interactive logic
    petitioner_or_respondent_select.select(fn=lambda: gr.Radio(visible=True), inputs=None, outputs=justice_select)
    justice_select.select(fn=lambda: gr.Button(visible=True), inputs=None, outputs=question_select_button)
    # config_setup_row is a gr.Column, so hide it with a Column update, then
    # render the markdown for the chosen transcript, side and justice.
    question_select_button.click(fn=lambda: gr.Column(visible=False), inputs=None, outputs=config_setup_row).then(
        generate_markdown, [transcript_select_dropdown, petitioner_or_respondent_select, justice_select], transcript_info
    )
annotation_ui.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [9]:
# Shut down the server started by annotation_ui.launch().
annotation_ui.close()

Closing server running on port: 7861
