In [201]:
import json, os, random
import pandas as pd
from itertools import islice
from ipywidgets import ToggleButtons
from IPython.display import display, clear_output

In [202]:
# Upper limit on how many phenotype groups (dictionary entries, not individual
# group members) are kept for review.
TOTAL_TO_REVIEW = 50

In [203]:
# Parse the phenotype groups file. Later cells treat the result as a mapping
# whose keys are JSON-encoded lead phenotypes and whose values are lists of
# member phenotypes. The encoding is given explicitly so decoding does not
# depend on the platform default, and the file handle has its own name so
# `phenotypeGroups` only ever refers to the parsed data.
with open('phenotypeGroups.json', encoding='utf-8') as groupsFile:
    phenotypeGroups = json.load(groupsFile)

In [None]:
# Keep the first TOTAL_TO_REVIEW groups (in file order), then shuffle the
# order in which those groups will be presented.
firstGroups = list(islice(phenotypeGroups.items(), TOTAL_TO_REVIEW))
phenotypeGroups = dict(random.sample(firstGroups, len(firstGroups)))

# Show the lead phenotype name of every selected group, in shuffled order.
print(
    [
        json.loads(groupKey)['"name"'].partition('---')[0]
        for groupKey in phenotypeGroups
    ]
)

In [205]:
def loadOrInitialise():
    """Load saved review progress from review.csv, or build a fresh review table.

    When review.csv exists it is read and reviewing resumes at the row after
    the last row that has a classification. Otherwise one row is built per
    (lead phenotype, member phenotype) pair from the module-level
    ``phenotypeGroups`` mapping.

    In both cases the returned frame has a ``classification`` column of
    object dtype, so string labels can be written into it later.

    Returns:
        tuple: ``(df, index)`` where ``df`` is the review DataFrame and
        ``index`` is the zero-based position of the next row to review
        (``len(df)`` when every row is already classified).
    """
    resuming = os.path.exists('review.csv')
    if resuming:
        df = pd.read_csv('review.csv')
    else:
        df = pd.DataFrame(
            [
                [
                    # Group keys are JSON-encoded strings; names carry a
                    # '---' suffix that is dropped for display.
                    json.loads(leadPhenotype)['"name"'].split('---')[0],
                    phenotype['"name"'].split('---')[0],
                ]
                for leadPhenotype, phenotypes in phenotypeGroups.items()
                for phenotype in phenotypes
            ],
            columns=['condition group', 'group member'],
        )

    # Applies to both branches: a fresh table never has the column, and a
    # saved CSV may lack it too.
    if 'classification' not in df.columns:
        df['classification'] = pd.NA
    # A CSV with no classifications yet is parsed as an all-NaN float column;
    # cast to object so string labels can be assigned without a dtype clash.
    df['classification'] = df['classification'].astype(object)

    if not resuming:
        return df, 0

    last_classified = df['classification'].last_valid_index()
    index = last_classified + 1 if last_classified is not None else 0
    if index >= len(df):
        print('review complete')
    else:
        print(f'resuming from item {index + 1}')
    return df, index

In [206]:
def save(df):
    """Write the review table to review.csv without mutating ``df``.

    Missing classifications are written as empty strings, and the index is
    not written.
    """
    classification = df['classification']
    # mask() swaps values where the condition holds, i.e. the missing entries.
    blanked = classification.mask(classification.isna(), '')
    # assign() returns a new frame, so the caller's data keeps its NA values.
    df.assign(classification=blanked).to_csv('review.csv', index=False)

In [207]:
def review(df, index):
    """Show the item at position ``index`` and record the reviewer's choice.

    Prints the item's group number, position, condition group and group
    member, then displays a row of toggle buttons. Choosing a classification
    stores it in ``df``, saves, clears the output and moves on to the next
    item. Choosing 'finish' only saves.

    Args:
        df: Review DataFrame with 'condition group', 'group member' and
            'classification' columns.
        index: Zero-based row position of the item to review. Nothing is
            shown when it is at or beyond the end of ``df``.
    """
    if index >= len(df):
        return
    item = df.iloc[index]
    # dict.fromkeys de-duplicates while keeping first-seen order.
    allConditions = list(dict.fromkeys(df['condition group']))
    groupNumber = allConditions.index(item['condition group']) + 1
    # Double quotes inside the replacement fields keep these f-strings valid
    # on Python versions before 3.12.
    print(f'group: {groupNumber} of {len(allConditions)}')
    print(f'review {index + 1} of {len(df)}:')
    print(f'condition group: {item["condition group"]}')
    print(f'group member: {item["group member"]}')
    buttons = ToggleButtons(
        options=['true positive', 'false positive', 'unsure', 'finish'],
        description='classification:',
        disabled=False,
        button_style='',
        style={'button_width': 'auto'},
    )
    # Clear the selection before display.
    buttons.index = None

    def clicked(change):
        """Handle a change of the buttons' value."""
        nonlocal index
        if change['new'] is not None:
            if change['new'] == 'finish':
                save(df)
            else:
                # Write through the row label at this position so the write
                # targets the same row that was read with iloc above.
                df.at[df.index[index], 'classification'] = change['new']
                save(df)
                clear_output()
                index += 1
                if index < len(df):
                    review(df, index)
                else:
                    print('review complete')

    buttons.observe(clicked, names='value')
    display(buttons)

In [None]:
# Load saved progress (or build a fresh review table), then show the next
# item to review.
df, index = loadOrInitialise()
review(df, index)