In [209]:
import json, os, random
import pandas as pd
from itertools import islice
from ipywidgets import ToggleButtons
from IPython.display import display, clear_output

In [210]:
# Number of phenotype groups (taken from the start of the loaded JSON) that are
# kept for review; the remaining groups are dropped before shuffling.
TOTAL_TO_REVIEW = 50

In [211]:
# Load the phenotype groups to review. Later cells treat each key as a
# JSON-encoded lead phenotype and each value as an iterable of phenotype dicts.
# The file handle gets its own name so it is never confused with the parsed
# data, and the encoding is explicit because JSON text is UTF-8 regardless of
# the platform's default locale.
with open('phenotypeGroups.json', encoding='utf-8') as groupsFile:
    phenotypeGroups = json.load(groupsFile)

In [212]:
# Keep only the first TOTAL_TO_REVIEW groups (in file order) ...
selectedGroups = list(islice(phenotypeGroups.items(), TOTAL_TO_REVIEW))
# ... then put them in a random order. Sampling every element without
# replacement is a shuffle; the order is not seeded, so it differs per run.
phenotypeGroups = dict(random.sample(selectedGroups, len(selectedGroups)))

# Show the lead phenotype name of every selected group. Each key is a JSON
# string; anything after the first '---' in the name is discarded.
leadNames = [
    json.loads(leadPhenotype)['"name"'].split('---')[0]
    for leadPhenotype in phenotypeGroups
]
print(leadNames)

['Acute-Kidney-Injury', 'Ethnic-Status', 'Depression', 'Intellectual-disability', 'Rheumatoid-Arthritis', 'Anxiety', 'Abdominal-aortic-aneurysm', 'Crohns-disease', 'Leukaemia', 'Pneumonia', 'Ischaemic-Heart-Disease', 'Alcohol-Problems', 'Osteoarthritis', 'Hearing-loss', 'Bipolar-affective-disorder-and-mania', 'Lupus-erythematosus', 'Self-Harm', 'Cardiovascular-Disease', 'Dementia', 'End-stage-renal-disease', 'Multiple-sclerosis', 'Chronic-Kidney-Disease', 'Coronary-heart-disease-not-otherwise-specified', 'Chronic-Obstructive-Pulmonary-Disease', 'Personality-disorders', 'Asthma', 'Cystic-Fibrosis', 'Dermatitis', 'Heart-failure', 'Stroke-NOS', 'Lower-Respiratory-Tract-Infections', 'Ankylosing-spondylitis', 'Tuberculosis', 'Juvenile-arthritis', 'Migraine', 'Giant-Cell-arteritis', 'Alopecia-areata', 'HIV', 'Polymyalgia-Rheumatica', 'Psoriasis', 'Blood-Pressure', 'Diabetes', 'Bacterial-Diseases', 'Atrioventricular-block-first-degree', 'Atrial-fibrillation', 'Appendicitis', 'COVID-19-infecti

In [213]:
def loadOrInitialise():
    """Load the saved review from 'review.csv', or build a fresh review table.

    When 'review.csv' exists it is read back and the review resumes after the
    last row that has a classification. Otherwise one row is created for every
    (lead phenotype, group member) pair in the module-level ``phenotypeGroups``
    mapping, with an empty 'classification' column.

    Returns:
        tuple: ``(df, index)`` where ``df`` is the review DataFrame with the
        columns 'condition group', 'group member' and 'classification', and
        ``index`` is the zero-based row to review next (``len(df)`` when every
        row is already classified).
    """
    if os.path.exists('review.csv'):
        df = pd.read_csv('review.csv')
        # A CSV written without the column (e.g. edited by hand) must not
        # abort the session with a KeyError; treat it as "nothing classified".
        if 'classification' not in df.columns:
            df['classification'] = pd.NA
        # A column that is entirely blank is parsed as float64 NaN. Force
        # object dtype so string labels can be assigned into it later.
        df['classification'] = df['classification'].astype(object)
        last_classified = df['classification'].last_valid_index()
        index = last_classified + 1 if last_classified is not None else 0
        if index >= len(df):
            print('review complete')
        else:
            print(f'resuming from item {index + 1}')
    else:
        df = pd.DataFrame(
            [
                [
                    # Keys are JSON-encoded lead phenotypes; only the part of
                    # each name before the first '---' is kept.
                    json.loads(leadPhenotype)['"name"'].split('---')[0],
                    phenotype['"name"'].split('---')[0],
                ]
                for leadPhenotype, phenotypes in phenotypeGroups.items()
                for phenotype in phenotypes
            ],
            columns=['condition group', 'group member'],
        )
        df['classification'] = pd.NA
        index = 0
    return df, index

In [214]:
def save(df):
    """Write the review table to 'review.csv' without touching ``df``.

    Missing classifications are written as empty strings, and the DataFrame
    index is not included in the file.
    """
    labels = df['classification']
    # Build a copy whose unclassified entries are '' rather than a missing value.
    snapshot = df.assign(classification=labels.mask(labels.isna(), ''))
    snapshot.to_csv('review.csv', index=False)

In [215]:
def review(df, index):
    """Show one review item with classification buttons and handle the click.

    Prints the position of the item (group number and row number) together
    with its 'condition group' and 'group member', then displays a
    ToggleButtons widget. Choosing a classification stores it in ``df``, saves
    the table, clears the cell output and moves on to the next row. Choosing
    'finish' only saves the table and leaves the current item on screen.

    Args:
        df: Review DataFrame with the columns 'condition group',
            'group member' and 'classification'.
        index: Zero-based row to review. Nothing happens when it is past the
            end of ``df``.
    """
    if index >= len(df):
        return
    item = df.iloc[index]
    # Distinct condition groups in first-seen order, used for the group counter.
    allConditions = list(dict.fromkeys(df['condition group']))
    groupNumber = allConditions.index(item['condition group']) + 1
    # Double-quoted f-strings keep the subscripts valid before Python 3.12,
    # where reusing the enclosing quote inside a replacement field is a
    # SyntaxError.
    print(f"group: {groupNumber} of {len(allConditions)}")
    print(f"review {index + 1} of {len(df)}:")
    print(f"condition group: {item['condition group']}")
    print(f"group member: {item['group member']}")
    buttons = ToggleButtons(
        options=['true positive', 'false positive', 'unsure', 'finish'],
        description='classification:',
        disabled=False,
        button_style='',
        style={'button_width': 'auto'},
    )
    # Start with nothing selected so that any click registers as a change.
    buttons.index = None

    def clicked(change):
        """React to a change of the selected button."""
        nonlocal index
        if change['new'] is not None:
            if change['new'] == 'finish':
                save(df)
            else:
                # NOTE(review): .at addresses rows by label while the item was
                # read by position with .iloc; this assumes the default
                # 0..n-1 index - confirm.
                df.at[index, 'classification'] = change['new']
                # Persist after every answer so progress is not lost if the
                # session is interrupted.
                save(df)
                clear_output()
                index += 1
                if index < len(df):
                    review(df, index)
                else:
                    print('review complete')

    buttons.observe(clicked, names='value')
    display(buttons)

In [216]:
# Load the saved review (or build a new one) and start reviewing at the first
# row that has no classification yet.
df, index = loadOrInitialise()
# Displays the current item and its buttons; later items are shown from the
# button callback as each one is classified.
review(df, index)

group: 26 of 50
review 130 of 254:
condition group: Asthma
group member: Asthma


ToggleButtons(description='classification:', options=('true positive', 'false positive', 'unsure', 'finish'), …