In [59]:
import os
from pathlib import Path

import nucleus
import nucleus.autocurate
import numpy as np
import scipy.stats

In [2]:
# The Nucleus API key is read from the environment so the secret is never
# stored in (or committed with) the notebook. Set it before starting the
# kernel, e.g. `export NUCLEUS_API_KEY=...`.
API_KEY = os.environ.get("NUCLEUS_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the NUCLEUS_API_KEY environment variable before running this notebook."
    )
client = nucleus.NucleusClient(API_KEY)

In [3]:
# Create the scratch dataset used by the rest of the notebook and keep its id
# around under a constant-style name.
dataset = client.create_dataset("Test Autocurate")
dataset_info = dataset.info()
DATASET_ID = dataset_info['dataset_id']

In [4]:
# Every fixture image sits in the same testdata folder of the client repository,
# so the URLs are assembled from one shared prefix plus the individual file names.
_TESTDATA_BASE_URL = "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata"
_TEST_IMG_NAMES = (
    "airplane.jpeg",
    "arctichare.jpeg",
    "baboon.jpeg",
    "barbara.jpeg",
    "cat.jpeg",
)
TEST_IMG_URLS = [f"{_TESTDATA_BASE_URL}/{img_name}" for img_name in _TEST_IMG_NAMES]

In [8]:
# Wrap each test image URL in a DatasetItem whose reference id is the image's
# file name, then append the whole batch to the dataset in one call.
ds_items = [
    nucleus.DatasetItem(image_location=img_url, reference_id=Path(img_url).name)
    for img_url in TEST_IMG_URLS
]
response = dataset.append(ds_items)

Remote file batches: 100%|██████████| 1/1 [00:08<00:00,  8.40s/it]


In [10]:
# Register the model that the test predictions below will be attributed to.
model = client.add_model(name="Test Model", reference_id="test-model")

In [46]:
# Open a model run on the dataset with an empty prediction list; the actual
# predictions are uploaded in a later cell via `run.predict`.
run = model.create_run(name="Test Model Run 2", dataset=dataset, predictions=[])

0it [00:00, ?it/s]


In [47]:
# Number of synthetic box predictions to generate for each image, in the same
# order as TEST_IMG_URLS (the fourth image deliberately gets none).
num_predictions_per_img = [2, 1, 3, 0, 2]


def _build_box_prediction(img_idx, pred_idx, n_preds):
    """Build one deterministic synthetic BoxPrediction.

    Args:
        img_idx: Position of the target image in TEST_IMG_URLS.
        pred_idx: Index of this prediction among the image's predictions.
        n_preds: Total number of predictions generated for the image.

    Returns:
        A nucleus.BoxPrediction whose geometry and class scores are simple
        functions of ``pred_idx`` and ``n_preds``; its confidence is the
        largest of the three class scores.
    """
    # The box geometry is shifted by the same integer amount on every axis.
    shift = 10 * (pred_idx + n_preds)
    # Class scores are computed once and reused for the confidence.
    score_a = pred_idx*0.05 + n_preds*0.05
    score_b = 0.1 + pred_idx*0.02 + n_preds*0.02
    score_c = 0.9 - pred_idx*0.07 - n_preds*0.07
    return nucleus.BoxPrediction(
        label=f"Test Prediction {img_idx}-{pred_idx}",
        x=60 + shift,
        y=50 + shift,
        width=80 + shift,
        height=70 + shift,
        reference_id=Path(TEST_IMG_URLS[img_idx]).name,
        class_pdf={"label_A": score_a, "label_B": score_b, "label_C": score_c},
        confidence=max(score_a, score_b, score_c),
    )


predictions = [
    _build_box_prediction(img_idx, pred_idx, n_preds)
    for img_idx, n_preds in enumerate(num_predictions_per_img)
    for pred_idx in range(n_preds)
]

In [48]:
# Display the generated predictions as a visual sanity check before uploading.
predictions

[BoxPrediction(label='Test Prediction 0-0', x=80, y=70, width=100, height=90, reference_id='airplane.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 0-1', x=90, y=80, width=110, height=100, reference_id='airplane.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 1-0', x=70, y=60, width=90, height=80, reference_id='arctichare.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 2-0', x=90, y=80, width=110, height=100, reference_id='baboon.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 2-1', x=100, y=90, width=120, height=110, reference_id='baboon.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 2-2', x=110, y=100, width=130, height=120, reference_id='baboon.jpeg', item_id=None, annotation_id=None, metadata={}),
 BoxPrediction(label='Test Prediction 4-0', x=80, y=70, width=

In [49]:
# Upload the synthetic predictions to the model run; the returned summary
# reports how many predictions were processed and how many were ignored.
run.predict(predictions)

100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


{'model_run_id': 'run_c4rhn9dwm91873cjtar0',
 'predictions_processed': 8,
 'predictions_ignored': 0}

In [50]:
# Commit the model run (presumably this finalizes the uploaded predictions so
# they can be used by autocurate; confirm against the Nucleus docs).
run.commit()

{'model_run_id': 'run_c4rhn9dwm91873cjtar0'}

In [66]:
# Start an entropy-based autocurate job over the committed run. The call
# returns an AsyncJob handle (see the cell output) rather than the ranking itself.
nucleus.autocurate.entropy("Mean Entropy Autocurate", [run], client)

AsyncJob(job_id='job_c4rmxenwm91adf5m2wgg', job_last_known_status='Started', job_type='autocurateEntropy', job_creation_time='2021-09-02T18:22:51.550Z', client=NucleusClient(api_key='test_47f6394c4822426389461f36334a45ff', use_notebook=False, endpoint='http://localhost:3000/v1/nucleus'))

In [56]:
# Fixed class order used when turning a prediction's class_pdf into a vector.
taxonomy = ['label_A', 'label_B', 'label_C']


def _prediction_entropy(prediction):
    """Return the scipy entropy of one prediction's class scores, in taxonomy order."""
    class_scores = [prediction.class_pdf[label] for label in taxonomy]
    return scipy.stats.entropy(class_scores)


# Local reference computation: one entropy value per uploaded prediction.
entropies_per_pred = list(map(_prediction_entropy, predictions))

In [62]:
def mean(slice):
    """Return the arithmetic mean of ``slice``, or 0 when it is empty.

    The empty case is handled explicitly so that ``np.mean`` is never called
    on an empty sequence. Note that the parameter name shadows the builtin
    ``slice`` inside this function.

    Args:
        slice: A sized sequence of numbers.

    Returns:
        ``np.mean(slice)`` for non-empty input, otherwise the integer 0.
    """
    return np.mean(slice) if len(slice) != 0 else 0
# Average the flat per-prediction entropies image by image. A running offset
# marks where each image's predictions start, so the counts list is walked once
# instead of being re-summed for both slice bounds on every image. Images with
# no predictions yield an empty slice, which `mean` maps to 0.
entropies_per_image = []
pred_offset = 0
for n_preds in num_predictions_per_img:
    entropies_per_image.append(mean(entropies_per_pred[pred_offset:pred_offset + n_preds]))
    pred_offset += n_preds

In [68]:
# Rank images from highest to lowest mean entropy. Tuples compare by entropy
# first and fall back to the file name when two entropies tie.
image_names = [Path(img_url).name for img_url in TEST_IMG_URLS]
sorted(zip(entropies_per_image, image_names), reverse=True)

[(0.9193399971342323, 'baboon.jpeg'),
 (0.7739506420086124, 'cat.jpeg'),
 (0.7739506420086124, 'airplane.jpeg'),
 (0.5588717879206501, 'arctichare.jpeg'),
 (0, 'barbara.jpeg')]