In [None]:
# We clone all the code and tools from GitHub to this Notebook
!git clone -b 2-vdb https://github.com/kasparvonbeelen/heritageweaver.git
!sh /content/heritageweaver/create_env.sh

In [None]:
# Restart the session so that the updated Pillow version gets loaded:
# send signal 9 to the process running this notebook.
import os

current_pid = os.getpid()
os.kill(current_pid, 9)

In [None]:
import time
from io import BytesIO
from pathlib import Path

import chromadb, random, requests
import ipyannotations.generic
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

from heritageweaver.weavingtools.annotation_tools import *
from heritageweaver.weavingtools.weaving_tools import *
# Apply seaborn's default plot styling.
sns.set()

# Folder that receives the saved annotation files; created on first run.
out_path = Path('/content/annotations')
out_path.mkdir(exist_ok=True)

In [None]:
def plot_record_pair(record_pair):
    """Show the images of two records side by side, titled with their descriptions.

    Each record is looked up by ``record_id`` in the global ``collection_df``;
    its image is downloaded from ``img_url`` and drawn on its own axis with
    the wrapped ``description`` as title.

    Parameters
    ----------
    record_pair : sequence
        Holds the two ``record_id`` values at positions 0 and 1.

    Raises
    ------
    IndexError
        If a record id has no matching row in ``collection_df``.
    requests.HTTPError
        If an image URL answers with an HTTP error status.
    requests.Timeout
        If the image server does not respond in time.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 7.5))

    for i in range(2):
        record = collection_df[collection_df.record_id==record_pair[i]]
        img_path = record.img_url.values[0]
        if 'sciencemuseum' in img_path:
            # Science Museum URLs are rewritten: the '/images/' path segment
            # is dropped and the whole URL is lower-cased.
            img_path = img_path.replace('.uk/images/','.uk/').lower()
        description = soft_wrap_text(record.description.values[0])

        # Download the complete, content-decoded body instead of reading the
        # raw socket stream; fail loudly on HTTP errors and never wait
        # indefinitely on an unresponsive server.
        response = requests.get(img_path, timeout=30)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert('RGB')

        axes[i].imshow(img)
        axes[i].set_title(description, fontsize = 18)
        axes[i].axis('off')

    plt.show()

In [None]:
# --- Settings for this annotation session ---

# Prefix of the saved annotation file.
annotator = 'KB'

# Number of record pairs to label.
num_annotations = 10

# The two collections to compare and the modality used on each side;
# all four values are passed to get_edges and stored with the annotations.
coll1 = 'smg'
coll2 = 'nms'
modality1 = 'image'
modality2 = 'image'

# Passed to get_edges -- presumably a similarity percentile cut-off (TODO confirm).
percentile = 99.5

# When True the candidate pairs are shuffled before the first
# num_annotations of them are selected.
randomize = True

In [None]:
# Open the "ce_comms_db" database via load_db (second argument looks like
# the name of the embedding model -- TODO confirm) and read the record table.
collection_db = load_db("ce_comms_db", 'google/siglip-base-patch16-224')
collection_df = pd.read_csv('/content/heritageweaver/data/communications_data.csv')

# Candidate links between the two collections for the chosen modalities.
edges, image_similarities, inputs = get_edges(
    collection_db, coll1, coll2, modality1, modality2, percentile
)

# Display how many candidate pairs were found.
len(edges)

In [None]:
# Pick the record pairs for this session.
if randomize:
    random.shuffle(edges)  # shuffles `edges` in place
img_pairs = edges[:num_annotations]  # working queue, consumed as pairs are shown
to_annotate = img_pairs.copy()       # untouched copy in presentation order
labels = []                          # labels[i] is the label given to to_annotate[i]

widget = ipyannotations.generic.ClassLabeller(
        options=['link', 'no link'], allow_freetext=True,
        display_function=plot_record_pair)


def store_annotations(entity_annotation):
    """Record the label submitted for the pair on screen and show the next pair.

    Submissions made after every pair has been labelled are ignored, so
    ``labels`` never grows beyond ``to_annotate``. The queue is tested
    explicitly rather than by catching ``IndexError`` from ``pop``, so an
    ``IndexError`` raised while displaying a pair is no longer mistaken for
    the end of the session.
    """
    if len(labels) >= len(to_annotate):
        print("Finished.")
        return
    labels.append(entity_annotation)
    if img_pairs:
        widget.display(img_pairs.pop(0))
    else:
        print("Finished.")

widget.on_submit(store_annotations)
# Show the first pair; with no candidate pairs there is nothing to display.
if img_pairs:
    widget.display(img_pairs.pop(0))
else:
    print("No record pairs to annotate.")
widget


In [None]:
# Labels are collected in presentation order, so labels[i] belongs to
# to_annotate[i]. Only pairs that actually received a label are saved; this
# keeps both columns the same length when the session was stopped early or
# when fewer than num_annotations candidate pairs were available.
num_labelled = min(len(labels), len(to_annotate))
if num_labelled < len(to_annotate):
    print(f"Only {num_labelled} of {len(to_annotate)} pairs were annotated; saving those.")

annotations_df = pd.DataFrame(to_annotate[:num_labelled], columns=['coll1','coll2'])
annotations_df['labels'] = labels[:num_labelled]
# Store the session settings on every row so the file is self-describing.
for varname, var in [('coll1_name',coll1), ('coll2_name',coll2), ('modality1',modality1), ('modality2',modality2)]:
    annotations_df[varname] = var
# One file per save: annotator prefix plus the current Unix timestamp.
annotations_df.to_csv(out_path / f'{annotator}_{time.time()}')

# Fin.