# Ranking Label Tool

Instructions:
```
    If possible, create an environment (conda|virtualenv)
    This notebook uses python = 3.6
    
    Make sure to install the following libraries:
    screeninfo==0.3.1
    opencv-python==4.4.0.40
    imagededup==0.2.2
```

While labeling, all information is saved to a temporary file named `backup.json`; do not delete it.


After pressing `<esc>` and closing the OpenCV windows, the file `SCINT_RANK.json` will be updated and `backup.json` will be removed (it is renamed to `SCINT_RANK.json`).

Usage:
```
    A:       move backward
    D:       move forward
    R:       reset rank
    <space>: Save progress to backup file
    <esc>:   Save progress to SCINT_RANK file and exit
    
    MOUSE CLICK:
    <RIGHTBUTTON>  Clear the rank
    <LEFTBUTTON>   Include/remove the clicked image in the rank

```
----
Author: João Phillipe Cardenuto

email:  phillipe.cardenuto@ic.unicamp.br

In [1]:
# import the necessary packages
import os
import shutil
from glob import glob
import numpy as np
import cv2
import json
import screeninfo
from IPython.display import clear_output

# Auxiliary Functions

In [2]:
def display_ranking(query):
    """
    Build a single panel image showing every similar image of the query.

    Each image is resized to ``query.display_image_shape`` and framed with a
    2-pixel border. Images listed in ``query.rank`` get a highlighted border
    and their 1-based rank position drawn on top of them. The tiles are laid
    out row by row; unused cells of the last row are filled with blank tiles.

    Parameters
    ----------
    query: obj of type Query
        Must expose ``similar_imgs``, ``similar_imgs_array``, ``rank`` and
        ``display_image_shape`` and contain at least one similar image
        (an empty query raises ZeroDivisionError when the grid is computed).

    Return
    ------
    panel: cv2 Img
        The assembled panel with ``floor(sqrt(n))`` rows of tiles.
    """
    bordersize = 2
    highlight_color = [0, 0, 200]
    neutral_color = [69, 87, 96]

    total_imgs = len(query)

    # Every tile is resized to this (rows, cols) reference shape
    rows, cols = query.display_image_shape

    # Map each ranked image to its 1-based position once, instead of
    # searching the rank list for every tile. setdefault keeps the first
    # occurrence, matching list.index().
    rank_position = {}
    for position, name in enumerate(query.rank, start=1):
        rank_position.setdefault(name, position)

    tiles = []
    for name, image in zip(query.similar_imgs, query.similar_imgs_array):
        position = rank_position.get(name)
        if position is None:
            tile = _bordered_tile(image, (cols, rows), bordersize, neutral_color)
        else:
            tile = _bordered_tile(image, (cols, rows), bordersize, highlight_color)
            tile = _stamp_rank(tile, position, rows, cols)
        tiles.append(tile)

    # number of img in vertical
    nv_imgs = int(np.floor(np.sqrt(total_imgs)))
    # number of img in horizontal
    nh_imgs = int(np.ceil(total_imgs / nv_imgs))

    # Complete the rectangular grid with blank tiles in the border colour
    missing = nv_imgs * nh_imgs - total_imgs
    if missing > 0:
        filler = np.zeros_like(tiles[0])
        filler[:] = neutral_color
        tiles.extend([filler] * missing)

    # Row-major layout: tile k lands on row k // nh_imgs, column k % nh_imgs
    grid_rows = [
        np.hstack(tiles[row * nh_imgs:(row + 1) * nh_imgs])
        for row in range(nv_imgs)
    ]
    return np.vstack(grid_rows)


def _bordered_tile(image, size, bordersize, color):
    """Resize `image` to `size` (cols, rows) and frame it with a constant-colour border."""
    return cv2.copyMakeBorder(
        cv2.resize(image, size),
        top=bordersize,
        bottom=bordersize,
        left=bordersize,
        right=bordersize,
        borderType=cv2.BORDER_CONSTANT,
        value=color,
    )


def _stamp_rank(tile, rank_pos, rows, cols):
    """Draw `rank_pos` on `tile` over a mostly-white patch so the number stays readable."""
    label_size = 30
    bottom, left = rows // 3, cols // 2
    # Clamp at 0: a negative start would wrap around and select an empty patch
    top = max(bottom - label_size, 0)

    patch = tile[top:bottom, left:left + label_size]
    if patch.size:
        white = np.full(patch.shape, 255, dtype=np.uint8)
        tile[top:bottom, left:left + label_size] = cv2.addWeighted(
            patch, 0.1, white, 0.9, 1.0)

    return cv2.putText(img=tile,
                       text=str(rank_pos),
                       org=(left, bottom),
                       fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                       fontScale=1,
                       color=(20, 20, 180),
                       thickness=2)


class Query():
    """
    Holds everything the tool needs about one query image.

    The data is read from the module-level ``JSON_RANK`` dictionary.

    - qpath: <str>
        Key of the query inside JSON_RANK
    - qname: <str>
        Base name of ``qpath``
    - qimage: cv2 image
        Query image loaded from the entry's 'IMG_PATH'
    - cnn_rank: List [[<str>, <float>]]
        [image name, similarity score] pairs stored under 'CNN_RANK'
    - checked: value stored under 'CHECKED'
    - qclass: value stored under 'CLASS'
    - rank: List [<str>]
        Images from similar_imgs selected as relevant for the query
    - similar_imgs: List [<str>]
        Names of the images shown on the panel: the ranked ones first,
        followed by the most similar images according to cnn_rank
    - similar_imgs_array: List [cv2 image]
        Loaded images, in the same order as similar_imgs
    - display_image_shape: <tuple>
        (rows, cols) of the similar image with the smallest rows + cols;
        used as the common tile shape on the panel
    """

    def __init__(self, q_key):
        entry = JSON_RANK[q_key]

        self.qpath = q_key
        self.qname = os.path.basename(self.qpath)
        # NOTE: cv2.imread yields None for an unreadable file; not checked here
        self.qimage = cv2.imread(entry['IMG_PATH'])

        self.cnn_rank = entry['CNN_RANK']

        self.checked = entry['CHECKED']
        self.rank = entry["ANNOTATED_RANK"]
        self.qclass = entry['CLASS']

        self.update_similar_imgs()

    def __len__(self):
        """Number of similar images shown for this query."""
        return len(self.similar_imgs)

    def _get_top_similar(self, n=36):
        """Return the names of the `n` highest-scored entries of cnn_rank."""
        by_score = sorted(self.cnn_rank, key=lambda pair: pair[1], reverse=True)
        return [pair[0] for pair in by_score[:n]]

    def update_similar_imgs(self):
        """
        Rebuild similar_imgs (ranked images first), reload the images and
        recompute display_image_shape.

        When the rank is empty it is initialised with the top 5 similar
        images. Attributes are only replaced once every image was loaded,
        so a failure leaves the previous state untouched.

        Raises
        ------
        FileNotFoundError
            If one of the similar images cannot be read.
        IndexError
            If there is no similar image at all.
        """
        top_similar = self._get_top_similar()

        if not self.rank:
            similar = top_similar
            # Default rank is the top 5 from the similar images
            rank = similar[0:5]
        else:
            rank = self.rank
            similar = list(rank)
            similar += [name for name in top_similar if name not in rank]

        # Similar images live in the same directory as the query image
        dirname = os.path.dirname(JSON_RANK[self.qpath]['IMG_PATH'])
        arrays = []
        for name in similar:
            path = os.path.join(dirname, name)
            image = cv2.imread(path)
            if image is None:
                raise FileNotFoundError(f"Could not read similar image '{path}'")
            arrays.append(image)

        # Smallest (rows + cols) image defines the tile shape; the click
        # handler needs it to find out which tile the user clicked
        shapes = sorted((np.sum(img.shape[:2]), img.shape[:2]) for img in arrays)
        shape = shapes[0][1]

        self.rank = rank
        self.similar_imgs = similar
        self.similar_imgs_array = arrays
        self.display_image_shape = shape

    def update_rank(self, save=True, savename='backup.json'):
        """
        Store the current rank in JSON_RANK and mark the query as checked.

        When `save` is true the whole JSON_RANK is also dumped to `savename`.
        """
        JSON_RANK[self.qpath]["CHECKED"] = True
        JSON_RANK[self.qpath]["ANNOTATED_RANK"] = self.rank
        if save:
            with open(savename, 'w') as bk:
                json.dump(JSON_RANK, bk)

    def reset_rank(self):
        """Drop the current rank and fall back to the default one."""
        self.rank = []
        self.update_similar_imgs()
    
def update_windows(q_index):
    """
    Load the query at position `q_index` and refresh the query window.

    Replaces the module-level `query` with a new Query built from
    LIST_JSON_KEYS[q_index] and shows "<class> - <name>" on the status bar
    of the query window. Registered as the trackbar callback and also
    called directly by the main loop.
    """
    global query, query_window_name, LIST_JSON_KEYS

    # Swap in the newly selected query
    query = Query(LIST_JSON_KEYS[q_index])

    # Show which query is on screen
    status_text = f'{query.qclass} - {query.qname}'
    cv2.displayStatusBar(query_window_name, status_text)
    
def select_img(event, x, y, flags, param):
    """
    Mouse callback of the ranking window.

    - right button down: empties the rank of the current query
    - left button down:  toggles the clicked image in the rank
    - left button up:    re-sorts the panel and stores the rank in JSON_RANK
                         (without writing the backup file)

    `x` and `y` are the pointer coordinates on the panel; `flags` and `param`
    are part of the OpenCV callback signature and are not used. Clicks that
    do not hit an image (filler tiles or outside the grid) are ignored.
    """
    # Clear rank with the right button
    if event == cv2.EVENT_RBUTTONDOWN:
        query.rank = []
        return

    # Update rank with the left button
    if event == cv2.EVENT_LBUTTONDOWN:
        total_imgs = len(query)  # Number of similar imgs to query

        # Same grid layout as the one used to draw the panel
        nv_imgs = int(np.floor(np.sqrt(total_imgs)))
        nh_imgs = int(np.ceil(total_imgs / nv_imgs))

        # Tile size on the panel: resized image plus a 2 px border per side
        r_height, r_width = query.display_image_shape
        r_height += 4
        r_width += 4

        # Row-major position of the clicked tile
        column = x // r_width
        row = y // r_height
        image_pos = column + row * nh_imgs

        # display position of the clicked image in the panel
        display_text = str(image_pos)
        cv2.displayStatusBar(ranking_window_name, display_text, delayms=1000)

        # Valid positions are 0 .. total_imgs - 1. The column/row checks keep
        # a click beside the grid from wrapping onto another row.
        on_grid = 0 <= column < nh_imgs and 0 <= row < nv_imgs
        if on_grid and image_pos < total_imgs:
            clicked = query.similar_imgs[image_pos]
            if clicked in query.rank:
                query.rank.remove(clicked)
            else:
                query.rank.append(clicked)

    if event == cv2.EVENT_LBUTTONUP:
        query.update_similar_imgs()
        query.update_rank(save=False)
            
def create_query_window():
    """
    Create the window that shows the query image.

    The window is placed at the top of the screen, starting at 2/5 of the
    screen width, and sized to 1/5 of the screen width by 1/4 of its height.
    Reads the module-level `query_window_name` and `screen`.
    """
    # WINDOW_NORMAL (a window flag, value 0) makes the window resizable;
    # WND_PROP_FULLSCREEN is a property id that merely shares that value.
    cv2.namedWindow(query_window_name, cv2.WINDOW_NORMAL)
    cv2.moveWindow(query_window_name, 2*screen.width//5, 0)
    cv2.resizeWindow(query_window_name, screen.width//5, screen.height//4)
    
def create_rank_window(total_queries):
    """
    Create the ranking window with its trackbar and mouse callback.

    The window spans the full screen width and sits below the query window
    (top edge at 1/4 of the screen height plus 100 px). Reads the
    module-level `ranking_window_name` and `screen`.

    Parameters
    ----------
    total_queries: int
        Maximum value of the "Query List" trackbar, i.e. the highest query
        index that can be selected with the slider.
    """
    top = screen.height//4 + 100

    # WINDOW_NORMAL (a window flag, value 0) makes the window resizable;
    # WND_PROP_FULLSCREEN is a property id that merely shares that value.
    cv2.namedWindow(ranking_window_name, cv2.WINDOW_NORMAL)
    cv2.moveWindow(ranking_window_name, 0, top)
    cv2.resizeWindow(ranking_window_name, screen.width, screen.height - top)

    # Moving the slider loads the selected query; clicks edit its rank
    cv2.createTrackbar("Query List", ranking_window_name, 0, total_queries, update_windows)
    cv2.setMouseCallback(ranking_window_name, select_img)



# Generate json rank files

In [3]:
from imagededup.methods import CNN
cnn_encoder = CNN()

# Build SCINT_RANK.json only when it does not exist yet. Each sub-directory of
# 'panel_per_class' is one class; its ranking comes from '<class>_rank.json'
# when that file exists, otherwise from the imagededup CNN encoder.
if not os.path.isfile('SCINT_RANK.json'):
    scint_rank = {}

    for dirname in sorted(glob('panel_per_class/*')):
        if not os.path.isdir(dirname):
            continue
        rank_class = os.path.basename(dirname)
        rank_file = f'{rank_class}_rank.json'

        if os.path.isfile(rank_file):
            with open(rank_file, 'r') as rj:
                JSON_RANK = json.load(rj)
        else:
            # Similarity is set to 0.3
            JSON_RANK = cnn_encoder.find_duplicates(image_dir=dirname,
                                                    min_similarity_threshold=0.3,
                                                    scores=True)
            # Cast the scores to float so they can be serialized to JSON
            JSON_RANK = {key: [[v[0], float(v[1])] for v in value]
                         for key, value in JSON_RANK.items()}

        # Nothing to rank for this class
        if not JSON_RANK:
            continue

        # A list value is a raw CNN result; otherwise the entry already
        # carries the CHECKED / CNN_RANK / ANNOTATED_RANK fields
        processed = not isinstance(next(iter(JSON_RANK.values())), list)
        label = 'ss' if processed else 'Processing'

        LIST_QUERIES = sorted(JSON_RANK)
        total = len(JSON_RANK)

        for q_index, q in enumerate(LIST_QUERIES, start=1):
            print(f'{label} {rank_class} - {q_index} / {total}',
                  end='\r', flush=True)

            if processed:
                checked = JSON_RANK[q]['CHECKED']
                cnn_rank = JSON_RANK[q]['CNN_RANK']
                annotated_rank = JSON_RANK[q]['ANNOTATED_RANK']
            else:
                checked = False
                # Keep only the 50 most similar images
                cnn_rank = sorted(JSON_RANK[q], key=lambda x: x[1], reverse=True)[:50]
                annotated_rank = []

            # Insert query into scint_rank.
            # NOTE(review): entries are keyed by file name only, so equal file
            # names in different classes would overwrite each other.
            scint_rank[q] = {
                'CHECKED': checked,
                "CNN_RANK": cnn_rank,
                "ANNOTATED_RANK": annotated_rank,
                'CLASS': rank_class,
                'IMG_PATH': f'{dirname}/{q}',
            }
        print("\nDone")

    # labeling position status
    scint_rank["LAST_POSITION"] = 0
    # Save Rank into Json File
    with open("SCINT_RANK.json", 'w') as srj:
        json.dump(scint_rank, srj)


clear_output()
print("Done!")

Done!


# TOOL RUN CELL
----

In [None]:
########################
# Load the ranking state from SCINT_RANK.json.
# NOTE: a leftover backup.json from an interrupted session is not read here.
RANK_JSON_FILE = 'SCINT_RANK.json'
TRACKBAR_NAME = "Query List"  # must match the trackbar created in create_rank_window

if not os.path.isfile(RANK_JSON_FILE):
    raise IOError(f"File {RANK_JSON_FILE} does not exist")

with open(RANK_JSON_FILE, 'r') as f:
    JSON_RANK = json.load(f)

# LAST_POSITION is bookkeeping, not a query: take it out of JSON_RANK
LAST_POSITION = JSON_RANK.pop('LAST_POSITION', 0)

LIST_JSON_KEYS = list(JSON_RANK.keys())

########################
# Screen used to place the OpenCV windows
screen_id = 0
screen = screeninfo.get_monitors()[screen_id]

########################
# Module-level state shared with the callbacks
query_window_name = 'Query'
ranking_window_name = 'Ranking'
query_index = LAST_POSITION
total_queries = len(LIST_JSON_KEYS)

try:
    # Set query windows
    create_query_window()
    # Set ranking windows (the argument is the highest selectable index)
    create_rank_window(total_queries - 1)
    # Load the query where the previous session stopped
    update_windows(query_index)
    cv2.setTrackbarPos(TRACKBAR_NAME, ranking_window_name, query_index)

    while True:
        # Keep only the low byte: some backends add modifier bits to the code
        key = cv2.waitKey(20) & 0xFF

        # Display images on their respective windows
        cv2.imshow(query_window_name, query.qimage)
        cv2.imshow(ranking_window_name, display_ranking(query))

        if key in (ord('d'), ord('a')):
            # Update backup json before leaving the current query
            query.update_rank()

            # The slider is the source of truth for the current position
            step = 1 if key == ord('d') else -1
            query_index = cv2.getTrackbarPos(TRACKBAR_NAME, ranking_window_name)
            query_index = (query_index + step) % total_queries
            update_windows(query_index)

            # Display if Query was checked
            chk = f"QUERY CHECKED - {query.checked}"
            cv2.displayOverlay(ranking_window_name, chk, delayms=2000)
            # Move the slider to the new query
            cv2.setTrackbarPos(TRACKBAR_NAME, ranking_window_name, query_index)

        elif key == ord('r'):
            # Reset Rank
            query.reset_rank()

        elif key == ord(' '):
            # Update backup json
            cv2.displayOverlay(ranking_window_name, 'SAVED', delayms=5000)
            query.update_rank()
            # Reload the query the slider points at (it may have been dragged)
            query_index = cv2.getTrackbarPos(TRACKBAR_NAME, ranking_window_name)
            update_windows(query_index)

        elif key == 27:
            print("ESCAPE")
            cv2.displayOverlay(ranking_window_name, 'EXIT', delayms=1000)
            query.update_rank()
            # Remember the position actually shown, even after a slider drag
            query_index = cv2.getTrackbarPos(TRACKBAR_NAME, ranking_window_name)
            break

    print("Saving Labeling Status")
    # Save all Labeling Status
    LAST_POSITION = query_index
    JSON_RANK['LAST_POSITION'] = LAST_POSITION
    query.update_rank()

    # Promote the backup to RANK_JSON_FILE. os.replace overwrites an existing
    # destination on every platform, whereas os.rename fails on Windows.
    if os.path.isfile("backup.json"):
        os.replace('backup.json', RANK_JSON_FILE)
finally:
    # Close the windows even when the loop is interrupted by an error
    cv2.destroyAllWindows()

In [119]:
# Close every OpenCV window that is still open (presumably a manual cleanup
# for when the tool cell above stopped before reaching its own call).
cv2.destroyAllWindows()