# Image Annotator for Labelling Images Into User-Defined Categories
Adapted from : https://blogs.sas.com/content/subconsciousmusings/2021/11/23/introducing-jupicl-fast-image-labelling-entirely-within-a-jupyter-notebook/

This code displays the images of the dataset and lets the user specify the weather conditions they can observe (for example cloudy, hazy or clear).

Input : PolarizedDatabase folder with cropped images grouped in numpy files. You can download all of the "YYYY-MM-DD_raw.npy" daily files, or only some of them, to speed up processing on your computer.

Output : One file per input file, with the same timestamps as the input file in one column and the annotations in another column.


## Import packages and define essential helper functions

In [1]:
import cv2
from IPython.display import display, clear_output
from PIL import Image
import os

import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict


In [2]:
# Locations of the input database, the calibration data and the annotation output.
in_path = "/scratch/lpoughon/PolarizedDatabase/"
calib_path = "/scratch/lpoughon/PolarizedDatabase/calib/"
out_path = "/scratch/lpoughon/PolarizedDatabase/"

# Limit zenith angle used to crop data circularly inside the camera field of view.
limit_Zenith_Angle = np.pi * 0.5

# List the input folder and keep only the entries whose name ends in "raw.npy".
dirs = os.listdir(in_path)

# The names start with the date (e.g. "2022-07-30_raw.npy"), so sorting them
# alphabetically also sorts them chronologically.
dirs_files = sorted(name for name in dirs if name.endswith('raw.npy'))

### Choose the file to annotate by changing `day_wanted` (its index in the sorted list of files)

In [10]:
# Index (in the sorted list dirs_files) of the day to annotate; change it to pick another day.
day_wanted = 1

input_file = in_path + dirs_files[day_wanted]
print(f"File used : {input_file}")

# NOTE: allow_pickle=True unpickles arbitrary Python objects, so only load
# files that come from a trusted source.
images_pola = np.load(input_file, allow_pickle=True)

# The annotations are saved next to the input, under "<input name>_annotations.npy".
save_path = f"{out_path}{dirs_files[day_wanted].split('.npy')[0]}_annotations.npy"
print(f"Will save results in :{save_path}")

File used : /scratch/lpoughon/PolarizedDatabase/2022-07-30_raw.npy
Will save results in :/scratch/lpoughon/PolarizedDatabase/2022-07-30_raw_annotations.npy


In [3]:
def mkdict_from_user():
    """Interactively build the mapping of class labels to keyboard shortcuts.

    Repeatedly prompts for entries of the form ``label:shortcut`` (for
    example ``cloudy:c``) until the user enters ``q``. Surrounding whitespace
    is ignored. A malformed entry is reported and asked again instead of
    aborting the whole session with an exception.

    Returns:
        dict: label name -> one-character keyboard shortcut.
    """
    userdict = {}
    while True:
        res = input("Enter 'class_labels:keyboard_shortcut', such as 'bag:b', or 'q' to quit -->  ").strip()
        if res == 'q':
            break

        # partition() never raises, unlike unpacking the result of split(':'),
        # which fails when the entry has no colon or more than one.
        key, sep, val = res.partition(':')
        key, val = key.strip(), val.strip()

        # get_userinput() only compares the first typed character against the
        # shortcuts, so a shortcut longer than one character could never match.
        if not sep or not key or len(val) != 1:
            print("Invalid entry, expected 'label:k' where k is a single character, try again...")
            continue

        userdict[key] = val

    return userdict

In [4]:
def get_userinput(user_labels):
    """Prompt until the user types a valid label shortcut, optionally with a note.

    Accepted answers are either a single shortcut character, or a shortcut
    character followed by '7', in which case a free-text note is requested
    on a second prompt. Any other answer prints a message and asks again.

    Args:
        user_labels: dict whose values are the accepted shortcut characters.

    Returns:
        tuple: (shortcut character, note). The note is 'default' when no
        note was requested or when the note typed was empty.
    """
    notes_key = '7'  # second character that requests a free-text note
    shortcuts = tuple(user_labels.values())
    prompt = f"Valid Shortcuts: {shortcuts} <Append'{notes_key}' for Notes> ... "

    while True:
        answer = input(prompt)

        if len(answer) not in (1, 2):
            print('Invalid input, try again...')
        elif answer[0] not in shortcuts:
            print(f'Invalid input, valid labels are {shortcuts} try again...')
        elif len(answer) == 1:
            # Plain shortcut, no note requested.
            return answer[0], 'default'
        elif answer[1] != notes_key:
            print(f"Invalid, second letter if present must be the keyboard shortcut for notes: '{notes_key}' ")
        else:
            typed_note = input("Notes: ")
            return answer[0], (typed_note if len(typed_note) > 0 else 'default')
            

## Define the main driver function for labelling

In [5]:
# Annotation table: column 0 holds the timestamps taken from column 2 of
# images_pola, column 1 holds the annotation of each row (0 until labelled).
liste_sortie_annotee = np.c_[np.array(images_pola[:, 2]), np.zeros(images_pola.shape[0])]


In [6]:
def goLabel_acq(user_labels, images_pola, liste_sortie_annotee):
    """Show the acquisitions in 10-minute groups and record one label per group.

    Rows of ``images_pola`` are grouped by their timestamp (column 2) floored
    to the start of its 10-minute window. For every group a mosaic of its
    images is displayed, the user is prompted through ``get_userinput`` and
    the chosen shortcut is written into column 1 of ``liste_sortie_annotee``
    for every row of the group.

    Args:
        user_labels: dict of label name -> keyboard shortcut, forwarded to
            ``get_userinput``.
        images_pola: array indexed as (row, column) where column 0 is the
            image, column 1 the exposure, column 2 the timestamp string in
            the format '%Y-%m-%dT%H-%M-%S' and column 3 the image type.
        liste_sortie_annotee: array with one row per row of ``images_pola``;
            its column 1 is overwritten in place.

    Returns:
        The same ``liste_sortie_annotee`` object, after modification.

    Note:
        The free-text note returned by ``get_userinput`` is not stored, and
        the counter printed at the end counts labelled groups (mosaics), not
        individual images.
    """
    timestamps = images_pola[:, 2]

    # Floor each timestamp to the start of its 10-minute window and collect
    # the row indices falling in each window (insertion order is preserved).
    group_indices = defaultdict(list)
    for row, stamp in enumerate(timestamps):
        parsed = datetime.strptime(stamp, '%Y-%m-%dT%H-%M-%S')
        window_start = parsed.replace(minute=(parsed.minute // 10) * 10, second=0)
        group_indices[window_start].append(row)

    def plot_mosaic(images, exposures, image_types, title, cols=4, subsampl=4):
        """Draw the images of one group as a mosaic and return the figure.

        Everything the plot needs is passed explicitly, so the helper does
        not depend on variables of the enclosing loop.
        """
        n_images = len(images)
        # Ceiling division: no empty extra row when n_images is a multiple of cols.
        rows = -(-n_images // cols)
        # squeeze=False keeps axs two-dimensional whatever rows/cols are.
        fig, axs = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4), squeeze=False)
        fig.suptitle(title)
        for i, ax in enumerate(axs.flat):
            if i < n_images:
                # Subsample to keep the display light; images are shown in grayscale.
                ax.imshow(images[i][::subsampl, ::subsampl], cmap='gray')
                ax.set_title("img_type "+str(image_types[i])+" \n expo : "+str(exposures[i])+" µs")
                ax.axis('off')
            else:
                # Unused cells of the last row.
                ax.remove()
        return fig

    n_groups = len(group_indices)
    sess_count = 0

    for indices_to_highlight in group_indices.values():
        # Rows of this 10-minute group: image, exposure, timestamp, image type.
        data = images_pola[indices_to_highlight, :]
        timestamp = data[:, 2]

        sess_count += 1

        fig = plot_mosaic(data[:, 0], data[:, 1], data[:, 3], str(timestamp[0])+" ")
        try:
            display(fig)
            print(f'Now Showing {timestamp[0]}'+" ("+str(sess_count)+"/"+str(n_groups)+")")

            label, _notes = get_userinput(user_labels)
            # The same label is applied to every acquisition of the group.
            liste_sortie_annotee[indices_to_highlight, 1] = label
            print("Choisi : ", label)

            clear_output()
        finally:
            # Close exactly the figure created above, even if the prompt is interrupted.
            plt.close(fig)

    print(f"You Labelled {sess_count} images in this round. Hooray!")
    return liste_sortie_annotee

## Set up labels and keyboard shortcuts

#### For example :

cloud:c

hazy:h

clear:n

In [7]:
# Ask the user for the label names and their keyboard shortcuts (enter 'q' to finish).
user_labels = mkdict_from_user()

Enter 'class_labels:keyboard_shortcut', such as 'bag:b', or 'q' to quit -->   cloudy:c
Enter 'class_labels:keyboard_shortcut', such as 'bag:b', or 'q' to quit -->   hazy:h
Enter 'class_labels:keyboard_shortcut', such as 'bag:b', or 'q' to quit -->   clear:n
Enter 'class_labels:keyboard_shortcut', such as 'bag:b', or 'q' to quit -->   q


## Main driver for labelling
### Rerun the following cell as many times as needed to label more batches of images

In [8]:
# Run the interactive labelling session: goLabel_acq writes the chosen labels into
# column 1 of liste_sortie_annotee in place and returns that same array.
labels=goLabel_acq(user_labels, images_pola, liste_sortie_annotee)


You Labelled 74 images in this round. Hooray!


In [9]:
# Write the annotated table (timestamps and labels) to save_path in NumPy .npy format.
np.save(save_path,labels)