08.08.2022

# Creazione del dataset da inviare a Miguel

Processing finale delle masks per poi inviarle a Miguel con i files .csv che contengono le info necessarie per ogni video.

- Load **XX_corrected_rgb_mask_SPARKS.tif** and **XX_corrected_label_mask_SPARKS.tif** and save as **XX_corrected_rgb_mask_FINAL.tif** and **XX_corrected_label_mask_FINAL.tif**.
- Cambiare gli IDs degli eventi da numeri casuali a numeri tra `5` e `#events + 4`.
- Per ogni video, creare due files .csv, salvati nella cartella `data/raw_data_and_processing/manual_corr_separated_event_masks/events_csv`
    - Il primo, `XX_events_simple.csv`, contiene linee con le informazioni seguenti (una linea per ogni evento):
        - ID (# event)
        - class
        - start frame
        - end frame
        - x center of mass
        - y center of mass
    - Il secondo, `XX_events_full.csv`, contiene linee con le informazioni seguenti (più linee per lo stesso evento):
        - ID (# event)
        - class
        - x_array
        - y_array
        - Frame

**REMARK**: qui c'è scritto come trovare il center of mass delle ROIs

https://stackoverflow.com/questions/38598690/how-to-find-the-diameter-of-objects-using-image-processing-in-python

In [1]:
# autoreload is used to reload modules automatically before entering the
# execution of code typed at the IPython prompt.
%load_ext autoreload
%autoreload 2
# To import modules from parent directory in Jupyter Notebook
import sys

sys.path.append("..")

In [1]:
import os
import imageio
import napari
import pandas as pd

import numpy as np
import cc3d
from scipy import ndimage

from utils.visualization_tools import get_discrete_cmap
from utils.in_out_tools import load_movies_ids, load_annotations_ids
from data.data_processing_tools import get_event_parameters, get_event_parameters_simple

In [138]:
# Two-digit IDs of the movies to process: 01 to 46, where the numbers 26, 31
# and 37 are not part of the list.
movie_ids = [
    f"{number:02d}" for number in range(1, 47) if number not in (26, 31, 37)
]
# movie_ids = ['07']

In [20]:
# root folder of the raw data and processing results, one level above the
# current working directory
data_dir = os.path.join(os.pardir, "data", "raw_data_and_processing")

### directory where corrected masks are saved

In [5]:
# directory from which the corrected masks are loaded and to which the final
# masks are written; it is created if it does not exist yet
corr_dir = os.path.join(data_dir, "manual_corr_separated_event_masks")
os.makedirs(corr_dir, exist_ok=True)

### output directory for csv files

In [6]:
# sub-directory of corr_dir that receives the .csv files; created if missing
out_dir = os.path.join(corr_dir, "events_csv")
os.makedirs(out_dir, exist_ok=True)

## Load annotation masks (with suffix `_SPARKS`)

In [21]:
# Load the `_SPARKS` masks of all movies from corr_dir: first the rgb (event)
# masks, then the label (class) masks. Both results are indexed by movie ID in
# the cells below.
event_labels, class_labels = (
    load_annotations_ids(data_folder=corr_dir, ids=movie_ids, mask_names=mask_name)
    for mask_name in ("corrected_rgb_mask_SPARKS", "corrected_label_mask_SPARKS")
)

## Check some constraints and save annotation masks with suffix `_FINAL` 

### check that event IDs are > 4

In [77]:
# Report every movie whose events mask contains a non-zero event ID smaller
# than 5. The value 0 is excluded before taking the minimum: it is the value
# of the voxels that belong to no event, so including it would make the
# minimum useless for this check.
for movie_id in movie_ids:
    events_mask = event_labels[movie_id]

    # IDs of all voxels that belong to an event
    nonzero_ids = events_mask[events_mask != 0]

    # a mask without any event has nothing to check
    if nonzero_ids.size > 0 and nonzero_ids.min() < 5:
        print(f"Events mask in movie {movie_id} contain an event ID lower than 5.")

### check that event and class ROIs match

In [78]:
# The events mask and the classes mask must be non-zero at exactly the same
# positions. For every movie where this is not the case, print a message
# followed by the indices of the disagreeing positions.
for movie_id in movie_ids:
    events_mask, classes_mask = event_labels[movie_id], class_labels[movie_id]

    # True wherever only one of the two masks is non-zero
    mismatch = events_mask.astype(bool) != classes_mask.astype(bool)
    if mismatch.any():
        print(f"RGB and class masks do not match in movie {movie_id}")
        print(np.where(mismatch))

### check that each event is a single connected component

In [79]:
# connectivity passed to cc3d.connected_components in the cells below
# (26 presumably joins voxels that touch by a face, an edge or a corner —
# confirm against the cc3d documentation)
connectivity = 26

In [80]:
# integer code of the white colour; the events masks are asserted not to
# contain it in the connected-components check
# NOTE(review): the weights used here are powers of 255. The usual packing of
# an RGB triplet is 256 * 256 * R + 256 * G + B (white = 16777215) — confirm
# that this value matches the conversion applied when the masks are loaded.
white_int = 255 * 255 * 255 + 255 * 255 + 255
print("white colour:", white_int)

white colour: 16646655


In [81]:
# For every movie, find the events that are split into more than one connected
# component. separated_events[movie_id] is a list of
# [event ID, number of components] entries (empty if all events are connected).
separated_events = {}

for movie_id in movie_ids:
    events_mask = event_labels[movie_id]

    # check that the integer rgb labels do not contain white_int
    assert white_int not in events_mask

    # IDs of the events in this movie; 0 is not an event. Filtering (instead
    # of list.remove) also works for a mask that contains no zero at all.
    list_events = [nb for nb in np.unique(events_mask) if nb != 0]

    # create dict entry to store events that are separated in more than one
    # connected component
    separated_events[movie_id] = []

    # compute number of connected components for each event
    for nb_event in list_events:
        # with return_N=True the call returns (labelled volume, count); only
        # the count is needed here
        n_components = cc3d.connected_components(
            events_mask == nb_event, connectivity=connectivity, return_N=True
        )[1]
        if n_components > 1:
            print(
                f"Event with ID {nb_event} in movie {movie_id} contains {n_components} connected components"
            )
            separated_events[movie_id].append([nb_event, n_components])

### check that each event belongs to a unique class

In [82]:
# For every movie, find the events whose voxels carry more than one class.
# multiclass_events[movie_id] is a list of [event ID, list of classes] entries.
multiclass_events = {}

for movie_id in movie_ids:
    events_mask = event_labels[movie_id]
    classes_mask = class_labels[movie_id]

    # every distinct value of the events mask except 0 is one event
    event_ids = list(np.unique(events_mask))
    event_ids.remove(0)

    # events of this movie that contain several classes
    multiclass_events[movie_id] = []

    for nb_event in event_ids:
        # class values inside the event, 0 everywhere else
        classes_in_event = list(
            np.unique(np.where(events_mask == nb_event, classes_mask, 0))
        )
        classes_in_event.remove(0)

        # nothing to report for an event with at most one class
        if len(classes_in_event) <= 1:
            continue

        print(
            f"Event with ID {nb_event} in movie {movie_id} contains {len(classes_in_event)} classes: {classes_in_event}"
        )
        multiclass_events[movie_id].append([nb_event, classes_in_event])

### General tools 

#### Create LUT for smooth movie

In [43]:
# gray colormap used to display the smooth movie in napari
# (lut=16 presumably selects 16 discrete levels — confirm in get_discrete_cmap)
cmap = get_discrete_cmap(name="gray", lut=16)

### correct mistakes wrt constraints interactively

In [39]:
# directory from which the smoothed movies are loaded (sub-folder of data_dir)
smooth_movies_dir = os.path.join(data_dir, "smoothed_movies")

In [58]:
# ID of the movie that is inspected and corrected in the cells below
movie_id = "24"

In [59]:
# load the smoothed movie of the selected movie_id; only this one ID is
# requested and its entry is taken from the returned collection
movie = load_movies_ids(
    data_folder=smooth_movies_dir,
    ids=[movie_id],
    names_available=True,
    movie_names="smoothed_video",
)[movie_id]

In [60]:
# events mask and classes mask of the selected movie
events_mask = event_labels[movie_id]
classes_mask = class_labels[movie_id]

In [61]:
# ID of the event that is inspected in the cells below
event_id = 6

In [62]:
# mask that keeps only the selected event (all other positions are set to 0)
event_mask = np.where(events_mask == event_id, events_mask, 0)

# list the distinct indices along the first axis at which the event is
# present; they are reported as frames
print(
    f"frames containing event {event_id}: {list(np.unique((np.where(event_mask == event_id)[0])))}"
)

# open a napari viewer showing the smooth movie with the discrete gray colormap
viewer = napari.Viewer()
viewer.add_image(movie, name="smooth movie", colormap=("colors", cmap))

# all events of the movie (layer hidden by default)
viewer.add_labels(events_mask, name="events labels", opacity=0.5, visible=False)

# all classes of the movie (layer hidden by default)
viewer.add_labels(classes_mask, name="classes labels", opacity=0.5, visible=False)

# only the selected event (layer visible by default)
viewer.add_labels(event_mask, name="event mask", opacity=0.6, visible=True)



frames containing event 6: [558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600]


<Labels layer 'event mask' at 0x18ddf4f41f0>

In [52]:
# label the connected components of the selected event: ccs is the labelled
# volume and n_components the number of components found
ccs, n_components = cc3d.connected_components(
    (events_mask == event_id), connectivity=connectivity, return_N=True
)

In [54]:
# show the connected components of the selected event as a labels layer
viewer.add_labels(ccs, name="connected components", opacity=0.6, visible=True)

<Labels layer 'connected components' at 0x18d8f3aafd0>

In [66]:
# class values inside the selected event (0 everywhere else)
event_class_mask = np.where(events_mask == event_id, classes_mask, 0)
# distinct classes of the event; the value 0 is removed from the list
list_classes = list(np.unique(event_class_mask))
list_classes.remove(0)
print(f"Classes present in event {event_id}: {list_classes}")

Classes present in event 6: [1, 3]


In [65]:
# show the classes found inside the selected event as a labels layer
viewer.add_labels(
    event_class_mask, name="event classes mask", opacity=0.6, visible=True
)

<Labels layer 'event classes mask' at 0x18de08407f0>

In [67]:
# print all distinct values of the events mask of the selected movie
# (the value 0 is included in the printed array)
print(f"Events present in movie {movie_id}: {np.unique(events_mask)}")

Events present in movie 24: [       0        5        6        7        8        9       10       11
    29020   336152   733150   891478  1076914  1813672  1887795  1911288
  2042280  2088233  2466339  2559776  2930558  3086049  3144602  3244765
  3762786  3808646  4028392  4326235  4693472  5598047  5686279  5694383
  6321154  6587599  6639157  6732689  7056378  7192807  7608053  8178442
  8614839  8894431  8973605  9976188 10832149 11148655 12631210 14061318
 14098982 14361999 14432587 14747394 14788796 14926727 15520080 15747794
 15944341 16515630 16567286]


In [68]:
# write the current events mask and classes mask of the selected movie to
# corr_dir, using the `_FINAL` file names
for final_suffix, final_mask in (
    ("_corrected_rgb_mask_FINAL.tif", events_mask),
    ("_corrected_label_mask_FINAL.tif", classes_mask),
):
    imageio.volwrite(os.path.join(corr_dir, movie_id + final_suffix), final_mask)

### Change event IDs from random numbers to values between `5` and `#events + 4`

In [75]:
# Replace the event IDs of every movie by consecutive integers starting at 5,
# assigned in increasing order of the original IDs, and store the renumbered
# mask back into event_labels.
for movie_id in movie_ids:
    events_mask = event_labels[movie_id]
    renumbered_mask = np.copy(events_mask)

    # original event IDs (every distinct value except 0), sorted
    old_ids = list(np.unique(events_mask))
    old_ids.remove(0)

    print(f"Events present in movie {movie_id} (containing {len(old_ids)} events):")
    print(old_ids)

    # the comparison always uses the untouched original mask, so an ID that
    # was already renumbered cannot be matched a second time
    for new_id, old_id in enumerate(old_ids, start=5):
        renumbered_mask = np.where(events_mask == old_id, new_id, renumbered_mask)

    event_labels[movie_id] = renumbered_mask

    # IDs actually present after renumbering
    new_ids = list(np.unique(event_labels[movie_id]))
    new_ids.remove(0)

    lowest_id, highest_id = min(new_ids), max(new_ids)
    print(
        f"Number of events after renumbering: {len(new_ids)} (values between {lowest_id} and {highest_id})"
    )
    print()

    # the new IDs must form a gap-free range with one value per original event
    assert highest_id - lowest_id + 1 == len(old_ids)

Events present in movie 01 (containing 30 events):
[5, 6, 7, 1213085, 1460377, 1763421, 2177621, 2997931, 3865116, 4641927, 4744257, 5047384, 6923320, 7217616, 7235208, 7607517, 9047504, 9985923, 10065583, 10095167, 10331602, 11014150, 11515293, 11948079, 13311101, 13600822, 13703862, 14663886, 15403210, 15706005]
Number of events after renumbering: 30 (values between 5 and 34)

Events present in movie 02 (containing 16 events):
[5, 6, 7, 203962, 399774, 583189, 2574393, 3354380, 3974382, 4192011, 4698155, 5313404, 5908962, 7529393, 10252693, 12837713]
Number of events after renumbering: 16 (values between 5 and 20)

Events present in movie 03 (containing 47 events):
[83436, 102422, 212722, 1391253, 1484434, 1582027, 2072030, 2132871, 3093166, 3223489, 3622575, 4089246, 4253946, 4410167, 4563090, 5284908, 5831824, 6013718, 6204449, 6392992, 6547268, 6732680, 7564040, 8355437, 8496280, 8587972, 8965382, 9668959, 9711696, 10038753, 10849990, 11404912, 11539250, 12454863, 12532186, 127701

### save final version of class and event masks

In [76]:
# write the events mask and the classes mask of every movie to corr_dir,
# using the `_FINAL` file names
for movie_id in movie_ids:
    final_masks = {
        "_corrected_rgb_mask_FINAL.tif": event_labels[movie_id],
        "_corrected_label_mask_FINAL.tif": class_labels[movie_id],
    }

    for final_suffix, final_mask in final_masks.items():
        imageio.volwrite(
            os.path.join(corr_dir, movie_id + final_suffix), final_mask
        )

## Create .csv files

### for each movie, create dataframes with simple and full event params and save them as .csv files

In [130]:
# suffixes appended to the movie ID to build the names of the two .csv files
simple_filename_base = "_events_simple.csv"
full_filename_base = "_events_full.csv"

In [141]:
# Columns of the two tables written for every movie: one row per event in the
# "simple" table, one row per entry returned by get_event_parameters in the
# "full" table.
simple_cols = [
    "Movie ID",
    "# event",
    "class",
    "start",
    "end",
    "x center of mass",
    "y center of mass",
]
full_cols = ["Movie ID", "# event", "class", "x array", "y array", "frame"]

for movie_id in movie_ids:
    print(f"Processing {movie_id}...")

    # open events mask and label mask
    events_mask = load_annotations_ids(
        data_folder=corr_dir, ids=[movie_id], mask_names="corrected_rgb_mask_FINAL"
    )[movie_id]
    classes_mask = load_annotations_ids(
        data_folder=corr_dir, ids=[movie_id], mask_names="corrected_label_mask_FINAL"
    )[movie_id]

    # IDs of the events in this movie (0 is not an event)
    events_ids = [nb for nb in np.unique(events_mask) if nb != 0]

    # Rows are collected in plain lists and converted to dataframes once per
    # movie: DataFrame.append was removed in pandas 2.0 and copied the whole
    # table on every call.
    simple_rows = []
    full_rows = []

    for event_id in events_ids:
        event_mask = events_mask == event_id

        # class of the event: the single non-zero class value found inside it
        event_classes = np.unique(classes_mask[event_mask])
        event_classes = event_classes[event_classes != 0]
        assert (
            len(event_classes) == 1
        ), f"Event {event_id} in movie {movie_id} has classes {list(event_classes)}"
        event_class = event_classes[0]

        # one row of the simple table
        start, end, x_center, y_center = get_event_parameters_simple(event_mask)
        simple_rows.append(
            [movie_id, event_id, event_class, start, end, x_center, y_center]
        )

        # one row of the full table per entry returned for the event
        for x_array, y_array, frame in get_event_parameters(event_mask):
            full_rows.append(
                [movie_id, event_id, event_class, x_array, y_array, frame]
            )

    simple_df = pd.DataFrame(simple_rows, columns=simple_cols)
    full_df = pd.DataFrame(full_rows, columns=full_cols)

    # Save once per movie, after all events were processed. A movie without
    # events still gets both files (containing only the header line).
    simple_df.to_csv(os.path.join(out_dir, movie_id + simple_filename_base), index=False)
    full_df.to_csv(os.path.join(out_dir, movie_id + full_filename_base), index=False)

Processing 01...
Processing 02...
Processing 03...
Processing 04...
Processing 05...
Processing 06...
Processing 07...
Processing 08...
Processing 09...
Processing 10...
Processing 11...
Processing 12...
Processing 13...
Processing 14...
Processing 15...
Processing 16...
Processing 17...
Processing 18...
Processing 19...
Processing 20...
Processing 21...
Processing 22...
Processing 23...
Processing 24...
Processing 25...
Processing 27...
Processing 28...
Processing 29...
Processing 30...
Processing 32...
Processing 33...
Processing 34...
Processing 35...
Processing 36...
Processing 38...
Processing 39...
Processing 40...
Processing 41...
Processing 42...
Processing 43...
Processing 44...
Processing 45...
Processing 46...
