# FiftyOne

## Compute uniqueness

Already included this in the script.

In [None]:
import fiftyone as fo
import fiftyone.utils.yolo as fouy

# for computing embeddings
import fiftyone.brain as fob
import fiftyone.zoo as foz
import pickle

# for filtering datasets to create views + tags
from fiftyone import ViewField as F

# Class names passed as `classes` to the YOLO label importers; list position
# is significant, so the order must not change.
classes = [
    'Pasiphaea',
    'Poeobius meseres',
    'Siphonophorae',
    'Ctenophora',
    'Medusae',
    'Eusergestes similis',
    'Octopus',
    'Larvacean',
    'Fish',
    'Squid',
    'Mysida',
    'Worm',
    'Echinoderm',
    'Other',
    'Crustacea',
    'Anemone',
    'Equipment',
    'Coral',
    'Sponge',
    'Pennatulacea',
    'Euphausiacea',
]

# Show which datasets FiftyOne currently has, to confirm the one loaded later
# in this notebook exists.
existent_datasets = fo.list_datasets()
print(existent_datasets)

In [None]:
# for ds in existent_datasets:
#     dataset = fo.load_dataset(ds)
#     dataset.delete()

# existent_datasets = fo.list_datasets()
# print(existent_datasets)

In [None]:
# dataset2 = fo.load_dataset("2023.04.07.17.29.18")
# dataset2.delete()

## All in one

In [None]:
# Directory holding the raw images, and the name of the FiftyOne dataset that
# the rest of the notebook loads.
dataset_dir = "/mnt/c/Users/sabri/Documents/github/thesis/datasets/raw"
dataset_name = "2023.04.07.19.54.02"

### Create dataset from image directory

In [None]:
# dataset = fo.Dataset.from_images_dir(dataset_dir)

# print(f"Dataset created: {dataset_name}.\n")

### Add ground_truth for all images

In [None]:
# # add labels
# import fiftyone.utils.yolo as fouy

# fouy.add_yolo_labels(
#     sample_collection=dataset, 
#     label_field="ground_truth",
#     labels_path="/mnt/c/Users/sabri/Documents/github/thesis/datasets/raw/labels",
#     classes=classes,
#     )

In [None]:
# dataset.persistent = True

In [None]:
# Load the already-created dataset by name instead of rebuilding it from the
# image directory (the creation cells are commented out).
dataset = fo.load_dataset(dataset_name)

In [None]:
# Pretty-print the dataset statistics; include_media=True asks for the media
# files to be included in the stats as well.
fo.pprint(dataset.stats(include_media=True))

In [None]:
# Print the first sample as a quick check of which fields it carries.
print(dataset.first())

In [None]:
# dataset.delete_sample_field('predictions_tflite16') 

## Tag dataset

In [None]:
import glob
import os
from tqdm import tqdm
import fiftyone as fo

def tag_samples(dataset_name, processed_dir):
    """
    Tag the samples of a FiftyOne dataset with the names of the list files that reference them.

    Every file matched by ``processed_dir`` is read as a list of image file
    paths, one per line. The file's base name without its extension is used as
    the tag, and that tag is appended to each sample whose ``filepath`` equals
    a listed path.

    Args:
    dataset_name (str): The name of the FiftyOne dataset to be updated with tags.
    processed_dir (str): Glob pattern selecting the list files; ``**`` is expanded recursively. The file names without the extension will be used as tag names.

    Functionality:
    1. Loads the specified dataset using FiftyOne.
    2. Iterates through all the files matched by the pattern.
    3. Extracts the tag name from the file name without extension.
    4. Opens the file and iterates through the image file paths, skipping blank lines.
    5. Matches the image path in the dataset and retrieves the sample.
    6. Appends the tag name to the sample's tag list, if not already present, and saves the sample.
    7. In case of any ValueError (e.g. no sample matches the path), prints an error message with the affected image path and moves on to the next path.
    """
    dataset = fo.load_dataset(dataset_name)
    for file_path in glob.glob(processed_dir, recursive=True):
        print(f"Getting images in {file_path}")
        tag_name = os.path.splitext(os.path.basename(file_path))[0]

        with open(file_path, 'r') as file:
            for line in tqdm(file):
                image_path = line.strip()
                if not image_path:
                    # Blank lines (e.g. a trailing newline) are not image
                    # paths; querying them would only produce a spurious error.
                    continue
                try:
                    sample = dataset.match({"filepath": image_path}).first()
                    if tag_name not in sample.tags:
                        sample.tags.append(tag_name)
                        sample.save()
                except ValueError as e:
                    print(f"Error: {e} at {image_path} in {file_path}")
                    # Keep going: one unmatched path must not leave the rest
                    # of this list file untagged.
                    continue

# Inputs for tag_samples(): a recursive glob over the per-split .txt list
# files, and the dataset whose samples they refer to.
processed_dir = '/mnt/c/Users/sabri/Documents/github/thesis/datasets/processed/**/*.txt'
dataset_name = "2023.04.07.19.54.02"

# Tagging has already been run once for this dataset, so the call stays disabled.
# tag_samples(dataset_name, processed_dir)

## Add predictions

More info here: https://docs.voxel51.com/user_guide/dataset_creation/index.html#model-predictions

### AUV predictions

In [None]:
# fouy.add_yolo_labels(
#     sample_collection=dataset, 
#     label_field="predictions_onnx",
#     labels_path="/mnt/c/Users/sabri/Documents/github/thesis/artifacts/predictions/predict_onnx/labels",
#     classes=classes,
#     )

In [None]:
# fouy.add_yolo_labels(
#     sample_collection=dataset, 
#     label_field="predictions_tflite16",
#     labels_path="/mnt/c/Users/sabri/Documents/github/thesis/artifacts/predictions/predict_tflite16/labels",
#     classes=classes,
#     )

In [None]:
# Launch App instance
# The returned session handle is kept so the App can be closed afterwards.
session = fo.launch_app(dataset)

In [None]:
# Close the App session held in `session`.
session.close()

## Views

In [None]:
# Select every sample that has at least one "Eusergestes similis" detection in
# `ground_truth`. match_labels() only selects samples: unlike filter_labels(),
# it does not strip the other labels from the matching samples, which is the
# behaviour wanted here (same as the saved 'shrimp' view).
view1 = dataset.match_labels(
    filter=F("label") == "Eusergestes similis",
    fields="ground_truth",
)
print(len(view1))
# dataset.save_view("shrimp_2", view1)

In [None]:
# List the views that have been saved on this dataset.
dataset.list_saved_views()

In [None]:
# Show the stored info for the saved view named 'shrimp'.
dataset.get_saved_view_info('shrimp')

## Embeddings
How to do this is explained here: https://docs.voxel51.com/tutorials/image_embeddings.html

In [None]:
# Load the embeddings that were pickled earlier (see the commented-out
# "Compute embeddings" cell).
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load an embeddings.pkl that you produced yourself.
# Open the file in binary mode
with open('embeddings.pkl', 'rb') as file:
    # Call load method to deserialize
    embeddings = pickle.load(file)

### Compute embeddings
I have already computed the embeddings and pickled them to `embeddings.pkl`.

In [None]:
# import fiftyone.zoo as foz

# # Compute embeddings
# # You will likely want to run this on a machine with GPU, as this requires
# # running inference on 10,000 images
# model = foz.load_zoo_model("mobilenet-v2-imagenet-torch")
# embeddings = dataset.compute_embeddings(model)

# # Open a file and use dump()
# with open('embeddings.pkl', 'wb') as file:
#     # A new file will be created
#     pickle.dump(embeddings, file)

In [None]:
# Compute visualization
# Uses the precomputed `embeddings`, fixes the seed to 42, and stores the run
# under the brain key "img_viz" (reloaded by that key in a later cell).
results = fob.compute_visualization(
    dataset, embeddings=embeddings, seed=42, brain_key="img_viz"
)

In [None]:
# Object patch embeddings
# Runs on the `ground_truth` patches and stores the run under the brain key
# "gt_viz". No embeddings or model are passed here.
# NOTE(review): presumably FiftyOne then computes patch embeddings with its
# default model — confirm.
fob.compute_visualization(
    dataset, patches_field="ground_truth", brain_key="gt_viz"
)

In [None]:
# Inspect the visualization results: the object's type and the shape of its
# `points` array.
print(type(results))
print(results.points.shape)

In [None]:
# Compute uniqueness from the precomputed embeddings.
# NOTE(review): later cells read a `uniqueness` field — presumably the default
# field this call writes; confirm.
fob.compute_uniqueness(dataset, embeddings=embeddings)

In [None]:
# Compute similarity from the precomputed embeddings.
# NOTE(review): no brain_key is passed and the return value is discarded, so
# presumably the result cannot be reloaded later — confirm whether a
# brain_key is wanted.
fob.compute_similarity(dataset, embeddings=embeddings)

In [None]:
# Save the dataset after the brain computations in the preceding cells.
dataset.save()

Computing patch embeddings crashes the kernel, so the cell below is left commented out.

In [None]:
# %%capture
# dataset.compute_patch_embeddings(
#     model, 
#     "ground_truth", 
#     embeddings_field = "gt_embed"
# )

In [None]:
# List the brain runs stored on the dataset.
dataset.list_brain_runs()

In [None]:
# Count occurrences of each label among the ground-truth detections.
dataset.count_values("ground_truth.detections.label")

In [None]:
# Reload the visualization results stored under the brain key "img_viz".
results = dataset.load_brain_results("img_viz")

In [None]:
# Plot embeddings colored by ground truth label
plot = results.visualize(labels="ground_truth.detections.label")
plot.show(height=520)

# Left disabled; it needs a `session`, which is only created in other cells.
# # Attach plot to session
# session.plots.attach(plot)

In [None]:
# Launch App instance
# Rebinds `session` to a new App session for the dataset.
session = fo.launch_app(dataset)

In [None]:
from fiftyone import ViewField as F

# Tag every sample whose filepath contains "output" as "auv".
# NOTE(review): presumably the AUV imagery is the set stored under an
# "output" path — confirm.
auv_view = dataset.match(F("filepath").contains_str("output"))
auv_view.tag_samples("auv")

In [None]:
# Tag the complementary set — samples whose filepath does NOT contain
# "output" — as "rov".
rov_view = dataset.match(~F("filepath").contains_str("output"))
rov_view.tag_samples("rov")

## Aggregation

In [None]:
# Compute the number of samples in the dataset
count = dataset.count()
print(count)
# NOTE(review): a `# 200` comment stood here; it looks like leftover sample
# output from a docs example rather than this dataset's count — confirm.

# Compute the number of samples with a `predictions_auv450` value
count = dataset.count("predictions_auv450")
print(count)
# NOTE(review): same leftover `# 200` here — confirm and record the real count.

# Compute the number of detections in the `ground_truth` field
count = dataset.count("ground_truth.detections")
print(count)

In [None]:
# Count occurrences of each label among the `predictions_auv450` detections.
dataset.count_values("predictions_auv450.detections.label")

In [None]:
# Count occurrences of each label among the ground-truth detections.
dataset.count_values("ground_truth.detections.label")

In [None]:
# Bounds of the `uniqueness` field over the whole dataset.
# NOTE(review): presumably a (min, max) pair — confirm.
dataset.bounds("uniqueness")

In [None]:
# Count how many samples carry each sample tag.
dataset.count_sample_tags()

In [None]:
# View restricted to the samples tagged "train25".
train25 = dataset.match_tags("train25")

In [None]:
# List the aggregations available on the view.
train25.list_aggregations()

In [None]:
# Mean of the `uniqueness` field over the "train25" view.
train25.mean("uniqueness")