# LuxonisDatasetFormat - testing embeddings methods

In [None]:
import os
import numpy as np

from matplotlib import pyplot as plt
import torch
import torch.onnx
import onnx
import onnxruntime
import torchvision

from qdrant_client.models import Distance

from luxonis_ml.data import (
    LuxonisDataset,
    LuxonisLoader,
)

from luxonis_ml.embeddings.utils.model import (
    load_model_resnet50_minuslastlayer,
    export_model_onnx,
    load_model_onnx,
    extend_output_onnx,
    load_model,
)
from luxonis_ml.embeddings.utils.embedding import (
    extract_embeddings,
    extract_embeddings_onnx,
    save_embeddings,
    load_embeddings,
)
from luxonis_ml.embeddings.utils.qdrant import QdrantManager, QdrantAPI
from luxonis_ml.embeddings.utils.ldf import generate_embeddings

from utils.data_utils import load_mnist_data

## Pytorch examples

In [None]:
# Load the data: request 640 MNIST samples served in batches of 64.
# `save_path` is presumably where the raw MNIST files are cached -- confirm
# in utils.data_utils.load_mnist_data.
data_loader = load_mnist_data(save_path="./mnist", num_samples=640, batch_size=64)

In [None]:
# Load the embedding model. Judging by the helper's name this is a ResNet-50
# with its last layer removed -- confirm in luxonis_ml.embeddings.utils.model.
model = load_model_resnet50_minuslastlayer()

In [None]:
# Extract embeddings from the dataset: the helper receives the model and the
# data loader and returns the embeddings together with their labels.
embeddings, labels = extract_embeddings(model, data_loader)

In [None]:
# Persist the embeddings and labels. No paths are passed, so the helper's
# defaults apply (the cleanup cell at the end of this notebook suggests
# ./embeddings.pth and ./labels.pth -- confirm against save_embeddings).
save_embeddings(embeddings, labels)

## ONNX models and Qdrant

In [None]:
# Load the data again with the same settings as the PyTorch example
# (640 samples, batches of 64), rebinding `data_loader` for this section.
data_loader = load_mnist_data(save_path="./mnist", num_samples=640, batch_size=64)

In [None]:
# Load the model (the output file names below suggest a ResNet-50 -- confirm
# against load_model)
model = load_model()

# Export the model to ONNX, written to resnet50.onnx
export_model_onnx(model, model_path_out="resnet50.onnx")

# Load the ONNX model back from the file that was just exported
onnx_model = load_model_onnx(model_path="resnet50.onnx")

# Extend the ONNX model with an intermediate output layer: the tensor named
# "/Flatten_output_0" is the one later requested as the embedding output
onnx_model = extend_output_onnx(
    onnx_model, intermediate_tensor_name="/Flatten_output_0"
)

# Save the extended ONNX model under a new name so the plain export is kept
onnx.save(onnx_model, "resnet50-1.onnx")

In [None]:
# Create an ONNX Runtime session.
# Always pass an explicit provider list: since ONNX Runtime 1.9 a build with
# more than one execution provider (e.g. onnxruntime-gpu) refuses
# `providers=None`. CPU is listed after CUDA so the session can still be
# created if the CUDA provider fails to initialise.
provider = (
    ["CUDAExecutionProvider", "CPUExecutionProvider"]
    if torch.cuda.is_available()
    and "CUDAExecutionProvider" in onnxruntime.get_available_providers()
    else ["CPUExecutionProvider"]
)
ort_session = onnxruntime.InferenceSession("resnet50-1.onnx", providers=provider)

# Extract embeddings from the dataset, reading them from the intermediate
# output that was added to the exported model
embeddings, labels = extract_embeddings_onnx(
    ort_session, data_loader, "/Flatten_output_0"
)

# Save the embeddings and labels to a file
save_embeddings(embeddings, labels)

In [None]:
# Reload the embeddings and labels written by save_embeddings (the helper's
# default paths are used on both sides).
embeddings, labels = load_embeddings()

In [None]:
# Start Qdrant docker container. The two arguments are presumably the Docker
# image and the container name -- confirm against QdrantManager.
QdrantManager("qdrant/qdrant", "qdrant_container2").start_docker_qdrant()

In [None]:
# Connect to Qdrant on localhost:6333. "mnist2" is presumably the collection
# name (the cleanup cell deletes a collection of that name) -- confirm
# against QdrantAPI.
qdrant_api = QdrantAPI("localhost", 6333, "mnist2")

In [None]:
# Create a collection whose vector size is the second dimension of the
# embeddings array, compared with cosine distance
vector_size = embeddings.shape[1]
qdrant_api.create_collection(vector_size=vector_size, distance=Distance.COSINE)

# Insert the embeddings into the collection in batches of 50.
# Alternative insert calls, kept for reference:
# qdrant_api.insert_embeddings(embeddings, labels)
# qdrant_api.insert_embeddings_nooverwrite(embeddings, labels)
qdrant_api.batch_insert_embeddings_nooverwrite(embeddings, labels, batch_size=50)

In [None]:
# Search for the nearest neighbors of the first embedding, asking for 5 results
search_results = qdrant_api.search_embeddings(embeddings[0], top=5)

# Print the search results as a NumPy array
print(np.array(search_results))

## LuxonisDatasetFormat, ONNX and Qdrant

In [None]:
def train_test_val_split(NUM_SAMPLES, train=0.8, val=0.1, test=0.1, seed=42):
    """Randomly assign each of ``NUM_SAMPLES`` samples to a split.

    Args:
        NUM_SAMPLES: Number of samples to label (0 yields an empty array).
        train: Fraction of samples labelled ``"train"``.
        val: Fraction of samples labelled ``"val"``.
        test: Fraction of samples labelled ``"test"``.
        seed: Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.

    Returns:
        A NumPy string array of length ``NUM_SAMPLES`` whose entries are
        ``"train"``, ``"val"`` or ``"test"``.

    Raises:
        ValueError: If the three fractions do not sum to 1 (within a small
            floating-point tolerance).
    """
    # Compare with a tolerance: 0.7 + 0.2 + 0.1 evaluates to
    # 0.9999999999999999, which an exact `!= 1.0` check would reject.
    if not np.isclose(train + val + test, 1.0, rtol=0.0, atol=1e-9):
        raise ValueError("TRAIN + VAL + TEST must equal 1.0")

    def boundary(fraction):
        # The epsilon keeps a product that sits just below an integer because
        # of float noise (e.g. 8.999999999999998) from being truncated down.
        return int(fraction * NUM_SAMPLES + 1e-9)

    train_end = boundary(train)
    val_end = boundary(train + val)

    np.random.seed(seed)
    # Random order of sample indices; consecutive slices form the splits.
    indices = np.random.permutation(NUM_SAMPLES)

    # Everything starts as "train"; the explicit dtype keeps the result a
    # string array even when NUM_SAMPLES is 0.
    train_test_val = np.full(NUM_SAMPLES, "train", dtype="<U5")
    train_test_val[indices[train_end:val_end]] = "val"
    train_test_val[indices[val_end:]] = "test"

    return train_test_val

In [None]:
# Settings for the LuxonisDatasetFormat section.
NUM_SAMPLES = 6400  # set to -1 to use all samples
BATCH_SIZE = 64  # batch size handed to the MNIST loader
TRAIN, VAL, TEST = 0.8, 0.1, 0.1  # split fractions passed to train_test_val_split

In [None]:
# Load the MNIST data with the configured sample count and batch size
data_loader = load_mnist_data(
    save_path="./mnist", num_samples=NUM_SAMPLES, batch_size=BATCH_SIZE
)
# Replace the requested count with the actual dataset length, so later cells
# work with a real size (relevant when -1 was requested).
NUM_SAMPLES = len(data_loader.dataset)
print(f"Number of samples: {NUM_SAMPLES}")

In [None]:
# Split the data into train, val, test: one split label per sample index.
# NOTE(review): `train_test_val` is not referenced again in the visible
# cells; the dataset splits are created by `dataset.make_splits()` instead.
train_test_val = train_test_val_split(NUM_SAMPLES, train=TRAIN, val=VAL, test=TEST)

In [None]:
# Create a tmp directory to store the images.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
mnist_image_dir = "./mnist_images"
os.makedirs(mnist_image_dir, exist_ok=True)

In [None]:
# Convert MNIST data to Luxonis ML format

def mnist_LDF_generator():
    """Save every MNIST image to disk and yield one record per image.

    Iterates the notebook-level ``data_loader``, writes each image into
    ``mnist_image_dir`` as ``mnist_<index>.jpg`` and yields a dictionary
    that points at the written file.

    The index is a running count over all images rather than
    ``batch_num * BATCH_SIZE + i``, so file names stay unique even if the
    loader's actual batch size differs from the ``BATCH_SIZE`` constant.
    When every batch except the last holds ``BATCH_SIZE`` images, the
    names are the same as before.

    Yields:
        dict: keys ``"file"`` (path of the saved image), ``"class"`` (the
        label as a string), ``"type"`` (``"classification"``) and
        ``"value"`` (``True``).
    """
    # Flatten the batches into one stream of (image, label) pairs.
    samples = (
        (image, label)
        for images, labels in data_loader
        for image, label in zip(images, labels)
    )
    for img_ix, (image, label) in enumerate(samples):
        # Save image to disk
        image_path = os.path.join(mnist_image_dir, f"mnist_{img_ix}.jpg")
        torchvision.utils.save_image(image, image_path)

        # Create dictionary structure for Luxonis ML
        yield {
            "file": image_path,
            "class": str(label.item()),
            "type": "classification",
            "value": True,
        }


# original_additions = deepcopy(additions)

In [None]:
# Load the MNIST data into LDF
dataset_name = "mnist_dataset"

# Create a new dataset in LDF under that name (whether an existing dataset
# with the same name is reused or replaced depends on LuxonisDataset --
# TODO confirm)
dataset = LuxonisDataset(dataset_name)

In [None]:
# Add the MNIST data to the dataset

# Register the ten digit classes; the names match the stringified labels
# yielded by mnist_LDF_generator
dataset.set_classes(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
# The generator function itself (not a generator object) is handed over
dataset.add(mnist_LDF_generator)
# No arguments are passed, so the library's default split settings apply
dataset.make_splits()

In [None]:
# Show the first image produced by the loader as a quick visual check.
loader = LuxonisLoader(dataset)
first_pair = next(iter(loader), None)
if first_pair is not None:
    img, _ = first_pair
    plt.imshow(img)
    plt.show()

In [None]:
# Initialize the ONNX Runtime session for the model.
# Always pass an explicit provider list: since ONNX Runtime 1.9 a build with
# more than one execution provider (e.g. onnxruntime-gpu) refuses
# `providers=None`. CPU is listed after CUDA so the session can still be
# created if the CUDA provider fails to initialise.
provider = (
    ["CUDAExecutionProvider", "CPUExecutionProvider"]
    if torch.cuda.is_available()
    and "CUDAExecutionProvider" in onnxruntime.get_available_providers()
    else ["CPUExecutionProvider"]
)
ort_session = onnxruntime.InferenceSession("resnet50-1.onnx", providers=provider)

In [None]:
# Start Qdrant docker container (same image and container name as in the
# previous section)
QdrantManager("qdrant/qdrant", "qdrant_container2").start_docker_qdrant()

# Connect to Qdrant, this time using "mnist3" (presumably the collection
# name -- confirm against QdrantAPI)
qdrant_api = QdrantAPI("localhost", 6333, "mnist3")

# Create a collection. The vector size is hard-coded to 2048 here, whereas
# the previous section derived it from the embeddings array.
# NOTE(review): presumably the width of the "/Flatten_output_0" tensor -- confirm.
qdrant_api.create_collection(vector_size=2048, distance=Distance.COSINE)

In [None]:
# Generate embeddings for the LuxonisDataset using the ONNX session, the
# Qdrant connection and the "/Flatten_output_0" output. The result is used
# below as a mapping from sample id to embedding.
emb_dict = generate_embeddings(
    dataset, ort_session, qdrant_api, output_layer_name="/Flatten_output_0"
)

In [None]:
# Take the first entry of emb_dict: its key is the sample id, its value the
# embedding that the next cell uses as the search query.
first_emb = None
sample_id = list(emb_dict)[0]
first_sample = sample_id
first_emb = emb_dict[sample_id]

# To inspect a specific sample instead, set its id explicitly:
# sample_id = '64e758bdca1096d3483d18f4'
sample = dataset.fo_dataset[sample_id]
print(sample)

In [None]:
# Search for the nearest neighbors of the first sample's embedding
# (converted to a NumPy array), asking for 5 results
search_results = qdrant_api.search_embeddings(np.array(first_emb), top=5)
print(np.array(search_results))

## Embeddings Processing

In [None]:
# See test_emb_process.ipynb for the rest of the code.

## Remove everything

In [None]:
# ! rm -rf ./mnist_images
# ! rm -rf ./mnist
# ! rm ./resnet50.onnx
# ! rm ./resnet50-1.onnx
# ! rm ./embeddings.pth
# ! rm ./labels.pth

# # # Stop the Qdrant Docker container
# # stop_docker_qdrant()

# # Delete the Qdrant collection
# qdrant_client.delete_collection(collection_name="mnist2")
# qdrant_client.delete_collection(collection_name="mnist3")

In [None]:
# # Delete the Luxonis ML dataset

# dataset.delete_dataset()