# MoBioFP - Fingertip Quality Analysis

In [None]:
import warnings

# Suppress only the pandas "Pyarrow will become a required dependency"
# DeprecationWarning (the pandas 3.0 notice) instead of silencing every
# warning. The filter must be installed before pandas is imported. The
# message pattern is matched against the start of the warning text, which
# begins with a newline, hence the leading \s*.
warnings.filterwarnings(
    "ignore",
    message=r"\s*Pyarrow will become a required dependency of pandas",
    category=DeprecationWarning,
)

import cv2
import imutils
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

In [None]:
def plot_histograms(df: pd.DataFrame, suptitle: str = None):
    """Plot the three quality scores stored in ``df`` side by side.

    Draws a 1x3 figure: a 20-bin histogram with a KDE overlay for the
    "Sharpness" and "Contrast" columns, and a box plot for the
    "Binary Mask Coverage" column. The figure is shown immediately.

    Args:
        df: Table providing the "Sharpness", "Contrast" and
            "Binary Mask Coverage" columns.
        suptitle: Optional title for the whole figure; skipped when empty
            or ``None``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(20, 5), constrained_layout=True)

    if suptitle:
        fig.suptitle(suptitle, fontsize=16)

    # histplot is called with its default stat ("count"), so the y-axis shows
    # the number of rows per bin and is labelled accordingly.
    histogram_specs = (("Sharpness", "blue"), ("Contrast", "red"))
    for ax, (column, color) in zip(axes, histogram_specs):
        sns.histplot(data=df, x=column, bins=20, kde=True, color=color, ax=ax)
        ax.set_xlabel(column)
        ax.set_ylabel("Count")

    # Box plot of a single variable: there is no second quantity to label.
    sns.boxplot(data=df, x="Binary Mask Coverage", ax=axes[2], color="green")
    axes[2].set_xlabel("Binary Mask Coverage")
    axes[2].set_ylabel("")

    plt.show()


def plot_invalid_masks(loss_df: pd.DataFrame, fingertip_path: str, limit: int = 10):
    """Show the masks belonging to the first ``limit`` rows of ``loss_df``.

    For every selected row the mask is looked up at
    ``<fingertip_path>/masks/<image stem>.png``, loaded as grayscale, resized
    to a width of 128 pixels and drawn in a grid that is five columns wide.

    Args:
        loss_df: Rows to display; each row's "Image name" value supplies the
            file name of the mask.
        fingertip_path: Dataset root that contains the ``masks`` folder.
        limit: Maximum number of rows taken from the top of ``loss_df``.

    Returns:
        None. Masks that cannot be read are reported and skipped; when no
        mask is left to draw, a message is printed and no figure is created.
    """
    masks_dir = Path(fingertip_path) / "masks"

    masks = []
    titles = []
    for _, row in loss_df.head(limit).iterrows():
        mask_path = masks_dir / Path(row["Image name"]).with_suffix(".png").name

        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, so check before resizing.
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            print(f"Skipping unreadable mask: {mask_path}")
            continue

        masks.append(imutils.resize(mask, width=128))
        titles.append(mask_path.stem)

    if not masks:
        # A grid with zero rows cannot be created, so stop here.
        print("No invalid masks to display.")
        return

    num_cols = 5  # Number of columns for the grid
    num_rows = math.ceil(len(masks) / num_cols)

    # squeeze=False always yields a 2-D array of axes, whatever the grid size
    _, axes = plt.subplots(
        num_rows,
        num_cols,
        figsize=(15, 3 * num_rows),
        constrained_layout=True,
        squeeze=False,
    )
    axes = axes.ravel()

    for ax, mask, title in zip(axes, masks, titles):
        ax.imshow(mask, cmap="gray")
        ax.set_title(title, fontsize=10)
        ax.axis("off")

    # Hide the cells of the last row that received no mask
    for ax in axes[len(masks):]:
        ax.axis("off")

    plt.show()


def data_loss(original_df: pd.DataFrame, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Report how many rows a filter removed and return the removed rows.

    Prints the size of both frames plus the absolute and relative loss, then
    selects the rows of ``original_df`` whose index label does not occur in
    ``filtered_df``.

    Args:
        original_df: The frame before filtering.
        filtered_df: The frame after filtering; expected to share index
            labels with ``original_df``.

    Returns:
        The rows of ``original_df`` that are absent from ``filtered_df``.
    """
    original_count = len(original_df)
    filtered_count = len(filtered_df)
    lost_count = original_count - filtered_count

    # An empty original frame has nothing to lose; avoid dividing by zero.
    if original_count:
        percentage_loss = (lost_count / original_count) * 100
    else:
        percentage_loss = 0.0

    print(f"Original dataset size: {original_count} images")
    print(f"Filtered dataset size: {filtered_count} images")
    print(f"Data loss: {lost_count} images ({percentage_loss:.2f}% loss)")

    # Select with a boolean mask: a label-based .loc lookup would repeat rows
    # whenever the index contains duplicate labels.
    removed_mask = ~original_df.index.isin(filtered_df.index)
    return original_df[removed_mask]

## Detection Fingertip Dataset

In [None]:
# Root folder of the detection dataset
# NOTE(review): the value looks like a placeholder — confirm the real location
DETECTION_DATASET_PATH = "DETECTION_DATASET_PATH"

# Load the quality scores stored inside the dataset folder
df_det = pd.read_csv(f"{DETECTION_DATASET_PATH}/quality_scores.csv")

### Show the first 5 rows of the dataset

In [None]:
# Preview the first five rows of the detection quality scores
df_det.head(n=5)

### Summary Statistics

In [None]:
# Summary statistics of the detection quality scores
df_det.describe()

### Initial Distribution Analysis

In [None]:
# Distribution of the detection quality scores before any filtering
plot_histograms(
    df=df_det,
    suptitle="Histograms of Quality Scores after Fingertip Object Detection",
)

### Binary Mask Coverage Threshold Distribution Analysis and Data Loss

In [None]:
# Define the threshold for binary mask coverage
threshold = 70.0

# Filter the DataFrame based on the threshold
filtered_df = df_det[(df_det["Binary Mask Coverage"] >= threshold)]

plot_histograms(
    filtered_df,
    f"Histograms of Quality Scores after Fingertip Object Detection (Threshold: {threshold})",
)
loss_df = data_loss(df_det, filtered_df)

### Show Invalid Masks

In [None]:
# Display the masks of the rows currently held in loss_df
plot_invalid_masks(loss_df=loss_df, fingertip_path=DETECTION_DATASET_PATH)

## Segmentation Fingertip Dataset

In [None]:
# Root folder of the segmentation dataset
# NOTE(review): the value looks like a placeholder — confirm the real location
SEGMENTATION_DATASET_PATH = "SEGMENTATION_DATASET_PATH"

# Load the quality scores stored inside the dataset folder
df_seg = pd.read_csv(f"{SEGMENTATION_DATASET_PATH}/quality_scores.csv")

### Show the first 5 rows of the dataset

In [None]:
# Preview the first five rows of the segmentation quality scores
df_seg.head(n=5)

### Summary Statistics

In [None]:
# Summary statistics of the segmentation quality scores
df_seg.describe()

### Initial Distribution Analysis

In [None]:
# Distribution of the segmentation quality scores before any filtering.
# The title has no placeholders, so it is a plain string, not an f-string.
plot_histograms(
    df_seg, "Histograms of Quality Scores after Fingertip Semantic Segmentation"
)

### Binary Mask Coverage Threshold Distribution Analysis and Data Loss

In [None]:
# Define the threshold for binary mask coverage
threshold = 65.0

# Filter the DataFrame based on the threshold
filtered_df = df_seg[df_seg["Binary Mask Coverage"] >= threshold]

plot_histograms(
    filtered_df,
    f"Histograms of Quality Scores after Fingertip Semantic Segmentation (Threshold: {threshold})",
)
loss_df = data_loss(df_seg, filtered_df)

### Show invalid masks

In [None]:
# Display the masks of the rows currently held in loss_df
plot_invalid_masks(loss_df=loss_df, fingertip_path=SEGMENTATION_DATASET_PATH)