In [1]:
!pip install fingerprint_enhancer

Collecting fingerprint_enhancer
  Downloading fingerprint_enhancer-0.0.14.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fingerprint_enhancer
  Building wheel for fingerprint_enhancer (setup.py) ... [?25l[?25hdone
  Created wheel for fingerprint_enhancer: filename=fingerprint_enhancer-0.0.14-py3-none-any.whl size=10629 sha256=75efd8853305ab789079a0d0096e7e96fd24cb7c25ea62a7164932f76f5850f6
  Stored in directory: /root/.cache/pip/wheels/3b/de/af/ea18bac41054916ac77ae32dfb8544155d758734b91266315d
Successfully built fingerprint_enhancer
Installing collected packages: fingerprint_enhancer
Successfully installed fingerprint_enhancer-0.0.14


In [2]:
!pip install fingerprint_feature_extractor

Collecting fingerprint_feature_extractor
  Downloading fingerprint-feature-extractor-0.0.10.tar.gz (4.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fingerprint_feature_extractor
  Building wheel for fingerprint_feature_extractor (setup.py) ... [?25l[?25hdone
  Created wheel for fingerprint_feature_extractor: filename=fingerprint_feature_extractor-0.0.10-py3-none-any.whl size=4829 sha256=a8269b78a1568617ea614c401994eb622e6ef4cc2e120d1d9cb7c080f82f5eaa
  Stored in directory: /root/.cache/pip/wheels/30/85/fd/31e1eefbb140c9a611a41c2819bd2186e1384c108eff4d10e1
Successfully built fingerprint_feature_extractor
Installing collected packages: fingerprint_feature_extractor
Successfully installed fingerprint_feature_extractor-0.0.10


In [3]:
!pip install pycpd

Collecting pycpd
  Downloading pycpd-2.0.0-py3-none-any.whl.metadata (2.8 kB)
Downloading pycpd-2.0.0-py3-none-any.whl (10 kB)
Installing collected packages: pycpd
Successfully installed pycpd-2.0.0


In [4]:
import cv2
from fingerprint_enhancer import enhance_fingerprint
import fingerprint_feature_extractor
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List
from scipy.spatial import cKDTree
from math import radians, degrees, cos, sin, atan2
from sklearn.linear_model import RANSACRegressor
from scipy.signal import find_peaks
from pycpd import RigidRegistration
from scipy.spatial.distance import cdist
import os
from google.colab import files

In [5]:
def align_labeled_point_sets(A_T, A_F, B_T, B_F):
    """
    Register the labeled point set B onto the labeled point set A.

    Each set arrives as two subgroups (T and F). The subgroups are joined
    T-first into one cloud per set, the B cloud is registered against the A
    cloud with pycpd's ``RigidRegistration``, and the transformed B points are
    then split back into their T and F parts.

    Args:
        A_T, A_F: lists of (x, y) tuples for the T and F subgroups of A.
        B_T, B_F: lists of (x, y) tuples for the T and F subgroups of B.

    Returns:
        (aligned_B_T, aligned_B_F): lists of coordinate tuples holding the
        transformed B_T and B_F points, in their original order.
    """
    # One float array per set, T rows first and F rows after them.
    target_cloud = np.array(A_T + A_F, dtype=np.float64)
    source_cloud = np.array(B_T + B_F, dtype=np.float64)

    registration = RigidRegistration(X=target_cloud, Y=source_cloud)
    moved_source, _params = registration.register()

    # The first len(B_T) rows of the result belong to the T subgroup.
    split_at = len(B_T)
    aligned_B_T = list(map(tuple, moved_source[:split_at]))
    aligned_B_F = list(map(tuple, moved_source[split_at:]))
    return aligned_B_T, aligned_B_F

In [6]:
def nearest_neighbor_distances(A, B_aligned, bin_method="fd"):
    """
    Distance from every point of ``B_aligned`` to its nearest point in ``A``.

    Args:
        A: reference points, an (N, d) array-like of coordinates.
        B_aligned: query points, an (M, d) array-like of coordinates.
        bin_method: kept only for backward compatibility. The histogram bin
            count that used to be derived from it was never used, so the
            argument no longer has any effect.

    Returns:
        (distances, stats) where ``distances`` is a length-M array of
        nearest-neighbour distances and ``stats`` is a dict with the keys
        ``mean``, ``median``, ``std``, ``min``, ``max``, ``num_points``,
        ``within_1_unit`` and ``within_5_units`` (counts of distances strictly
        below 1.0 and 5.0).

    Raises:
        ValueError: if either point set is empty.
    """
    reference = np.asarray(A, dtype=np.float64)
    queries = np.asarray(B_aligned, dtype=np.float64)
    if reference.size == 0 or queries.size == 0:
        # Fail with a clear message instead of an obscure error from the
        # KD-tree or from a reduction over an empty array.
        raise ValueError("nearest_neighbor_distances requires non-empty point sets.")

    tree = cKDTree(reference)
    distances, _ = tree.query(queries)

    stats = {
        "mean": np.mean(distances),
        "median": np.median(distances),
        "std": np.std(distances),
        "min": np.min(distances),
        "max": np.max(distances),
        "num_points": len(distances),
        "within_1_unit": np.sum(distances < 1.0),
        "within_5_units": np.sum(distances < 5.0)
    }

    return distances, stats

In [7]:
def find_threshold(distances):
    """
    Derive a pairing distance threshold from the histogram of ``distances``.

    The histogram uses the Freedman-Diaconis bin count from ``optimal_bins``.
    The threshold is the left edge of the first bin to the right of the first
    histogram peak that satisfies, in order of preference:

    1. the bin's density is at or below half of the peak density;
    2. the bin is the first local minimum after the peak;
    3. the bin lies a fixed 5 bins after the peak (clamped to the last bin).

    Args:
        distances: 1-D array-like of distances.

    Returns:
        The threshold as a float, or ``None`` when ``distances`` is empty or
        the histogram has no peak.
    """
    distances = np.asarray(distances, dtype=np.float64).ravel()
    if distances.size == 0:
        # Nothing to build a histogram from.
        return None

    bins = optimal_bins(distances, method="fd")
    hist, bin_edges = np.histogram(distances, bins, density=True)

    # find_peaks only reports samples that have a neighbour on both sides, so
    # a mode sitting in the first (or last) bin would be missed. Padding with
    # a zero on each side makes edge bins eligible; the -1 undoes the shift.
    padded_hist = np.concatenate(([0.0], hist, [0.0]))
    peaks = find_peaks(padded_hist)[0] - 1

    if len(peaks) == 0:
        print("No peaks found in histogram.")
        return None

    # First peak = smallest mode of the distance distribution.
    first_peak_index = peaks[0]
    half_max = hist[first_peak_index] / 2

    # Preferred: first bin right of the peak that falls to half-max or below.
    right_half = np.where(hist[first_peak_index:] <= half_max)[0]

    if right_half.size > 0:
        right_idx = right_half[0] + first_peak_index
    else:
        # Otherwise: first local minimum (valley) after the peak.
        valleys, _ = find_peaks(-hist[first_peak_index:])
        if valleys.size > 0:
            right_idx = valleys[0] + first_peak_index
        else:
            # Final fallback: fixed bin offset, clamped to the last bin index.
            fallback_offset = 5
            right_idx = min(first_peak_index + fallback_offset, len(bin_edges) - 2)

    return bin_edges[right_idx]


In [8]:
def optimal_bins(data, method="fd"):
    """
    Choose a histogram bin count for ``data`` with a named rule.

    Supported ``method`` values:
        "sturges": ceil(log2(n) + 1)
        "rice":    ceil(2 * n^(1/3))
        "scott":   bin width 3.5 * std / n^(1/3)
        "fd":      bin width 2 * IQR / n^(1/3)

    For the width-based rules the count is ceil(range / width); if the width
    is not positive the function falls back to 10 bins.

    Raises:
        ValueError: for any other ``method``.
    """
    count = len(data)
    values = np.asarray(data)
    spread = np.max(values) - np.min(values)
    cube_root = count ** (1/3)

    # Rules that give the bin count directly.
    if method in ("sturges", "rice"):
        raw_count = np.log2(count) + 1 if method == "sturges" else 2 * cube_root
        return int(np.ceil(raw_count))

    # Rules that give a bin width, converted to a count below.
    if method == "scott":
        width = 3.5 * np.std(values) / cube_root
    elif method == "fd":
        upper_quartile, lower_quartile = np.percentile(values, [75, 25])
        width = 2 * (upper_quartile - lower_quartile) / cube_root
    else:
        raise ValueError("Unknown method for bin selection.")

    if width > 0:
        return int(np.ceil(spread / width))
    return 10

In [9]:
def pair_points(A, B, threshold):
    """
    Collect every (A point, B point) combination lying within ``threshold``.

    Parameters:
    - A: array-like of (x, y) points, the first set.
    - B: array-like of (x, y) points, the second set.
    - threshold (float): largest distance (inclusive) at which two points
      count as a pair.

    Returns:
    - paired_A (np.ndarray): rows of A, one per qualifying combination.
    - paired_B (np.ndarray): rows of B, row-aligned with ``paired_A``.

    A point that has several partners within the threshold appears once per
    partner, so the pairing is many-to-many.
    """
    first_set = np.array(A)
    second_set = np.array(B)

    # Boolean matrix: entry (i, j) is True when A[i] and B[j] are close enough.
    close_enough = cdist(first_set, second_set) <= threshold
    rows_in_a, rows_in_b = np.nonzero(close_enough)

    return first_set[rows_in_a], second_set[rows_in_b]

In [10]:
def _stack_point_groups(*groups):
    """Stack (x, y) point groups into one (N, 2) float array; empty groups contribute no rows."""
    return np.concatenate(
        [np.asarray(group, dtype=np.float64).reshape(-1, 2) for group in groups],
        axis=0,
    )


def compare_minutiae(db_terms, db_bifs, terminations, bifurcations):
    """
    Score how well a tested minutiae set matches a reference (database) set.

    The tested points are aligned onto the reference points, a pairing
    threshold is derived from the nearest-neighbour distances, and the point
    pairs within that threshold are scored.

    Args:
        db_terms, db_bifs: reference terminations and bifurcations, each a
            list of (x, y) tuples. Either may be empty.
        terminations, bifurcations: tested terminations and bifurcations,
            each a list of (x, y) tuples. Either may be empty.

    Returns:
        (mse, smape) for the paired points, or (None, None) when one of the
        sets has no points at all, no threshold could be derived, or no
        points fall within the threshold.
    """
    # np.vstack on two lists fails when one of them is empty, so the groups
    # are reshaped to (n, 2) before being concatenated.
    original_reference_points = _stack_point_groups(db_terms, db_bifs)
    if len(original_reference_points) == 0 or len(terminations) + len(bifurcations) == 0:
        # There is nothing to align against (or nothing to align).
        return None, None

    aligned_terminations, aligned_bifurcations = align_labeled_point_sets(db_terms, db_bifs, terminations, bifurcations)
    aligned_tested_points = _stack_point_groups(aligned_terminations, aligned_bifurcations)

    distances, _ = nearest_neighbor_distances(original_reference_points, aligned_tested_points, bin_method="fd")

    threshold = find_threshold(distances)
    if threshold is None:
        return None, None

    paired_A, paired_B = pair_points(original_reference_points, aligned_tested_points, threshold)
    if len(paired_A) == 0:
        # Scoring an empty pairing would only produce NaN values.
        return None, None

    return mse_diff(paired_A, paired_B), smape_diff(paired_A, paired_B)

In [11]:
def get_best_matches(image, minutiae_db, spurious_threshold, top_k=10):
    """
    Rank database fingerprints by similarity to the fingerprint in ``image``.

    The image is enhanced, its minutiae are extracted, and the extracted
    terminations/bifurcations are compared against every database entry with
    ``compare_minutiae``.

    Args:
        image: fingerprint image passed to ``enhance_fingerprint``.
        minutiae_db: mapping ``label -> entry`` where ``entry`` may hold the
            keys ``'T'`` (terminations) and ``'B'`` (bifurcations), each a
            list of (x, y) points.
        spurious_threshold: forwarded to the extractor as
            ``spuriousMinutiaeThresh``.
        top_k: maximum number of matches to return (default 10).

    Returns:
        A list of ``(label, mse, smape)`` tuples sorted by ascending SMAPE,
        at most ``top_k`` long. The list is empty when no entry could be
        scored.
    """
    enhanced_image = enhance_fingerprint(image)
    # The extractor is fed an 8-bit image, so the enhancer output is scaled by 255.
    enhanced_image = (enhanced_image * 255).astype(np.uint8)
    terminations, bifurcations = fingerprint_feature_extractor.extract_minutiae_features(enhanced_image, spuriousMinutiaeThresh=spurious_threshold, invertImage=False, showResult=False, saveResult=False)

    terminations_coords = [(f.locX, f.locY) for f in terminations]
    bifurcations_coords = [(f.locX, f.locY) for f in bifurcations]

    scores = []
    for label, entry in minutiae_db.items():
        # A missing 'T' or 'B' key is treated as an empty group.
        db_terms = list(entry['T']) if 'T' in entry else []
        db_bifs = list(entry['B']) if 'B' in entry else []

        mse, smape = compare_minutiae(db_terms, db_bifs, terminations_coords, bifurcations_coords)
        if mse is None or smape is None:
            continue
        scores.append((label, mse, smape))

    # Sort once, after the loop. This also keeps the result defined (an empty
    # list) when the database is empty or every entry was skipped.
    return sorted(scores, key=lambda item: item[2])[:top_k]

In [12]:
def mse_diff(set_a, set_b):
    """
    Mean squared Euclidean distance between row-aligned point pairs.

    ``set_a`` and ``set_b`` are equally long sequences of matched (x, y)
    points; row i of one is compared with row i of the other.
    """
    offsets = np.asarray(set_a) - np.asarray(set_b)
    # Squared length of each offset vector, then the average over all pairs.
    per_pair_squared_distance = np.square(offsets).sum(axis=1)
    return per_pair_squared_distance.mean()

In [13]:
def smape_diff(set_a, set_b):
    """
    Symmetric mean absolute percentage error between two matched point sets.

    Computed element-wise over every coordinate as
    ``|a - b| / ((|a| + |b|) / 2)``, averaged, and expressed in percent
    (range 0 to 200).

    Args:
        set_a, set_b: equally shaped array-likes of matched coordinates.

    Returns:
        The SMAPE in percent. Coordinates where both values are zero count as
        zero error rather than producing NaN. Empty input returns NaN.
    """
    set_a = np.asarray(set_a, dtype=np.float64)
    set_b = np.asarray(set_b, dtype=np.float64)

    abs_error = np.abs(set_a - set_b)
    # The halving already provides SMAPE's factor of two; multiplying the
    # numerator by 2 as well would double the result.
    scale = (np.abs(set_a) + np.abs(set_b)) / 2.0

    if abs_error.size == 0:
        return float("nan")

    # scale == 0 only when both values are 0, i.e. the error is 0 as well;
    # those entries keep the 0 from `out` instead of evaluating 0/0.
    ratio = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale > 0)
    return np.mean(ratio) * 100

In [14]:
def load_database(csv_path: str) -> Dict[str, Dict[str, List[Tuple[float, float]]]]:
    """
    Load a minutiae CSV and group its points by fingerprint image.

    The CSV must provide the columns ``image_id``, ``x``, ``y`` and ``type``.
    Rows with type ``'T'`` are terminations and rows with type ``'B'`` are
    bifurcations; rows with any other type are ignored. Orientation columns
    are not read, only the coordinates are kept.

    Args:
        csv_path: path of the CSV file, handed to ``pandas.read_csv``.

    Returns:
        A dict keyed by ``image_id``. Each value is
        ``{'T': [(x, y), ...], 'B': [(x, y), ...]}`` with the points in file
        order.
    """
    df = pd.read_csv(csv_path)

    database = {}

    # One entry per image; groupby keeps the original row order inside a group.
    for image_id, group in df.groupby('image_id'):
        is_termination = group['type'] == 'T'
        is_bifurcation = group['type'] == 'B'

        database[image_id] = {
            'T': list(zip(group.loc[is_termination, 'x'], group.loc[is_termination, 'y'])),
            'B': list(zip(group.loc[is_bifurcation, 'x'], group.loc[is_bifurcation, 'y'])),
        }

    return database

## Încărcarea bazei de date

In [15]:
# Ask for the minutiae CSV through the Colab upload dialog.
uploaded_csv = files.upload()

# The upload result is keyed by file name; the first key is the CSV to load.
filename = next(iter(uploaded_csv.keys()))
print(f"Uploaded file: {filename}")
minutiae_db = load_database(filename)

# Summarise how many minutiae of each kind every database entry holds.
for image, entry in minutiae_db.items():
    print(f"Label: {image} Terminations: {len(entry['T'])}  Bifurcations: {len(entry['B'])}")


Saving minutiae_small_10.csv to minutiae_small_10.csv
Uploaded file: minutiae_small_10.csv
Label: 012_3_1.tif Terminations: 48  Bifurcations: 81
Label: 013_5_3.tif Terminations: 64  Bifurcations: 45
Label: 017_3_1.tif Terminations: 36  Bifurcations: 46
Label: 022_3_2.tif Terminations: 56  Bifurcations: 50
Label: 027_4_2.tif Terminations: 48  Bifurcations: 53
Label: 045_3_1.tif Terminations: 35  Bifurcations: 41
Label: 057_3_6.tif Terminations: 33  Bifurcations: 67
Label: 076_3_5.tif Terminations: 41  Bifurcations: 86
Label: 101_1.tif Terminations: 63  Bifurcations: 10
Label: 102_1.tif Terminations: 45  Bifurcations: 44
Label: 103_1.tif Terminations: 62  Bifurcations: 25
Label: 103_2.tif Terminations: 68  Bifurcations: 19
Label: 103_3.tif Terminations: 41  Bifurcations: 9
Label: 103_4.tif Terminations: 66  Bifurcations: 15
Label: 105_6.tif Terminations: 36  Bifurcations: 27
Label: 106_2.tif Terminations: 58  Bifurcations: 7
Label: 107_8.tif Terminations: 49  Bifurcations: 37
Label: 109_

## Încărcarea imaginii de test

In [16]:
# Ask for the test fingerprint image through the Colab upload dialog.
uploaded_img = files.upload()

if not uploaded_img:
    # A cancelled upload would otherwise surface later as a NameError on `fn`.
    raise ValueError("No image was uploaded.")

for uploaded_name, payload in uploaded_img.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=uploaded_name, length=len(payload)))

# When several files are uploaded, the last one is the image that is analysed.
fn = list(uploaded_img)[-1]

# File name without any directory part.
first_image_name = os.path.basename(fn)

first_image = cv2.imdecode(np.frombuffer(uploaded_img[fn], np.uint8), cv2.IMREAD_UNCHANGED)
if first_image is None:
    # cv2.imdecode signals an undecodable buffer by returning None.
    raise ValueError(f'Could not decode "{fn}" as an image.')

Saving 103_1_r_35_b.png to 103_1_r_35_b.png
User uploaded file "103_1_r_35_b.png" with length 105292 bytes


In [17]:
# Rank the database entries against the uploaded image; 10 is the
# spurious-minutiae threshold handed to the feature extractor.
top10 = get_best_matches(first_image, minutiae_db, spurious_threshold=10)


  self._mask = erosion(self._mask, square(5))  # Structuing element for mask erosion = square(5)


In [39]:
# Print the termination/bifurcation counts of every database entry.
for image, entry in minutiae_db.items():
    print(f"Label: {image} Terminations: {len(entry['T'])}  Bifurcations: {len(entry['B'])}")



Label: 012_3_1.tif Terminations: 48  Bifurcations: 81
Label: 013_5_3.tif Terminations: 64  Bifurcations: 45
Label: 017_3_1.tif Terminations: 36  Bifurcations: 46
Label: 022_3_2.tif Terminations: 56  Bifurcations: 50
Label: 027_4_2.tif Terminations: 48  Bifurcations: 53
Label: 045_3_1.tif Terminations: 35  Bifurcations: 41
Label: 057_3_6.tif Terminations: 33  Bifurcations: 67
Label: 076_3_5.tif Terminations: 41  Bifurcations: 86
Label: 101_1.tif Terminations: 63  Bifurcations: 10
Label: 102_1.tif Terminations: 45  Bifurcations: 44
Label: 103_1.tif Terminations: 62  Bifurcations: 25
Label: 103_2.tif Terminations: 68  Bifurcations: 19
Label: 103_3.tif Terminations: 41  Bifurcations: 9
Label: 103_4.tif Terminations: 66  Bifurcations: 15
Label: 105_6.tif Terminations: 36  Bifurcations: 27
Label: 106_2.tif Terminations: 58  Bifurcations: 7
Label: 107_8.tif Terminations: 49  Bifurcations: 37
Label: 109_2.tif Terminations: 48  Bifurcations: 49
Label: 110_1.tif Terminations: 59  Bifurcations: 4