In [None]:
from os import path
# If running on Colab, download and unzip the additional files the first time:
#!unzip '/content/drive/MyDrive/SocoFing.zip'

In [None]:
import math
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from ipywidgets import interact
import os

import argparse
import sys
from os import listdir
from os.path import isfile, join
from typing import Dict, List, Tuple

import imagehash
import numpy as np
from PIL import Image
from aiohttp import Fingerprint

### Encoding keypoint information with D-Hash

In [None]:
# Notebook-wide state shared by the functions defined below.
count2 = 0  # files visited so far; drives the progress print in find_near_duplicates
negc2 = 0   # declared global by find_near_duplicates but never updated there
fing1 = 0   # overwritten with the candidate-pair set by find_near_duplicates
fing2 = 0   # overwritten with the path -> packed-signature dict by find_near_duplicates
sift = cv.SIFT_create()  # single SIFT detector reused for every image
def minutiae_hash(m1):
    """Encode which coordinates occur in ``m1`` as one boolean vector.

    Args:
        m1: sequence of points. Element 0 of each point indexes a 97-slot
            mask and element 1 indexes a 104-slot mask.

    Returns:
        1-D boolean array of length 201: the 97-slot mask followed by the
        104-slot mask, True at every index that occurred in ``m1``.
    """
    first_axis_mask = np.zeros(97, dtype=bool)
    second_axis_mask = np.zeros(104, dtype=bool)
    for point in m1:
        first_axis_mask[point[0]] = True
        second_axis_mask[point[1]] = True
    return np.concatenate((first_axis_mask, second_axis_mask))

def calculate_signature(image_file: str, hash_size: int) -> np.ndarray:
    """
    Build the combined dHash + SIFT-keypoint signature of an image file.

    The signature is the flattened 128x128 dHash of the grayscale image
    followed by the coordinate-occupancy vector that ``minutiae_hash``
    produces from the integer SIFT keypoint positions.

    Args:
        image_file: the image (path as string) to calculate the signature for
        hash_size: accepted for call compatibility only; the dHash size is
            fixed at 128 inside this function, so this value is ignored

    Returns:
        1-D array holding the dHash bits followed by the ``minutiae_hash``
        bits.

    Raises:
        OSError: if PIL cannot open the file, or OpenCV cannot read it.
    """
    dhash_size = 128

    # The context manager closes the underlying file handle; the original
    # code only closed the resized copy and leaked the opened file.
    with Image.open(image_file) as source:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        pil_image = source.convert("L").resize(
            (dhash_size + 1, dhash_size),
            Image.LANCZOS)
    dhash = imagehash.dhash(pil_image, dhash_size)
    signature = dhash.hash.flatten()
    pil_image.close()

    sample = cv.imread(image_file)
    if sample is None:
        # cv.imread signals failure by returning None rather than raising;
        # surface it as OSError so callers that skip unreadable files on
        # IOError/OSError skip this one too instead of crashing inside SIFT.
        raise OSError(f"OpenCV could not read image file {image_file}")
    keypoints, _descriptors = sift.detectAndCompute(sample, None)

    # Truncate the sub-pixel keypoint positions to integer (x, y) pairs.
    keypoint_coords = [(int(kp.pt[0]), int(kp.pt[1])) for kp in keypoints]

    return np.append(signature, minutiae_hash(keypoint_coords))


def find_near_duplicates(input_dir: List[str], test_sample: str, threshold: float, hash_size: int, bands: int) -> List[Tuple[str, str, float]]:
    """
    Find near-duplicate images of a test sample via locality sensitive hashing

    Every file in the given directories is fingerprinted with
    ``calculate_signature``; files sharing at least one signature band become
    candidate pairs. Only pairs whose first file name starts with the same
    five '_'-separated fields as ``test_sample`` are scored and returned.

    Side effects: increments the module-level ``count2`` once per visited
    file (printing it every 100 files) and stores the candidate-pair set and
    the signature dict in the module-level ``fing1`` and ``fing2``.

    Args:
        input_dir: Directories with images to check (a single directory
            path given as a string is also accepted)
        test_sample: Path of the query image; only its file name is used
        threshold: Currently unused - the threshold filter is disabled, so
            every scored pair is returned
        hash_size: Similarity is normalised by hash_size^2
        bands: The number of bands to use in the locality sensitive hashing process

    Returns:
        A list of (filename_A, filename_B, similarity) triples sorted by
        descending similarity.

    Raises:
        OSError: if one of the directories cannot be listed.
    """
    global count2,fing1,fing2
    rows: int = 208  # bits per LSH band (hard-coded rather than derived from hash_size/bands)
    directories = [input_dir] if isinstance(input_dir, str) else list(input_dir)
    signatures = dict()
    hash_buckets_list: List[Dict[bytes, List[str]]] = [dict() for _ in range(bands)]

    # Fingerprint every regular file of every directory
    for directory in directories:
        file_list = [join(directory, f) for f in listdir(directory) if isfile(join(directory, f))]

        for fh in file_list:
            if count2 % 100 == 0:
                print(count2)
            count2 += 1
            try:
                signature = calculate_signature(fh, hash_size)
            except IOError:
                # Not a readable image, skip this file
                continue

            # Keep track of each image's signature (bit-packed to save memory)
            signatures[fh] = np.packbits(signature)

            # Locality Sensitive Hashing: one bucket table per band
            for band, hash_buckets in enumerate(hash_buckets_list):
                band_key = signature[band*rows:(band+1)*rows].tobytes()
                hash_buckets.setdefault(band_key, []).append(fh)

    # Build candidate pairs based on bucket membership
    candidate_pairs = set()
    for hash_buckets in hash_buckets_list:
        for hash_bucket in hash_buckets.values():
            if len(hash_bucket) > 1:
                members = sorted(hash_bucket)
                for i, first in enumerate(members):
                    for second in members[i+1:]:
                        candidate_pairs.add((first, second))

    # Publish the index so later notebook cells can reuse it
    fing1 = candidate_pairs
    fing2 = signatures

    # Check candidate pairs for similarity
    # NOTE(review): the denominator is hash_size**2, which need not equal the
    # real signature length produced by calculate_signature - confirm intent.
    total_bits = hash_size**2
    target_key = '_'.join(test_sample.split('/')[-1].split('_')[0:5])
    near_duplicates = list()
    for cpa, cpb in candidate_pairs:
        if '_'.join(cpa.split('/')[-1].split('_')[0:5]) != target_key:
            continue
        # Hamming distance; count_nonzero avoids the slow Python-level sum()
        # over uint8 scalars, which can also wrap under NumPy 2 promotion.
        hd = int(np.count_nonzero(
            np.unpackbits(signatures[cpa]) != np.unpackbits(signatures[cpb])
        ))
        similarity = (total_bits - hd) / total_bits
        near_duplicates.append((cpa, cpb, similarity))

    # Sort near-duplicates by descending similarity and return
    near_duplicates.sort(key=lambda x:x[2], reverse=True)

    return near_duplicates


def main(ip,op):
    """Run the near-duplicate search for one test sample and report the result.

    Args:
        ip: directories to index (passed to find_near_duplicates as input_dir)
        op: path of the test sample image

    Returns:
        The list returned by find_near_duplicates when it is non-empty,
        otherwise 0 (also 0 when an OSError is raised during the search).
    """
    input_dir, test_sample = ip, op
    threshold = 0.1
    hash_size = 129  #22,8,57
    bands = 80

    try:
        near_duplicates = find_near_duplicates(input_dir, test_sample, threshold, hash_size, bands)
        print('n',near_duplicates)

        # Guard clause: nothing found
        if not near_duplicates:
            print(f"No near-duplicates found in {input_dir} (threshold {threshold:.2%})")
            return 0

        print(f"Found {len(near_duplicates)} near-duplicate images in {input_dir} (threshold {threshold:.2%})")
        for a,b,s in near_duplicates:
            print(f"{s:.2%} similarity: file 1: {a} - file 2: {b}")
        return near_duplicates

    except OSError:
        print(f"Couldn't open input directory {input_dir}")
        return 0


In [None]:
# Smoke test: build the signature of one sample image and display its shape.
# The hash_size argument (16) is ignored by calculate_signature, which uses a fixed size.
xx=calculate_signature('try2/Real/69__M_Left_little_finger.BMP',16)
xx.shape

  Image.ANTIALIAS)


(16585,)

In [None]:
# NOTE: `iter` shadows the builtin of the same name for the rest of the session.
iter, counter = 0, 0
if __name__ == "__main__":
    # Directories indexed for the demo query below.
    paths = [
        'try2/Real/',
        'try2/Altered/Altered-Easy/',
        'try2/Altered/Altered-Hard/',
        'try2/Altered/Altered-Medium/',
    ]
    paths2 = ['try2i/']
    # Look up near-duplicates of a single altered thumb print.
    main(paths, 'try2/Altered/Altered-Hard/69__M_Right_thumb_finger_Obl.BMP')

0


  Image.ANTIALIAS)


100
200
300
400
n [('try2/Altered/Altered-Hard/69__M_Right_thumb_finger_Obl.BMP', 'try2/Altered/Altered-Medium/69__M_Right_thumb_finger_Obl.BMP', 0.9781263145243675), ('try2/Altered/Altered-Medium/69__M_Right_thumb_finger_Obl.BMP', 'try2/Real/69__M_Right_thumb_finger.BMP', 0.9500630971696412), ('try2/Altered/Altered-Hard/69__M_Right_thumb_finger_Obl.BMP', 'try2/Real/69__M_Right_thumb_finger.BMP', 0.9402079201971035), ('try2/Altered/Altered-Medium/69__M_Right_thumb_finger_Zcut.BMP', 'try2/Real/69__M_Right_thumb_finger.BMP', 0.9389459768042786), ('try2/Altered/Altered-Medium/69__M_Right_thumb_finger_CR.BMP', 'try2/Real/69__M_Right_thumb_finger.BMP', 0.9019890631572622), ('try2/Altered/Altered-Medium/69__M_Right_thumb_finger_Obl.BMP', 'try2/Altered/Altered-Medium/69__M_Right_thumb_finger_Zcut.BMP', 0.9010275824770146), ('try2/Altered/Altered-Hard/69__M_Right_thumb_finger_Zcut.BMP', 'try2/Real/69__M_Right_thumb_finger.BMP', 0.8963403641608076), ('try2/Altered/Altered-Hard/69__M_Right_thumb

In [None]:
# Evaluation tallies, all starting from zero.
pos_flag = fpos_flag = neg_flag = fneg_flag = 0  # reported as TP / FP / TN / FN
final_acc = 0
count_flag = counter = 0  # files evaluated / folders finished
tp = 0  # matches counted by matchfunc
def matchfunc(ip,threshold,hash_size=129):
    """Score the cached candidate pairs that belong to the same finger as ``ip``.

    Uses the module-level ``fing1`` (candidate pairs) and ``fing2``
    (path -> packed signature) that find_near_duplicates stores, so that
    function must have run first. Increments the module-level ``tp`` once
    per pair whose similarity exceeds ``threshold``.

    Args:
        ip: path of the query image; only the first five '_'-separated
            fields of its file name are compared
        threshold: pairs are kept only when similarity > threshold
        hash_size: similarity is normalised by hash_size**2; the default
            matches the value main() passes to find_near_duplicates

    Returns:
        List of (ip, filename_B, similarity) triples sorted by descending
        similarity, or 0 when no cached pairs exist or none qualifies
        (0 is the same "nothing found" sentinel main() returns).
    """
    global tp

    # Nothing indexed yet (fing1 is still its initial 0) or no candidate pairs.
    if not fing1:
        return 0

    target_key = '_'.join(ip.split('/')[-1].split('_')[0:5])
    total_bits = hash_size**2
    near_duplicates = []

    for cpa, cpb in fing1:
        if '_'.join(cpa.split('/')[-1].split('_')[0:5]) != target_key:
            continue
        try:
            sig_a, sig_b = fing2[cpa], fing2[cpb]
        except KeyError:
            # Pair refers to a file with no stored signature; skip it.
            continue

        # Hamming distance between the two unpacked bit signatures.
        hd = int(np.count_nonzero(np.unpackbits(sig_a) != np.unpackbits(sig_b)))
        similarity = (total_bits - hd) / total_bits

        if similarity > threshold:
            tp += 1
            near_duplicates.append((ip, cpb, similarity))

    if not near_duplicates:
        return 0

    # Sort once after all pairs were examined (the original returned from
    # inside the loop, so at most one pair was ever looked at).
    near_duplicates.sort(key=lambda x:x[2], reverse=True)
    return near_duplicates


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Evaluation run: query every file of every folder and tally the outcome.
# Relies on `paths2` (the directories main() indexes) and on the tallies
# pos_flag / fpos_flag / fneg_flag / count_flag / counter defined by earlier cells.
paths =  ['SOCOFing/Real/', 'SOCOFing/Altered/Altered-Easy/', 'SOCOFing/Altered/Altered-Hard/', 'SOCOFing/Altered/Altered-Medium/']
#paths2 =  ['try2/Real/', 'try2/Altered/Altered-Easy/', 'try2/Altered/Altered-Hard/', 'try2/Altered/Altered-Medium/']
for folder in paths:
    file_list = [join(folder, f) for f in listdir(folder) if isfile(join(folder, f))]
    counter+=1
    for fh in file_list:
        count_flag+=1
        if count_flag % 1000000000==0: print(count_flag,(count_flag/1000000000),'B')

        # NOTE(review): main() calls find_near_duplicates, which re-reads and
        # re-hashes every file in paths2 on each call - consider building the
        # index once and reusing it.
        result2 = main(paths2,fh) #main(paths, 'try2/Altered/Altered-Hard/69__M_Right_thumb_finger_Obl.BMP')
        if result2!=0:
            # Best-scoring pair comes first because main() returns the list
            # sorted by descending similarity.
            path1, path2 = result2[0][0], result2[0][1]

            # A hit counts as correct when the query file itself is part of
            # the best-scoring pair.
            if fh in (path1, path2):
                pos_flag+=1
            else:
                fpos_flag+=1
        else:
            fneg_flag+=1
    print(counter,'/',len(paths),'Folders done.')


# Guard every ratio so an empty run (no files, or no matches at all) prints
# 0.0 instead of raising ZeroDivisionError.
precision = _safe_ratio(pos_flag, pos_flag+fpos_flag)
recall = _safe_ratio(pos_flag, pos_flag+fneg_flag)

print()
print('True Positive:',pos_flag)
print('False Positive:',fpos_flag)
print('True Negative:',neg_flag)
print('False Negative:',fneg_flag)
print('Iterations:',count_flag)
print()
print('Accuracy:',_safe_ratio(pos_flag, count_flag))
print('Precision:',precision)
print('Recall:',recall)
print('F1 Score:',2*_safe_ratio(precision*recall, precision+recall))

1 / 4 Folders done.
1000000000 1 B
2 / 4 Folders done.
2000000000 2 B
3 / 4 Folders done.
3000000000 3 B
4 / 4 Folders done.

True Positive: 55265
False Positive: 0
True Negative: 5
False Negative: 0
Iterations: 55270

Accuracy: 0.9999095350099512
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
