In [9]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import hog

from joblib import dump, load

In [10]:
signatures_path = os.path.join(str(globals()['_dh'][0]).replace("notebooks", ""), "signatures")
print(signatures_path)
parent_path = str(globals()['_dh'][0]).replace("notebooks", "")
print(parent_path)

/home/matteo/Repos/signature-forgery-detection/signatures
/home/matteo/Repos/signature-forgery-detection/


In [11]:
def pre_process(img_path):
  
  img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
  img_resized = cv2.resize(img, (500, 300))
  img_normalized = img_resized / 255

  return img_normalized

In [12]:
def extract_features(img):

    hog_features, _ = hog(img, orientations = 8, pixels_per_cell = (16,16), cells_per_block = (1,1), visualize = True)
    
    return hog_features

In [19]:
def prepare_data(signatures_path):
    features = []
    labels_individuals = []
    labels_binary = []

    for person_folder in os.listdir(signatures_path):
        if os.path.isdir(os.path.join(signatures_path, person_folder)):
            labels_individuals.append(person_folder)
            for signature_file in os.listdir(os.path.join(signatures_path, person_folder)):
                if signature_file.endswith(".png"):
                    image_path = os.path.join(signatures_path, person_folder, signature_file)
                    preprocessed_image = pre_process(image_path)
                    features.append(extract_features(preprocessed_image))
                    labels_binary.append(1 if person_folder == "original" else 0)
    
    return np.array(features), np.array(labels_individuals), np.array(labels_binary)

In [21]:
signatures_path = os.path.join(str(globals()['_dh'][0]).replace("notebooks", ""), "signatures")
features, labels_individuals, labels_binary = prepare_data(signatures_path)

print(features.shape)
print(labels_individuals)
print(labels_binary.shape)

(0,)
['signatures_18' 'signatures_44' 'signatures_42' 'signatures_5'
 'signatures_40' 'signatures_37' 'signatures_36' 'signatures_17'
 'signatures_41' 'signatures_39' 'signatures_26' 'signatures_21'
 'signatures_23' 'signatures_32' 'signatures_6' 'signatures_33'
 'signatures_11' 'signatures_38' 'signatures_47' 'signatures_34'
 'signatures_8' 'signatures_46' 'signatures_19' 'signatures_49'
 'signatures_45' 'signatures_20' 'signatures_51' 'signatures_27'
 'signatures_12' 'signatures_16' 'signatures_2' 'signatures_25'
 'signatures_31' 'signatures_48' 'signatures_50' 'signatures_14'
 'signatures_29' 'signatures_9' 'signatures_53' 'signatures_52'
 'signatures_35' 'signatures_24' 'signatures_7' 'signatures_13'
 'signatures_10' 'signatures_43' 'signatures_1' 'signatures_3'
 'signatures_4' 'signatures_22' 'signatures_30' 'signatures_55'
 'signatures_15' 'signatures_54' 'signatures_28']
(0,)


In [15]:
# SVM FOR SIGNATURE RECOGNITION

X_train, X_test, y_train, y_test = train_test_split(features, labels_individuals, test_size = 0.3, random_state = 42)

svm_recognition = SVC(kernel = "rbf")
svm_recognition.fit(X_train, y_train)

y_pred = svm_recognition.predict(X_test)
print(classification_report(y_test, y_pred))

dump(svm_recognition, os.path.join(parent_path, "models/svm_recognition.joblib"))


ValueError: Found input variables with inconsistent numbers of samples: [0, 55]

In [None]:
# SVM FOR SIGNATURE VERIFICATION

# TO-DO


In [None]:
images_array = []
targets_array = []

for cls in classes:

  print(f"Loading class {cls}")
  curr_path = os.path.join(parent_path, cls)
  print(curr_path)

  img_path = os.path.join(curr_path, cls) + "1"
  print(img_path)

  for img in os.listdir(img_path):
    if(img.endswith(".png")):
      read_img = cv2.imread(os.path.join(img_path, img))
      pre_processed = pre_process(read_img)
      images_array.append(pre_processed)
      targets_array.append(classes.index(cls))

  print(f"{cls} class loaded successfully")

In [21]:
descriptors = []
sift = cv2.SIFT_create()

max_len = 0

for img in images_array:
  _, desc = sift.detectAndCompute(img, None)
  np_desc = np.array(desc).flatten()
  # print(np_desc.shape)
  descriptors.append(np_desc)
  if len(np_desc) > max_len:
    max_len = len(np_desc)

descriptors = np.array(descriptors, dtype = object).flatten()
# print(descriptors.shape)
targets_array = np.array(targets_array, dtype = int).flatten()
# print(targets_array.shape)

In [26]:
padded_descriptors = np.empty([descriptors.size, max_len], dtype = object)
# print(padded_descriptors.shape)

for i, descriptor in enumerate(descriptors):
  padded = np.pad(descriptor, (0, max_len - len(descriptor))).flatten()
  padded_descriptors[i] = padded

# print(padded_descriptors.shape)
# print(str(padded_descriptors))


(48, 24064)
[[0.0 0.0 0.0 ... 0.0 0.0 0.0]
 [0.0 0.0 0.0 ... 0.0 0.0 0.0]
 [0.0 4.0 37.0 ... 0.0 0.0 0.0]
 ...
 [0.0 0.0 0.0 ... 0.0 0.0 0.0]
 [0.0 0.0 0.0 ... 0.0 0.0 0.0]
 [0.0 0.0 0.0 ... 0.0 0.0 0.0]]


In [23]:
X = padded_descriptors
y = targets_array

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .3, random_state = 42)

In [24]:
svm = SVC(kernel = "linear", probability = True)

svm.fit(X_train, y_train)

In [25]:
y_pred = svm.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.7333333333333333
