In [None]:
import cv2
import numpy as np
import tensorflow as tf
from sklearn import svm
from sklearn.model_selection import train_test_split
from skimage.feature import greycomatrix, greycoprops
import os

In [None]:
# Path to your CASIA2 dataset (relative to the notebook's working directory)
PATH = '../data/CASIA2'

# Directories for authentic and tampered images
# NOTE(review): the tampered folder is named 'Tp2' here -- confirm it matches
# the on-disk layout of your CASIA2 copy.
authentic_dir = os.path.join(PATH, 'Au')
tampered_dir = os.path.join(PATH, 'Tp2')

# Target size handed to cv2.resize for every loaded image.
IMG_SIZE = (160, 160)

def get_file_list_and_labels(directory, label, extensions=('.png', '.jpg', '.jpeg')):
    """Recursively collect image paths under ``directory`` and pair each with ``label``.

    Args:
        directory: Root folder to walk; files in sub-folders are included.
        label: Value repeated once for every matching file.
        extensions: Filename suffix (or tuple of suffixes) to accept. Matching
            is case-insensitive. Defaults to the PNG/JPEG filter.

    Returns:
        A ``(file_list, labels)`` pair of equal-length lists. Paths are sorted
        so the result does not depend on the order in which the filesystem
        happens to list directory entries.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.walk yields entries in arbitrary order; sorting keeps the dataset
    # order (and any seeded split made from it) reproducible.
    file_list = sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.lower().endswith(suffixes)
    )
    return file_list, [label] * len(file_list)

# Get file lists and labels (label 0 = authentic, label 1 = tampered)
authentic_files, authentic_labels = get_file_list_and_labels(authentic_dir, 0)
tampered_files, tampered_labels = get_file_list_and_labels(tampered_dir, 1)

# Combine authentic and tampered data.
# Both lists are concatenated in the same order (tampered first) so that
# all_files[i] stays aligned with all_labels[i].
all_files = tampered_files + authentic_files
all_labels = tampered_labels + authentic_labels

In [None]:
def load_and_preprocess_image(image_path):
    """Read ``image_path`` as a grayscale image and resize it to ``IMG_SIZE``.

    Raises:
        ValueError: If OpenCV could not read the file.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        return cv2.resize(gray, IMG_SIZE)
    # A None result from cv2.imread is treated as a failed load.
    raise ValueError(f"Image at {image_path} could not be loaded.")

# Load every image listed in all_files (read from the dataset on disk).
images = list(map(load_and_preprocess_image, all_files))

In [None]:
def fourier_transform(image, eps=1e-8):
    """Return the centred log-magnitude spectrum of ``image``.

    Args:
        image: 2-D array passed to ``np.fft.fft2``.
        eps: Floor applied to the magnitude before taking the logarithm.
            Without it, any exactly-zero frequency bin (e.g. every non-DC bin
            of a constant image) becomes ``-inf`` and contaminates the
            feature vector.

    Returns:
        Array with the same shape as ``image`` holding ``20 * ln(|F|)`` with
        the zero-frequency component shifted to the centre. Values are
        unchanged from the unclamped formula wherever ``|F| >= eps``.
    """
    shifted = np.fft.fftshift(np.fft.fft2(image))
    magnitude = np.maximum(np.abs(shifted), eps)
    return 20 * np.log(magnitude)

def noise_features(image):
    """Basic noise model: return ``(mean, standard deviation)`` over all of ``image``."""
    return np.mean(image), np.std(image)

def edge_detection(image):
    """Return the Canny edge map of ``image`` using thresholds 100 and 200."""
    return cv2.Canny(image, 100, 200)

def texture_features(image):
    """Return the mean GLCM contrast of ``image``.

    The co-occurrence matrix is built at pixel distance 1 for four angles
    (0, pi/4, pi/2, 3*pi/4) with 256 grey levels; the per-angle contrast
    values are averaged into a single number.
    """
    # NOTE(review): newer scikit-image releases spell these functions
    # graycomatrix / graycoprops -- confirm against the installed version.
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    glcm = greycomatrix(image, [1], angles, levels=256)
    return np.mean(greycoprops(glcm, 'contrast'))

# Works on single-channel (grayscale) images.
def segment_image(image, k=4):
    """Quantise ``image`` into ``k`` intensity clusters with OpenCV k-means.

    Every pixel is replaced by the (uint8) centre of the cluster it was
    assigned to; the result has the same shape as ``image``.
    """
    # One row per pixel with a single intensity column, as float32 for cv2.kmeans.
    samples = np.float32(image.reshape((-1, 1)))

    # Termination tuple: (type flags, 10, 1.0); the call below also requests 10 attempts.
    # NOTE(review): cv2.KMEANS_RANDOM_CENTERS implies random initial centres, so
    # output may differ between runs unless OpenCV's RNG is seeded -- confirm.
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, assignments, centroids = cv2.kmeans(
        samples, k, None, stop_criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Look up each pixel's cluster centre and restore the original image shape.
    palette = np.uint8(centroids)
    return palette[assignments.flatten()].reshape((image.shape))

def extract_features(image):
    """Build one flat feature vector for ``image``.

    The vector concatenates, in order: the flattened Fourier magnitude
    spectrum, the (mean, std) noise statistics, the flattened Canny edge map,
    the GLCM contrast value, and the flattened k-means segmentation.
    """
    spectrum = fourier_transform(image)
    noise_stats = noise_features(image)
    edge_map = edge_detection(image)
    # Named `texture` rather than `tf` to avoid shadowing the tensorflow alias.
    texture = texture_features(image)
    segmented = segment_image(image)
    parts = [
        spectrum.ravel(),
        noise_stats,
        edge_map.ravel(),
        texture.ravel(),
        segmented.ravel(),
    ]
    return np.hstack(parts)

In [None]:
def prepare_dataset(labeled_images, count):
    """Extract features for every ``(image, label)`` pair.

    Args:
        labeled_images: Iterable of ``(image, label)`` pairs; one-shot
            iterables such as ``zip(...)`` are accepted.
        count: Starting value of the progress counter (0 for a fresh run).

    Returns:
        ``(features, labels)`` as NumPy arrays, one row / entry per input pair.
    """
    # Materialise the pairs so the progress total comes from the actual input
    # instead of an unrelated notebook-level variable.
    pairs = list(labeled_images)
    total = count + len(pairs)

    features = []
    labels = []
    for image, label in pairs:
        count += 1
        if count % 1000 == 0:
            print(f"{count}/{total} completed")
        features.append(extract_features(image))
        labels.append(label)
    return np.array(features), np.array(labels)

# Build the feature matrix X and label vector y for the whole dataset,
# pairing each loaded image with its label and starting the progress counter at 0.
X, y = prepare_dataset(zip(images, all_labels), 0)

In [None]:
import pickle

# Dataset serialization
def serialize_dataset(features, labels, features_filename, labels_filename):
    """Pickle ``features`` and ``labels`` to their respective files.

    Args:
        features: Object written to ``features_filename``.
        labels: Object written to ``labels_filename``.
        features_filename: Destination path for the pickled features.
        labels_filename: Destination path for the pickled labels.

    Missing parent directories are created first, so a fresh checkout without
    the output folder does not fail with ``FileNotFoundError``.
    """
    for payload, filename in ((features, features_filename), (labels, labels_filename)):
        parent = os.path.dirname(filename)
        if parent:  # empty when the file goes into the current directory
            os.makedirs(parent, exist_ok=True)
        with open(filename, 'wb') as f:
            pickle.dump(payload, f)

# Dataset deserialization
def deserialize_dataset(features_filename, labels_filename):
    """Load and return ``(features, labels)`` from two pickle files.

    The features file is read first, then the labels file.
    """
    # NOTE(review): pickle.load can execute arbitrary code -- only point this
    # at files you produced yourself.
    loaded = []
    for filename in (features_filename, labels_filename):
        with open(filename, 'rb') as handle:
            loaded.append(pickle.load(handle))
    features, labels = loaded
    return features, labels

# Serialization: persist the extracted features and labels to ../models
serialize_dataset(X, y, '../models/features.pkl', '../models/labels.pkl')

# Deserialization: read both files back as a round-trip check
X_deserialized, y_deserialized = deserialize_dataset('../models/features.pkl', '../models/labels.pkl')

# Show the reloaded arrays.
print("Características deserializadas (numpy array):", X_deserialized)
print("Etiquetas deserializadas (numpy array):", y_deserialized)

In [None]:
# Hold out 25% of the samples for testing; random_state fixes the shuffle.
# NOTE(review): this splits the in-memory X, y rather than the deserialized copies.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
# Support-vector classifier with scikit-learn's default hyperparameters.
# NOTE(review): features are passed in without any scaling step -- confirm
# whether standardisation is wanted before fitting.
model = svm.SVC()
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out test split and report the result.
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")