In [8]:
import cv2
import numpy as np
from sklearn import svm
from skimage.feature import graycomatrix, graycoprops
import os

In [2]:
# Root folder of the CASIA2 dataset (relative to the notebook's working directory)
PATH = '../data/CASIA2'

# Sub-folders with the authentic ('Au') and tampered ('Tp2') images
authentic_dir, tampered_dir = (os.path.join(PATH, sub) for sub in ('Au', 'Tp2'))

# Image size constant
IMG_SIZE = (160, 160)

def get_file_list_and_labels(directory, label):
    """Recursively collect image paths under ``directory`` and label each one.

    Files are matched case-insensitively on the extensions ``.png``, ``.jpg``
    and ``.jpeg``. Sub-directories and file names are visited in sorted order
    so the returned list is the same on every run and every filesystem
    (``os.walk`` alone yields entries in arbitrary order).

    Args:
        directory: Root folder to search.
        label: Value repeated once per file found.

    Returns:
        A ``(file_list, labels)`` tuple of two equally long lists. Both are
        empty when ``directory`` does not exist or holds no matching files.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    file_list = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend into sub-directories in a stable order.
        dirs.sort()
        file_list.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(image_exts)
        )
    labels = [label] * len(file_list)
    return file_list, labels

# Collect paths and labels per class: 0 = authentic, 1 = tampered
authentic_files, authentic_labels = get_file_list_and_labels(authentic_dir, 0)
tampered_files, tampered_labels = get_file_list_and_labels(tampered_dir, 1)

# Merge both classes, tampered entries first
all_files = [*tampered_files, *authentic_files]
all_labels = [*tampered_labels, *authentic_labels]

In [4]:
def load_image(image_path):
    """Read ``image_path`` with OpenCV in grayscale mode and return the result.

    The value is whatever ``cv2.imread`` produces; no check is made here that
    the file was actually readable.
    """
    return cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

In [48]:
def fourier_transform(image, eps=1e-12):
    """Return the centred log-magnitude spectrum of a 2-D array.

    Computes ``20 * ln(|F|)`` where ``F`` is the 2-D FFT of ``image`` shifted
    so the zero-frequency component sits in the middle of the array.

    Args:
        image: 2-D array-like of pixel values.
        eps: Lower bound applied to the magnitude before taking the log.
            Frequency bins whose magnitude is exactly zero (e.g. every non-DC
            bin of a constant image) would otherwise map to ``-inf`` and make
            the feature vector unusable for model fitting. Magnitudes at or
            above ``eps`` are unaffected.

    Returns:
        Float array with the same shape as ``image``; all entries are finite
        for ``eps > 0``.
    """
    spectrum = np.fft.fftshift(np.fft.fft2(image))
    magnitude = np.abs(spectrum)
    # Clamp instead of adding eps so non-zero bins keep their exact value.
    return 20 * np.log(np.maximum(magnitude, eps))

def noise_features(image):
    """Basic noise model: mean and standard deviation over all values of ``image``.

    Returns:
        A ``(mean, std)`` tuple of two scalars.
    """
    return np.mean(image), np.std(image)

def edge_detection(image):
    """Run OpenCV's Canny detector on ``image`` with thresholds 100 and 200.

    Returns the array produced by ``cv2.Canny``.
    """
    low_threshold, high_threshold = 100, 200
    return cv2.Canny(image, low_threshold, high_threshold)

def texture_features(image):
    """Mean GLCM contrast of ``image``.

    Builds a grey-level co-occurrence matrix with 256 levels at distance 1 for
    the four angles 0, pi/4, pi/2 and 3*pi/4, then averages the 'contrast'
    property over all of them.

    Returns:
        A single scalar.
    """
    distances = [1]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    glcm = graycomatrix(image, distances, angles, levels=256)
    return np.mean(graycoprops(glcm, 'contrast'))

def segment_image(image, k=4):
    """Quantise a grayscale image to ``k`` intensity clusters with OpenCV k-means.

    Every pixel is treated as a one-dimensional sample (its intensity), the
    samples are clustered, and each pixel is replaced by its cluster centre
    cast to uint8.

    Args:
        image: Array of pixel values; reshaped to a single column internally.
        k: Number of clusters.

    Returns:
        uint8 array with the same shape as ``image``. Centres are initialised
        with ``cv2.KMEANS_RANDOM_CENTERS``, so results may vary between runs.
    """
    # One row per pixel, single intensity column, float32 as passed to cv2.kmeans
    samples = np.float32(image.reshape((-1, 1)))

    # Termination criteria tuple (EPS + MAX_ITER flags, 10, 1.0) and 10 attempts
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    attempts = 10
    _, assignments, centroids = cv2.kmeans(
        samples, k, None, stop_criteria, attempts, cv2.KMEANS_RANDOM_CENTERS
    )

    # Look up each pixel's centre (as uint8) and restore the original layout
    quantised = np.uint8(centroids)[assignments.flatten()]
    return quantised.reshape(image.shape)

def extract_features(image, use_noise=False, use_edges=False, use_texture=False):
    """Build the 1-D feature vector for one grayscale image.

    The Fourier log-magnitude spectrum and the k-means segmented image are
    always included. The remaining feature groups are optional and disabled by
    default, which reproduces the previous behaviour where they were commented
    out and replaced by empty placeholders.

    Args:
        image: 2-D grayscale image array.
        use_noise: Also append the two scalars from ``noise_features``.
        use_edges: Also append the flattened ``edge_detection`` map.
        use_texture: Also append the scalar from ``texture_features``.

    Returns:
        1-D array laid out as
        ``[fourier, (noise mean, noise std), (edges), (texture), segmentation]``
        where the parenthesised groups appear only when enabled.
    """
    parts = [fourier_transform(image).ravel()]
    if use_noise:
        # Two scalars; np.hstack promotes them to length-1 arrays.
        parts.extend(noise_features(image))
    if use_edges:
        parts.append(edge_detection(image).ravel())
    if use_texture:
        parts.append(texture_features(image))
    parts.append(segment_image(image).ravel())

    # Concatenate every enabled feature group into a single 1-D array
    return np.hstack(parts)

def prepare_dataset(images, target_shape=(256, 384), verbose=True):
    """Turn ``(image, label)`` pairs into a feature matrix and a label vector.

    Only images whose ``.shape`` equals ``target_shape`` are used, so every
    feature vector has the same length. Entries whose image is ``None`` (for
    example a file the loader could not read) are skipped instead of raising
    ``AttributeError`` on ``None.shape``.

    Args:
        images: Sized iterable of ``(image, label)`` pairs; label 0 means not
            manipulated and 1 means manipulated.
        target_shape: Required image shape; defaults to ``(256, 384)``.
        verbose: When true, print a ``"<count>/<total>"`` progress line after
            each entry, including skipped ones.

    Returns:
        ``(features, labels)`` as NumPy arrays with one row/entry per accepted
        image. Both are empty when no image is accepted.
    """
    wanted_shape = tuple(target_shape)
    features = []
    labels = []
    total = len(images)

    for count, (image, label) in enumerate(images, start=1):
        if image is not None and image.shape == wanted_shape:
            features.append(extract_features(image))
            labels.append(label)

        if verbose:
            print(f"{count}/{total}")

    return np.array(features), np.array(labels)


import os

# Raw strings keep the Windows backslashes literal; sequences such as "\d" or
# "\C" in a normal string are invalid escapes that Python warns about.
au_train_dir = r'D:\dataset--ml\CASIA2-Train-Min\Au'
tp_train_dir = r'D:\dataset--ml\CASIA2-Train-Min\Tp'

# Sort the listings so the sample order does not depend on the filesystem.
au_paths = [os.path.join(au_train_dir, entry) for entry in sorted(os.listdir(au_train_dir)) if entry.endswith(".jpg")]
tp_paths = [os.path.join(tp_train_dir, entry) for entry in sorted(os.listdir(tp_train_dir)) if entry.endswith(".jpg")]

model = svm.SVC()

# Load images: label 0 for the Au folder, label 1 for the Tp folder
images = [(load_image(path), 0) for path in au_paths]
images.extend([(load_image(path), 1) for path in tp_paths])

# Build the feature matrix and label vector
X, y = prepare_dataset(images)

# Fit only when at least one image was accepted by prepare_dataset
if len(y) > 0:
    model.fit(X, y)

1/709
2/709
3/709
4/709
5/709
6/709
7/709
8/709
9/709
10/709
11/709
12/709
13/709
14/709
15/709
16/709
17/709
18/709
19/709
20/709
21/709
22/709
23/709
24/709
25/709
26/709
27/709
28/709
29/709
30/709
31/709
32/709
33/709
34/709
35/709
36/709
37/709
38/709
39/709
40/709
41/709
42/709
43/709
44/709
45/709
46/709
47/709
48/709
49/709
50/709
51/709
52/709
53/709
54/709
55/709
56/709
57/709
58/709
59/709
60/709
61/709
62/709
63/709
64/709
65/709
66/709
67/709
68/709
69/709
70/709
71/709
72/709
73/709
74/709
75/709
76/709
77/709
78/709
79/709
80/709
81/709
82/709
83/709
84/709
85/709
86/709
87/709
88/709
89/709
90/709
91/709
92/709
93/709
94/709
95/709
96/709
97/709
98/709
99/709
100/709
101/709
102/709
103/709
104/709
105/709
106/709
107/709
108/709
109/709
110/709
111/709
112/709
113/709
114/709
115/709
116/709
117/709
118/709
119/709
120/709
121/709
122/709
123/709
124/709
125/709
126/709
127/709
128/709
129/709
130/709
131/709
132/709
133/709
134/709
135/709
136/709
137/709
138/709
139/

In [2]:
# Report the dimensions of the training feature matrix and label vector
print(f"X_train shape: {X.shape}", f"y_train shape: {y.shape}", sep="\n")

X_train shape: (577, 294915)
y_train shape: (577,)


In [3]:
# Create a fresh SVC with its default constructor arguments and fit it on X, y.
# NOTE(review): X and y are not defined in this cell; they must already exist
# in the kernel (the dataset-preparation cell assigns them) — confirm the cell
# order survives Restart & Run All.
model = svm.SVC()
model.fit(X, y)

In [49]:
import os

# Raw strings keep the Windows backslashes literal; sequences such as "\d" or
# "\C" in a normal string are invalid escapes that Python warns about.
au_test_dir = r'D:\dataset--ml\CASIA2-Test\Au'
tp_test_dir = r'D:\dataset--ml\CASIA2-Test\Tp'

# Sort the listings so the [:100] subset below is the same on every run.
au_test_paths = [os.path.join(au_test_dir, entry) for entry in sorted(os.listdir(au_test_dir)) if entry.endswith(".jpg")]
tp_test_paths = [os.path.join(tp_test_dir, entry) for entry in sorted(os.listdir(tp_test_dir)) if entry.endswith(".jpg")]

# Load the first 100 images of each class: label 0 for Au, label 1 for Tp
images = [(load_image(path), 0) for path in au_test_paths[:100]]
images.extend([(load_image(path), 1) for path in tp_test_paths[:100]])

# Build the test feature matrix and label vector
X_test, y_test = prepare_dataset(images)

1/200
2/200
3/200
4/200
5/200
6/200
7/200
8/200
9/200
10/200
11/200
12/200
13/200
14/200
15/200
16/200
17/200
18/200
19/200
20/200
21/200
22/200
23/200
24/200
25/200
26/200
27/200
28/200
29/200
30/200
31/200
32/200
33/200
34/200
35/200
36/200
37/200
38/200
39/200
40/200
41/200
42/200
43/200
44/200
45/200
46/200
47/200
48/200
49/200
50/200
51/200
52/200
53/200
54/200
55/200
56/200
57/200
58/200
59/200
60/200
61/200
62/200
63/200
64/200
65/200
66/200
67/200
68/200
69/200
70/200
71/200
72/200
73/200
74/200
75/200
76/200
77/200
78/200
79/200
80/200
81/200
82/200
83/200
84/200
85/200
86/200
87/200
88/200
89/200
90/200
91/200
92/200
93/200
94/200
95/200
96/200
97/200
98/200
99/200
100/200
101/200
102/200
103/200
104/200
105/200
106/200
107/200
108/200
109/200
110/200
111/200
112/200
113/200
114/200
115/200
116/200
117/200
118/200
119/200
120/200
121/200
122/200
123/200
124/200
125/200
126/200
127/200
128/200
129/200
130/200
131/200
132/200
133/200
134/200
135/200
136/200
137/200
138/200
139/

In [50]:
# Evaluate the model: score the fitted classifier on the test features and labels
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.776595744680851
