In [1]:
from utils import prepare_image_forgery_dataset
import os

# Root folder of the CASIA2 dataset, relative to the notebook's working directory.
PATH = '../data/CASIA2'

# The 'Au' and 'Tp2' sub-folders are used as the authentic and tampered image directories.
authentic_dir, tampered_dir = (
    os.path.join(PATH, subfolder) for subfolder in ('Au', 'Tp2')
)

# NOTE(review): the two trailing 100s are presumably the target image size — confirm
# against utils.prepare_image_forgery_dataset.
X_train, X_test, y_train, y_test = prepare_image_forgery_dataset(
    authentic_dir,
    tampered_dir,
    100,
    100,
)

2024-07-07 00:50:36.470022: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-07 00:50:36.470437: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-07 00:50:36.592809: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-07 00:50:36.848621: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split

class SimpleBoostingClassifier(BaseEstimator, ClassifierMixin):
    """AdaBoost-style ensemble built from a fixed pool of candidate models.

    In every boosting round each model in ``base_models`` is refit on the
    re-weighted training data, the candidate with the lowest weighted training
    error is kept, and the sample weights of the examples it misclassified are
    increased.

    Labels are expected to be binary and encoded as 0/1: raw model outputs are
    thresholded at 0.5 and compared directly against ``y``.

    Parameters
    ----------
    base_models : sequence
        Candidate models. Each must provide ``fit(X, y, sample_weight=...)``
        and ``predict(X)``. They are refit in place by ``fit``.
    rounds : int, default=5
        Number of boosting rounds (one model is added per round).
    learning_rate : float, default=0.1
        Multiplier applied to each round's alpha.
    validation_fraction : float, default=0.1
        Share of the data held out to report the validation error per round.
    random_state : int, default=42
        Seed forwarded to ``train_test_split``.

    Attributes
    ----------
    models : list
        Model selected in each round (a snapshot taken right after fitting).
    alphas : list of float
        Vote weight of each entry in ``models``.
    classes_ : ndarray
        Sorted unique labels seen in the training split.
    """

    # Keeps the weighted error strictly inside (0, 1) so its log-odds stay finite.
    _EPS = 1e-10

    def __init__(self, base_models, rounds=5, learning_rate=0.1, validation_fraction=0.1, random_state=42):
        self.base_models = base_models
        self.rounds = rounds
        self.learning_rate = learning_rate
        self.validation_fraction = validation_fraction
        self.random_state = random_state
        self.models = []
        self.alphas = []

    @staticmethod
    def _to_labels(raw_output):
        """Flatten a model's ``predict`` output and threshold it at 0.5 into 0/1 labels."""
        return (np.asarray(raw_output).ravel() > 0.5).astype(int)

    @staticmethod
    def _snapshot(model):
        """Return an independent copy of a freshly fitted model.

        The candidates are refit in place on every round, so keeping a bare
        reference would let later rounds overwrite the state that an earlier
        alpha was computed for. If the model cannot be deep-copied, the shared
        reference is kept (the previous behaviour) and a warning is issued.
        """
        import copy
        import warnings

        try:
            return copy.deepcopy(model)
        except Exception as exc:  # some model objects are not copyable; degrade gracefully
            warnings.warn(
                f"Could not copy {type(model).__name__} ({exc!r}); storing a shared "
                "reference that later boosting rounds will refit.",
                RuntimeWarning,
            )
            return model

    def fit(self, X, y):
        """Run the boosting rounds and return ``self``.

        Parameters
        ----------
        X : array-like
            Training inputs; split into train/validation parts internally.
        y : array-like
            Binary 0/1 labels; flattened to 1-D before use.

        Raises
        ------
        ValueError
            If ``base_models`` is empty.
        """
        if len(self.base_models) == 0:
            raise ValueError("base_models must contain at least one model")

        # Make sure y is a 1-D array
        y = np.ravel(y)

        # Split the data into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=self.validation_fraction, random_state=self.random_state
        )

        self.classes_ = np.unique(y_train)
        y_train_int = y_train.astype(int)

        # Start from a clean ensemble so that calling fit() twice does not accumulate models
        self.models = []
        self.alphas = []

        # SAMME class-count term; it is 0 for two classes. The max() guard avoids
        # log(0) = -inf when the training split happens to contain a single class.
        class_term = np.log(max(len(self.classes_) - 1, 1))

        # Initialise uniform sample weights
        weights = np.ones(len(y_train)) / len(y_train)

        for round_idx in range(self.rounds):
            errors = np.zeros(len(self.base_models))
            predictions = np.zeros((len(self.base_models), len(y_train)))

            # Train every candidate on the current weights and record its weighted error
            for model_idx, model in enumerate(self.base_models):
                model.fit(X_train, y_train, sample_weight=weights)
                predictions[model_idx] = self._to_labels(model.predict(X_train))
                errors[model_idx] = np.sum(weights * (predictions[model_idx] != y_train_int))

            # Select the best candidate of this round
            best_model_index = int(np.argmin(errors))
            best_pred = predictions[best_model_index]

            # Compute alpha; clipping prevents +/-inf when the error is exactly 0 or 1
            error = float(np.clip(errors[best_model_index], self._EPS, 1.0 - self._EPS))
            alpha = self.learning_rate * (np.log((1 - error) / error) + class_term)

            # Up-weight the samples the selected model got wrong, then renormalise
            weights *= np.exp(alpha * (best_pred != y_train_int))
            weights /= np.sum(weights)

            # Store a snapshot of the model together with its vote weight
            self.models.append(self._snapshot(self.base_models[best_model_index]))
            self.alphas.append(alpha)

            # Report the ensemble's error on the validation split
            val_pred = self.predict(X_val)
            val_error = np.mean(val_pred != y_val.astype(int))
            print(f"Validation error after round {round_idx} model {len(self.models)}: {val_error:.4f}")

        return self

    def predict(self, X):
        """Predict labels by an alpha-weighted vote of the stored models.

        Each model's 0/1 prediction is mapped to a -1/+1 vote, so class 1 is
        returned only when the alpha-weighted majority favours it (ties go to
        the first class).
        """
        votes = np.zeros((len(self.models), X.shape[0]))
        for model_idx, model in enumerate(self.models):
            votes[model_idx] = 2 * self._to_labels(model.predict(X)) - 1

        weighted_votes = np.sum(np.asarray(self.alphas)[:, np.newaxis] * votes, axis=0)

        return self.classes_[(weighted_votes > 0).astype(int)]

In [3]:
from sklearn.metrics import accuracy_score
from models.cnn import CNNImageForgeryPredictorModel
from models.svm import SVMImageForgeryPredictorModel
from models.fine_tuning import TransferLearningImageForgeryPredictorModel

# Candidate pool: two instances of each architecture, in the order
# transfer-learning, SVM, CNN.
base_models = [
    model_cls()
    for model_cls in (
        TransferLearningImageForgeryPredictorModel,
        SVMImageForgeryPredictorModel,
        CNNImageForgeryPredictorModel,
    )
    for _ in range(2)
]

# Build the boosting ensemble and train it for a single round (fit returns the estimator).
boosting_model = SimpleBoostingClassifier(base_models, rounds=1).fit(X_train, y_train)

# Predict on the held-out test split and report the accuracy.
y_pred = boosting_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 10/20


In [None]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split

class CustomBoostingClassifier(BaseEstimator, ClassifierMixin):
    """AdaBoost-style ensemble that records its validation error after every estimator.

    For each of the ``n_estimators`` iterations every model in ``base_models``
    is refit on the re-weighted training data, the candidate with the lowest
    weighted training error is kept, and the sample weights of the examples it
    misclassified are increased.

    Labels are expected to be binary and encoded as 0/1: raw model outputs are
    flattened and thresholded at 0.5 before being compared against ``y``.

    Parameters
    ----------
    base_models : sequence
        Candidate models. Each must provide ``fit(X, y, sample_weight=...)``
        and ``predict(X)``. They are refit in place by ``fit``.
    n_estimators : int, default=10
        Number of boosting iterations (one model is added per iteration).
    learning_rate : float, default=0.1
        Multiplier applied to each model weight.
    validation_fraction : float, default=0.1
        Share of the data held out to track the validation error.
    random_state : int, default=42
        Seed forwarded to ``train_test_split``.

    Attributes
    ----------
    models : list
        Model selected in each iteration (a snapshot taken right after fitting).
    weights : list of float
        Vote weight of each entry in ``models``.
    validation_errors : list of float
        Ensemble error on the validation split after each iteration.
    classes_ : ndarray
        Sorted unique labels seen in the training split.
    """

    # Keeps the weighted error strictly inside (0, 1) so its log-odds stay finite.
    _EPS = 1e-10

    def __init__(self, base_models, n_estimators=10, learning_rate=0.1, validation_fraction=0.1, random_state=42):
        self.base_models = base_models
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.validation_fraction = validation_fraction
        self.random_state = random_state
        self.models = []
        self.weights = []
        self.validation_errors = []  # validation error recorded after each added model

    def print_validation_history(self):
        """Print the validation error recorded after each model was added (1-based index)."""
        print("Validation Error History:")
        for i, error in enumerate(self.validation_errors, 1):
            print(f"Model {i}: {error:.4f}")

    @staticmethod
    def _to_labels(raw_output):
        """Flatten a model's ``predict`` output and threshold it at 0.5 into 0/1 labels."""
        return (np.asarray(raw_output).ravel() > 0.5).astype(int)

    @staticmethod
    def _snapshot(model):
        """Return an independent copy of a freshly fitted model.

        The candidates are refit in place on every iteration, so keeping a bare
        reference would let later iterations overwrite the state that an
        earlier model weight was computed for. If the model cannot be
        deep-copied, the shared reference is kept (the previous behaviour) and
        a warning is issued.
        """
        import copy
        import warnings

        try:
            return copy.deepcopy(model)
        except Exception as exc:  # some model objects are not copyable; degrade gracefully
            warnings.warn(
                f"Could not copy {type(model).__name__} ({exc!r}); storing a shared "
                "reference that later boosting iterations will refit.",
                RuntimeWarning,
            )
            return model

    def fit(self, X, y):
        """Run the boosting iterations and return ``self``.

        Parameters
        ----------
        X : array-like
            Training inputs; split into train/validation parts internally.
        y : array-like
            Binary 0/1 labels; flattened to 1-D before use.

        Raises
        ------
        ValueError
            If ``base_models`` is empty.
        """
        if len(self.base_models) == 0:
            raise ValueError("base_models must contain at least one model")

        # Flatten y so an (n, 1) label array cannot broadcast against (n,) predictions
        y = np.ravel(y)

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=self.validation_fraction, random_state=self.random_state
        )

        self.classes_ = np.unique(y_train)

        # Start from a clean ensemble so that calling fit() twice does not accumulate state
        self.models = []
        self.weights = []
        self.validation_errors = []

        n_samples = X_train.shape[0]
        sample_weights = np.ones(n_samples) / n_samples

        for _ in range(self.n_estimators):
            best_model = None
            best_error = float('inf')
            best_missed = None

            for model in self.base_models:
                model.fit(X_train, y_train, sample_weight=sample_weights)
                missed = self._to_labels(model.predict(X_train)) != y_train
                error = np.sum(sample_weights * missed) / np.sum(sample_weights)

                if error < best_error:
                    best_model = model
                    best_error = error
                    best_missed = missed

            # Compute the model's vote weight; clipping prevents +/-inf when the
            # weighted error is exactly 0 or 1
            clipped_error = float(np.clip(best_error, self._EPS, 1.0 - self._EPS))
            model_weight = self.learning_rate * np.log((1 - clipped_error) / clipped_error)

            # Up-weight the samples the selected model got wrong (reusing the
            # predictions computed above), then renormalise
            sample_weights *= np.exp(model_weight * best_missed)
            sample_weights /= np.sum(sample_weights)

            self.models.append(self._snapshot(best_model))
            self.weights.append(model_weight)

            # Evaluate the current ensemble on the validation split
            val_pred = self.predict(X_val)
            val_error = np.mean(val_pred != y_val)
            self.validation_errors.append(val_error)

        return self

    def predict(self, X):
        """Predict labels by a weighted vote of the stored models.

        Each model's 0/1 prediction is mapped to a -1/+1 vote, so class 1 is
        returned only when the weighted majority favours it (ties go to the
        first class).
        """
        votes = np.zeros((len(self.models), X.shape[0]))
        for i, model in enumerate(self.models):
            votes[i] = 2 * self._to_labels(model.predict(X)) - 1

        weighted_votes = np.sum(np.asarray(self.weights)[:, np.newaxis] * votes, axis=0)
        return self.classes_[(weighted_votes > 0).astype(int)]

In [None]:
# Fresh candidate pool: two instances of each architecture, in the order
# transfer-learning, SVM, CNN.
base_models = [
    model_cls()
    for model_cls in (
        TransferLearningImageForgeryPredictorModel,
        SVMImageForgeryPredictorModel,
        CNNImageForgeryPredictorModel,
    )
    for _ in range(2)
]

# Train a ten-estimator ensemble (fit returns the estimator), then show the
# validation error recorded after each added model.
boosting_model = CustomBoostingClassifier(
    base_models,
    n_estimators=10,
    learning_rate=0.1,
).fit(X_train, y_train)
boosting_model.print_validation_history()

In [None]:
# Score the current `boosting_model` on the held-out test split.
# `accuracy_score` comes from the sklearn.metrics import in an earlier cell.
y_pred = boosting_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")