In [None]:
from datasets import load_dataset
import numpy as np
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Load the "train" split of the Hugging Face Hub dataset into `dataset`.
dataset = load_dataset("Hemg/AI-Generated-vs-Real-Images-Datasets",split="train")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Display the first example; the author recorded its image size as 880x400.
dataset[0]

In [None]:
# (rows, columns) of the loaded split; the author recorded the result as (152710, 2).
dataset.shape

In [None]:
import matplotlib.pyplot as plt

# Preview the first example: the image itself, with its label in the title.
first_example = dataset[0]

preview_ax = plt.gca()
preview_ax.imshow(first_example["image"])
preview_ax.set_title(f"Label: {first_example['label']}")
preview_ax.axis("off")
plt.show()

In [None]:
# Convert every example to a 32x32 RGB array and collect its label.
#
# The dataset is iterated once, so each row (and therefore each image) is
# decoded a single time; indexing `dataset[i]` separately for the image and
# for the label decodes the same image twice per sample.
IMG_SIZE = (32, 32)  # (width, height) passed to PIL's resize
n_samples = len(dataset)

# Preallocate the pixel array so the full dataset is never held twice in
# memory (once as a Python list of arrays and once as the final ndarray).
X = np.empty((n_samples, IMG_SIZE[1], IMG_SIZE[0], 3), dtype="float32")
y = []

for i, example in enumerate(tqdm(dataset, total=n_samples, desc="Resizing images")):
    img = example["image"].convert("RGB").resize(IMG_SIZE)
    X[i] = np.asarray(img)  # uint8 pixels are cast to float32 on assignment
    y.append(example["label"])

# Scale pixel values from [0, 255] to [0, 1], in place to avoid another copy.
X /= 255.0
y = np.array(y)

print("Tamaño del dataset:", X.shape, y.shape)



KeyboardInterrupt: 

In [None]:
# Hold out 20% of the samples as (Xdata, ydata). The split is shuffled,
# stratified on the labels so both parts keep the same class ratio, and
# seeded so it can be reproduced.
X_train, Xdata, y_train, ydata = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=21,
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to batches drawn from this generator:
# rotations, horizontal/vertical shifts and flips along both axes.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

# Fully connected classifier on flattened 32x32x3 inputs. Each hidden stage
# is Dense(gelu) -> BatchNormalization -> Dropout(0.3), with widths shrinking
# from 512 down to 64, followed by a 2-way softmax output.
hidden_layers = []
for units in (512, 256, 128, 64):
    hidden_layers.append(tf.keras.layers.Dense(units, activation='gelu'))
    hidden_layers.append(tf.keras.layers.BatchNormalization())
    hidden_layers.append(tf.keras.layers.Dropout(0.3))

model = tf.keras.models.Sequential(
    [tf.keras.layers.Flatten(input_shape=(32, 32, 3))]
    + hidden_layers
    + [tf.keras.layers.Dense(2, activation='softmax')]
)

In [None]:
# Adam with a small fixed starting learning rate; ReduceLROnPlateau below may
# halve it during training.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# categorical_crossentropy expects one-hot targets, so expand the integer labels.
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=2)
y_test_one_hot = tf.keras.utils.to_categorical(ydata, num_classes=2)

# Stop after 8 epochs without improvement and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=8, restore_best_weights=True, verbose=1
)
# Halve the learning rate after 4 epochs without improvement.
lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=4, verbose=1)

# Training batches come from the augmenting generator; the held-out split is
# passed unaugmented as validation data.
# NOTE(review): (Xdata, ydata) drives early stopping here and is also the set
# scored in the evaluation cells, so those scores are not from unseen data.
train_batches = datagen.flow(X_train, y_train_one_hot, batch_size=64)

history = model.fit(
    train_batches,
    validation_data=(Xdata, y_test_one_hot),
    epochs=30,
    callbacks=[early_stopping, lr_schedule],
)

In [None]:
import os
from google.colab import drive

# Mount Google Drive in the Colab runtime and make the project folder the
# working directory, so relative paths in later cells resolve inside it.
MOUNT_POINT = '/content/drive'
PROJECT_DIR = '/content/drive/My Drive/IA2'

drive.mount(MOUNT_POINT)
os.chdir(PROJECT_DIR)
print(os.getcwd())

In [None]:
# Save the current model to a .keras file, relative to the working directory.
# NOTE(review): the filename says "seed13" but the train/test split uses
# random_state=21 — confirm which seed this name refers to.
model.save('DNNmodelseed13.keras')

In [None]:
from tensorflow.keras.models import load_model
# Replace `model` with a model read from disk.
# NOTE(review): this loads 'DNNmodelwholedatattsplit.keras', not the
# 'DNNmodelseed13.keras' file written by the save cell, so the evaluation
# cells score this loaded model while `history` still belongs to the model
# trained in this session — confirm this is intended. The file must already
# exist in the working directory for a fresh run to succeed.
model = load_model('DNNmodelwholedatattsplit.keras')

In [None]:
# Training curves from `history`: accuracy on the left panel, loss on the right.
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10, 5))

# One entry per panel:
# (train key, validation key, y-axis label, train legend, validation legend)
panels = [
    ('accuracy', 'val_accuracy', 'Accuracy', 'Train Accuracy', 'Validation Accuracy'),
    ('loss', 'val_loss', 'Loss', 'train loss', 'evaluation loss'),
]

for position, (train_key, val_key, y_label, train_legend, val_legend) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, position)
    ax.plot(history.history[train_key], label=train_legend)
    ax.plot(history.history[val_key], label=val_legend)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, roc_auc_score

# --- Predict with the model ---
# The softmax output gives one probability per class for every sample.
y_pred_prob = model.predict(Xdata)

# Hard prediction: index of the most probable class.
y_pred_class = np.argmax(y_pred_prob, axis=1)

# If ydata already holds 0/1 integer labels, no transformation is needed.
y_true = ydata

# --- Evaluation metrics ---
# NOTE(review): Xdata/ydata is also passed as validation_data during training,
# so these numbers are validation scores rather than scores on unseen data.
cm = confusion_matrix(y_true, y_pred_class)
# precision_score / recall_score are called with their defaults, which report
# the class labelled 1.
precision = precision_score(y_true, y_pred_class)
recall = recall_score(y_true, y_pred_class)
accuracy = accuracy_score(y_true, y_pred_class)
# Column 1 of the softmax output is the probability of class 1.
auc = roc_auc_score(y_true, y_pred_prob[:, 1])

# --- Show results ---
# The emoji were stored as mis-decoded UTF-8 and are restored here.
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"🎯 Precision: {precision:.4f}")
print(f"🔁 Recall: {recall:.4f}")
print(f"📈 AUC-ROC: {auc:.4f}")

# --- Confusion matrix ---
plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the predictions made in the evaluation cell.
#
# Row names are read from the dataset's own label metadata, so each row is
# named after the class its integer label really encodes. A hard-coded list
# silently swaps the rows if the dataset's class order differs from the
# assumed one. The original names are kept as a fallback for label features
# that carry no class names.
# NOTE(review): the fallback assumes 0 = real and 1 = AI-generated — verify
# against dataset.features["label"] before relying on it.
label_feature = dataset.features["label"]
class_names = getattr(label_feature, "names", None) or ["Real", "IA"]

print("\n=== Classification report (test) ===")
print(classification_report(y_true, y_pred_class, target_names=class_names, digits=4))