In [None]:
import os, re, random
import pandas as pd
import numpy as np
import zipfile
np.random.seed(2)
from PIL import Image
import io
# Visualisation
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# Tools for cross-validation, error calculation
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
from keras.utils import to_categorical

# Machine Learning
from keras.models import Sequential
from keras import layers
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.layers import MaxPooling2D, GlobalMaxPooling2D, Activation
from keras.optimizers import RMSprop
# from keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
# from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.models import Model
from keras.applications import VGG16
from keras.applications.resnet50 import ResNet50

In [None]:
# Locations of the Kaggle dogs-vs-cats archives and the size every image is resized to.
zip_train_path = '/kaggle/input/dogs-vs-cats/train.zip'
zip_test_path = '/kaggle/input/dogs-vs-cats/test1.zip'
new_size = (128, 128)


def preprocess(input_zip, size=new_size):
    """Load every image stored in a zip archive as a normalised grayscale array.

    Members whose name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-sensitive)
    are processed in ``namelist()`` order: each one is decoded with PIL,
    converted to single-channel ``'L'`` mode, resized to ``size`` and divided
    by 255.0.

    Parameters
    ----------
    input_zip : str or file-like
        Archive to read; handed directly to ``zipfile.ZipFile``.
    size : tuple
        Target size passed to ``Image.resize``. Defaults to the value
        ``new_size`` had when this function was defined.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the list of per-image arrays; an empty array
        when the archive contains no matching member.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    pixel_arrays = []

    with zipfile.ZipFile(input_zip, 'r') as zf:
        for member in zf.namelist():
            if not member.endswith(image_suffixes):
                continue

            # Pull the raw bytes out of the archive, then decode from memory.
            with zf.open(member) as handle:
                raw_bytes = handle.read()

            picture = Image.open(io.BytesIO(raw_bytes)).convert('L').resize(size)
            pixel_arrays.append(np.array(picture) / 255.0)

    return np.array(pixel_arrays)


In [None]:
# Decode, grayscale, resize and normalise every image in the training archive.
train_dataset = preprocess(input_zip=zip_train_path, size=new_size)


In [None]:
def extract_tags(input_zip):
    """Derive a 'dog' / 'cat' / 'unknown' tag for every image in a zip archive.

    Members whose name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-sensitive)
    are visited in ``namelist()`` order, so the result lines up index-for-index
    with any other pass over the archive that uses the same filter and order.

    The tag is taken from the file name first (case-insensitive substring
    match, 'dog' checked before 'cat'). Only when the file name is not
    identifiable is the directory part of the member path consulted. Checking
    the file name first stops a directory such as ``dogs-vs-cats/`` from
    tagging every file inside it as 'dog'.

    Parameters
    ----------
    input_zip : str or file-like
        Archive to inspect; handed directly to ``zipfile.ZipFile``.

    Returns
    -------
    list of (str, str)
        ``(tag, member_path)`` tuples, where ``member_path`` is the unmodified
        archive member name.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    def _tag_in(text):
        # 'dog' takes precedence over 'cat' when both substrings occur.
        for tag in ('dog', 'cat'):
            if tag in text:
                return tag
        return None

    results = []
    with zipfile.ZipFile(input_zip, 'r') as archive_zip:
        for img_path in archive_zip.namelist():
            if not img_path.endswith(image_suffixes):
                continue

            # zipfile reports member names with '/' separators; with no '/'
            # present the whole name is treated as the file name.
            directory, _, filename = img_path.lower().rpartition('/')

            # Fall back to 'unknown' when neither part is identifiable.
            tag = _tag_in(filename) or _tag_in(directory) or 'unknown'
            results.append((tag, img_path))

    return results
# One (tag, member_path) tuple per image, in archive order.
train_labels = extract_tags(zip_train_path)

# Images and tags come from two separate passes over the archive; make sure
# they still line up before pairing them for training.
if len(train_labels) != len(train_dataset):
    raise ValueError(
        f'Image/label mismatch: {len(train_dataset)} images vs {len(train_labels)} labels'
    )

# An 'unknown' tag would otherwise be silently encoded as 0 (cat) below.
unlabelled_paths = [path for label, path in train_labels if label == 'unknown']
if unlabelled_paths:
    raise ValueError(
        f'{len(unlabelled_paths)} images have no dog/cat tag, e.g. {unlabelled_paths[:3]}'
    )

# Binary target: 1 = dog, 0 = cat.
train_binary = np.array([1 if label == 'dog' else 0 for label, _ in train_labels])

# Append the channel axis expected by Conv2D. Guarded on ndim so that
# re-running this cell does not keep stacking extra trailing axes.
if train_dataset.ndim == 3:
    train_dataset = np.expand_dims(train_dataset, axis=-1)

# Train split: 80% train, 20% held out.
X_train, X_temp, y_train, y_temp = train_test_split(train_dataset, train_binary, test_size=0.2, random_state=42)

# Validation-Test Split: the held-out 20% is halved into validation and test.
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [None]:
# Visual sanity check of one validation sample and its label (1 = dog, 0 = cat).
j = 480
# Samples carry a trailing channel axis; drop singleton axes and render in
# grayscale so the picture is shown as the model sees it, not in the default
# colour map.
plt.imshow(np.squeeze(X_val[j]), cmap='gray')
print(y_val[j])

In [None]:
# Small CNN for binary classification of 128x128 single-channel images:
# three Conv2D + max-pooling stages with 32, 64 and 128 filters, followed by
# a dense head ending in one sigmoid unit.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# The single sigmoid output pairs with binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 10 epochs, reporting metrics on the validation split after each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_val, y_val),
)

In [None]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metrics (accuracy here).
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Accuracy: {accuracy:.4f}')
print(f'Loss: {loss:.4f}')

In [None]:
# Confusion matrix on the held-out test split.
# Sigmoid outputs above 0.5 are counted as class 1, everything else as class 0.
y_pred = (model.predict(X_test) > 0.5).astype(int)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=['Class 0', 'Class 1'],
    yticklabels=['Class 0', 'Class 1'],
    ax=ax,
)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Decode, grayscale, resize and normalise every image in the test archive.
test_dataset = preprocess(input_zip=zip_test_path, size=new_size)

In [None]:
# Hard 0/1 predictions for the held-out split: sigmoid output above 0.5 -> 1.
# NOTE(review): this predicts on X_test, not on test_dataset — confirm that is intended.
y_real = (model.predict(X_test) > 0.5).astype(int)

In [None]:
# Show a few randomly chosen test images, each titled with the model's
# thresholded prediction (1 = dog, 0 = cat).
num_samples = 5  # Number of samples to display
random_indices = np.random.choice(len(y_real), num_samples, replace=False)
plt.figure(figsize=(15, 5))
for i, idx in enumerate(random_indices):
    plt.subplot(1, num_samples, i + 1)
    plt.imshow(X_test[idx].reshape(128, 128), cmap='gray')  # Reshape for display and use grayscale
    # y_test[idx] holds the true label for the same sample if it should be shown as well.
    # Index with idx so each panel shows its own prediction; indexing with the
    # whole random_indices array repeated the first sampled prediction on every panel.
    plt.title(f'Pred: {y_real[idx][0]}')
    plt.axis('off')
plt.tight_layout()
plt.show()