# Image Classifier Model

In [None]:
import os
import io
import json
import zipfile
import random
import pickle
import shutil
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing import image
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten


**Upload and processing the data**

In [None]:
url = "https://storage.googleapis.com/datascience-materials/dogs-vs-cats.zip"
response = requests.get(url)
response.raise_for_status()

with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    z.extractall("../src/dogs-vs-cats")

In [None]:
images_folder = "../src/dogs-vs-cats/dogs-vs-cats/train"
source_folder = "../data/processed"
cats_folder = "../data/processed/cats"
dogs_folder = "../data/processed/dogs"

# Crear carpetas si no existen
os.makedirs(source_folder, exist_ok=True)
os.makedirs(cats_folder, exist_ok=True)
os.makedirs(dogs_folder, exist_ok=True)

# Contadores para limitar 100 por clase
cat_count = 0
dog_count = 0
max_images = 250  # máximo por clase

for filename in os.listdir(images_folder):
    file_path = os.path.join(images_folder, filename)
    if os.path.isfile(file_path):
        nombre = filename.lower()
        if nombre.startswith("cat") and cat_count < max_images:
            shutil.copy(file_path, os.path.join(cats_folder, filename))
            cat_count += 1
        elif nombre.startswith("dog") and dog_count < max_images:
            shutil.copy(file_path, os.path.join(dogs_folder, filename))
            dog_count += 1

        # Romper el loop si ya tenemos 100 de cada clase
        if cat_count >= max_images and dog_count >= max_images:
            break

In [39]:
def load_and_preprocess_images(data_dir, target_size=(224, 224)):
    images = []
    labels = []

    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            for filename in os.listdir(label_dir):
                img_path = os.path.join(label_dir, filename)
                try:
                    img = image.load_img(img_path, target_size=target_size)
                    img_array = image.img_to_array(img)
                    img_array /= 255.0  # Normalizar los valores de píxeles
                    images.append(img_array)
                    # Asigna la etiqueta 0 para "Cat" y 1 para "Dog"
                    if label == "cats":
                        labels.append(0)
                    elif label == "dogs":
                        labels.append(1)
                except Exception as e:
                    print(f"Error cargando la imagen {img_path}: {e}")

    return np.array(images), np.array(labels)


images, labels = load_and_preprocess_images("../data/processed")

**Split Train & Test**

In [42]:
X_train, X_test, Y_train, Y_test = train_test_split(images,labels, test_size=0.2, random_state=42)

In [None]:
source_dir = "../data/processed"

train_dir = "../data/train"
test_dir = "../data/test"

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

classes = ["cats", "dogs"]

for cls in classes:
    cls_path = os.path.join(source_dir, cls)
    files = os.listdir(cls_path)

    train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)
    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(test_dir, cls), exist_ok=True)

    for f in train_files:
        shutil.copy(os.path.join(cls_path, f), os.path.join(train_dir, cls, f))

    for f in test_files:
        shutil.copy(os.path.join(cls_path, f), os.path.join(test_dir, cls, f))

In [None]:
train_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)

train_data = train_datagen.flow_from_directory(
    "../data/train",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical",
    shuffle=True
)

test_data = test_datagen.flow_from_directory(
    "../data/test",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical",
    shuffle=False
)

print(train_data.class_indices)

Found 800 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
{'cats': 0, 'dogs': 1}


In [None]:
model = Sequential()
model.add(Conv2D(input_shape = (224,224,3), filters = 64, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 64,kernel_size = (3,3),padding = "same", activation = "relu"))
model.add(MaxPool2D(pool_size = (2,2),strides = (2,2)))
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(MaxPool2D(pool_size = (2,2),strides = (2,2)))
model.add(Conv2D(filters = 256, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 256, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 256, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(MaxPool2D(pool_size = (2,2),strides = (2,2)))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(MaxPool2D(pool_size = (2,2),strides = (2,2)))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(Conv2D(filters = 512, kernel_size = (3,3), padding = "same", activation = "relu"))
model.add(MaxPool2D(pool_size = (2,2),strides = (2,2)))

model.add(Flatten())
model.add(Dense(units = 4096,activation = "relu"))
model.add(Dense(units = 4096,activation = "relu"))
model.add(Dense(units = 2, activation = "softmax"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
model.compile(optimizer = "adam", loss = CategoricalCrossentropy(from_logits = True), metrics = ["accuracy"])

In [None]:
model.fit(
    train_data,
    epochs=2,
    validation_data=test_data
)

  self._warn_if_super_not_called()


Epoch 1/2


  output, from_logits = _get_logits(


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2151s[0m 85s/step - accuracy: 0.4645 - loss: 0.7010 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/2
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2121s[0m 84s/step - accuracy: 0.5242 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6933


<keras.src.callbacks.history.History at 0x7c7d92e344a0>

In [None]:
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1
)


**Save the model**

In [None]:
model.save('image-classifier-model.h5')



**Make predictions**

In [None]:

# Load the trained model 
modelo = load_model("image-classifier-model.h5")

# Test directories
test_dir = "../data/test"

# Pick a random image
random_class = random.choice(["cats", "dogs"])
class_dir = os.path.join(test_dir, random_class)

random_image = random.choice(os.listdir(class_dir))
image_path = os.path.join(class_dir, random_image)

print(f" Selected image: {random_image} (from {random_class})")

img = image.load_img(image_path, target_size=(224, 224))
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)

# Predict
prediction = modelo.predict(img_array)[0][0]

if prediction < 0.5:
    print(f"Predicted: It's a Cat (confidence: {1 - prediction:.4f})")
else:
    print(f"Predicted: It's a Dog (confidence: {prediction:.4f})")



 Selected image: dog.6647.jpg (from dogs)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 805ms/step
Predicted: It's a Dog (confidence: 0.5004)
