In [1]:
import os
import pandas as pd
from PIL import Image

In [2]:
# Directory holding the training images. Later cells list its contents and
# open each image relative to it.
dataset_path = r'C:\Users\jrola\Documents\Dogs-cats project\dogs-vs-cats\train\train'

In [3]:
# Build one (filename, label) record per training image.
#
# * Names are sorted because os.listdir() returns entries in arbitrary order;
#   without a stable order the seeded train/validation split made later would
#   not be reproducible from one machine or run to the next.
# * Entries without an image extension are skipped so that stray files do not
#   turn into bogus labels or break the image-size lookup that follows.
image_extensions = ('.jpg', '.jpeg', '.png')

data = []
for file in sorted(os.listdir(dataset_path)):
    if not file.lower().endswith(image_extensions):
        continue
    # The label is whatever precedes the first dot in the file name
    # (a name such as "cat.0.jpg" yields "cat").
    label = file.split(".")[0]
    data.append((file, label))

In [4]:
# Tabulate the collected (filename, label) pairs, one row per image.
df = pd.DataFrame(data=data, columns=["filename", "label"])

In [5]:
def get_image_size(file):
    """Return the PIL ``size`` tuple of one image stored under ``dataset_path``.

    Args:
        file: File name of the image, relative to ``dataset_path``.

    Returns:
        The ``size`` attribute of the opened image; the caller unpacks it as
        ``(width, height)``.
    """
    image_path = os.path.join(dataset_path, file)
    # The context manager releases the file handle once the size is read.
    with Image.open(image_path) as img:
        dimensions = img.size
    return dimensions

# Look up the dimensions of every listed image, then transpose the sequence of
# (width, height) pairs into one tuple per column.
widths, heights = zip(*map(get_image_size, df['filename']))
df['width'] = widths
df['height'] = heights

In [6]:
# Write the filename/label/width/height table to the current working
# directory, leaving out the DataFrame index.
df.to_csv('dogs_vs_cats_labels.csv', index=False)

In [7]:
# Reload the label table from the CSV written by the export cell above.
# The same relative file name is used here as in that export, so a fresh
# "Restart & Run All" always reads the file it has just produced instead of a
# possibly stale (or missing) copy at a hard-coded absolute location.
csv_path = 'dogs_vs_cats_labels.csv'

df = pd.read_csv(csv_path)

# Quick sanity check of the first rows.
print(df.head())

In [8]:
# Directory holding the unlabelled test images.
test_folder = r"C:\Users\jrola\Documents\Dogs-cats project\dogs-vs-cats\test1\test1"

file_list = os.listdir(test_folder)

# Derive ids and paths from the files that are actually present rather than
# assuming exactly 12500 images named 1.jpg .. 12500.jpg. Only "<number>.jpg"
# entries are kept, and they are ordered numerically so that row order follows
# the image id (a plain string sort would place "10.jpg" before "2.jpg").
numbered_images = []
for name in file_list:
    stem, ext = os.path.splitext(name)
    if ext.lower() == '.jpg' and stem.isdecimal():
        numbered_images.append((int(stem), name))
numbered_images.sort()

test_data_df = pd.DataFrame({
    'id': [image_id for image_id, _ in numbered_images],
    "file_path": [os.path.join(test_folder, name) for _, name in numbered_images]
})

print(test_data_df.shape)
print(test_data_df.head())

# Placeholder column; it is overwritten with the predicted classes after
# inference.
test_data_df['prediction'] = None

test_data_df.to_csv('test_data.csv', index=False)

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. The split is stratified on the
# label column and uses a fixed seed.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Keep a copy of each subset on disk so the split can be reused later.
for subset, csv_name in ((train_df, 'train_set.csv'), (val_df, 'val_set.csv')):
    subset.to_csv(csv_name, index=False)

# Report how many rows ended up in each subset.
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Target size passed to every generator below.
image_size = (150, 150)

# Random augmentations applied to training images only.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training images: rescale pixel values by 1/255 and augment.
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation)

# Validation images: rescale only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options shared by the training and validation iterators. The image directory
# comes from dataset_path -- the folder the dataframe filenames were listed
# from -- instead of repeating the literal path, so the two cannot drift apart.
labelled_flow_options = dict(
    directory=dataset_path,
    x_col='filename',
    y_col='label',
    target_size=image_size,
    batch_size=32,
    class_mode='binary',
)

# Create training data generator (augmented images).
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    **labelled_flow_options
)

# Create validation data generator (rescaled images only).
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    **labelled_flow_options
)

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Iterator over the unlabelled test images.
# * directory reuses test_folder, the same folder the file_path column was
#   built from, rather than duplicating the literal path.
# * y_col=None / class_mode=None: there are no labels for these images.
# * shuffle=False: keeps the iteration order equal to the row order of
#   test_data_df, which the prediction cell relies on when it writes the
#   results back into that frame.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data_df,
    directory=test_folder,
    x_col='file_path',
    y_col=None,
    target_size=image_size,
    batch_size=32,
    class_mode=None,
    shuffle=False
)

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Convolutional binary classifier: three Conv2D + MaxPooling2D stages
# (32, 64 and 128 filters), then a 512-unit dense layer with dropout and a
# single sigmoid output unit.
model = Sequential()

# First stage also declares the expected input shape.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining convolution/pooling stages.
for filters in (64, 128):
    model.add(Conv2D(filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Train for 10 epochs. The step counts are taken from the length of each
# generator, and the returned History object is kept for the plots further down.
train_steps = len(train_generator)
val_steps = len(val_generator)

history = model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
)

In [13]:
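# NOTE: left disabled. As written this cannot run: test_generator is created
# with y_col=None and class_mode=None, so it supplies no labels against which
# loss or accuracy could be computed.
#loss, accuracy = model.evaluate(test_generator, steps=len(test_generator))
#print(f"Test Accuracy: {accuracy * 100:.2f}%")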

In [14]:
# Run the model over every batch of the test generator.
predictions = model.predict(
    test_generator,
    steps=len(test_generator),
)

# Threshold the outputs at 0.5 and flatten them into a 1-D array of 0/1 ints.
predicted_classes = (predictions > 0.5).ravel().astype(int)

# Store the classes in the test DataFrame and export the result.
test_data_df['prediction'] = predicted_classes
test_data_df.to_csv('test_predictions.csv', index=False)

In [15]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Display some sample predictions
def plot_sample_predictions(df, image_dir, num_samples=9, random_state=None):
    """Show a grid of randomly chosen images titled with their predicted class.

    Args:
        df: DataFrame with a ``file_path`` column (image location) and a
            ``prediction`` column (``1`` is titled "Dog", anything else "Cat").
        image_dir: Base directory used to resolve relative ``file_path``
            values. Absolute paths are opened unchanged; a falsy value
            disables the join altogether.
        num_samples: Maximum number of images to draw. Fewer are shown when
            ``df`` has fewer rows.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            same selection can be reproduced. ``None`` keeps the selection
            random on every call.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Never ask for more rows than exist; DataFrame.sample would raise.
    count = min(num_samples, len(df))
    if count <= 0:
        # Nothing to draw: skip sampling and avoid emitting an empty figure.
        return

    # Keep the 3-column layout and the 3x3 / 12x12-inch figure for up to nine
    # images; grow the number of rows (4 inches each) only beyond that, where
    # a fixed 3x3 grid would reject the tenth subplot.
    columns = 3
    rows = max(3, -(-count // columns))  # ceiling division

    sample_df = df.sample(n=count, random_state=random_state)

    plt.figure(figsize=(12, 4 * rows))
    for i, row in enumerate(sample_df.itertuples()):
        path = os.path.join(image_dir, row.file_path) if image_dir else row.file_path
        plt.subplot(rows, columns, i + 1)
        # Close the file handle as soon as the image has been drawn.
        with Image.open(path) as img:
            plt.imshow(img)
        plt.title(f"Predicted: {'Dog' if row.prediction == 1 else 'Cat'}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Preview a random selection of test images with their predicted class.
plot_sample_predictions(df=test_data_df, image_dir=test_folder)

In [16]:
# Training curves: accuracy in the left panel, loss in the right one, each
# showing the training and the validation series from the fit history.
panels = (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)

plt.figure(figsize=(12, 4))
for slot, (train_key, val_key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()