# Import Libraries

In [None]:
# import os libraries
import os
import shutil
import itertools
import pathlib
from PIL import Image

# import data handling
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
from sklearn.metrics import confusion_matrix , classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

# import deep learning tools
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D , MaxPooling2D , Dense , BatchNormalization , Dropout,Flatten , Activation
from tensorflow.keras.optimizers import Adam , Adamax
from tensorflow.keras import regularizers

# warnings
import warnings
warnings.filterwarnings('ignore')

# Unzipping Data

In [None]:
# Extract the training archive with the `unzip` shell command (IPython `!` magic).
# No -d target is given, so files land in the notebook's current working
# directory -- presumably /kaggle/working, since later cells read /kaggle/working/train.
!unzip /kaggle/input/dogs-vs-cats/train.zip

In [None]:
# Extract the test1 archive with the `unzip` shell command (IPython `!` magic).
# No -d target is given, so files land in the notebook's current working directory.
!unzip /kaggle/input/dogs-vs-cats/test1.zip

In [None]:
# Count the files extracted into the training folder.
# os.walk yields (dirpath, dirnames, filenames) tuples; taking only the first
# tuple restricts the count to the top-level directory.
path, dirs, files = next(os.walk('/kaggle/working/train'))
file_count = len(files)

# Should be 25000 files
print(file_count)

# Root folder that will hold the reorganised train/validation/test split.
base_dir = '/kaggle/working/dogs_vs_cats_small'
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError the second time the cell is executed.
os.makedirs(base_dir, exist_ok=True)

# Organize the data
Create the directory tree used to organize the images: `train`, `validation`, and `test` folders, each containing a `cats` and a `dogs` subfolder.

In [None]:
def _make_dir(*parts):
    """Join *parts* into a path, create that directory if missing, and return the path.

    Uses ``exist_ok=True`` so the cell can be re-run without raising
    ``FileExistsError`` for directories created by an earlier run.
    """
    new_dir = os.path.join(*parts)
    os.makedirs(new_dir, exist_ok=True)
    return new_dir


# Top-level split directories inside the base directory
train_dir = _make_dir(base_dir, 'train')
valid_dir = _make_dir(base_dir, 'validation')
test_dir = _make_dir(base_dir, 'test')

# One sub-directory per class inside the train directory
train_cats_dir = _make_dir(train_dir, 'cats')
train_dogs_dir = _make_dir(train_dir, 'dogs')

# One sub-directory per class inside the validation directory
valid_cats_dir = _make_dir(valid_dir, 'cats')
valid_dogs_dir = _make_dir(valid_dir, 'dogs')

# One sub-directory per class inside the test directory
test_cats_dir = _make_dir(test_dir, 'cats')
test_dogs_dir = _make_dir(test_dir, 'dogs')

In [None]:
# Tally the cat and dog images in the extracted training folder,
# classifying each file by its name prefix ('cat.' or 'dog.').
dir_path = '/kaggle/working/train'

entries = os.listdir(dir_path)
cat_count = sum(1 for entry in entries if entry.startswith('cat.'))
dog_count = sum(1 for entry in entries if entry.startswith('dog.'))

# Split the dataset
Split the dataset into training, validation, and test sets by copying the selected image files into their respective directories.

In [None]:
original_dataset_dir = '/kaggle/working/train'


def _copy_image_range(label, indices, dst_dir):
    """Copy '<label>.<i>.jpg' for every i in *indices* from the original dataset into *dst_dir*.

    Files are copied (not moved), so the original dataset folder is left intact.
    """
    for i in indices:
        fname = '{}.{}.jpg'.format(label, i)
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(dst_dir, fname)
        shutil.copyfile(src, dst)


# Index ranges per class: 10,000 train / 1,250 validation / 1,250 test.
# range() excludes its stop value, so the boundaries are 10000 and 11250;
# the previous 10001 / 11251 boundaries put 10,001 images in train and
# only 1,249 in test.
_split_ranges = (
    (range(0, 10000), train_cats_dir, train_dogs_dir),
    (range(10000, 11250), valid_cats_dir, valid_dogs_dir),
    (range(11250, 12500), test_cats_dir, test_dogs_dir),
)

for indices, cats_dir, dogs_dir in _split_ranges:
    _copy_image_range('cat', indices, cats_dir)
    _copy_image_range('dog', indices, dogs_dir)

# Data generators (no augmentation is applied)


In [None]:
# Input geometry and batching shared by every generator.
batch_size = 32
img_size = (224, 224)
img_shape = (*img_size, 3)  # two spatial dimensions plus 3 colour channels

# Plain generators: no arguments are passed, so no augmentation or rescaling is configured.
tr_gen = ImageDataGenerator()
ts_gen = ImageDataGenerator()

# Options common to the three directory iterators.
_flow_options = dict(
    target_size=img_size,
    class_mode='binary',
    color_mode='rgb',
    batch_size=batch_size,
)

# Train and validation batches are shuffled; test order is kept fixed.
train_gen = tr_gen.flow_from_directory(train_dir, shuffle=True, **_flow_options)

valid_gen = ts_gen.flow_from_directory(valid_dir, shuffle=True, **_flow_options)

test_gen = ts_gen.flow_from_directory(test_dir, shuffle=False, **_flow_options)

# Visualize a sample of images from the dataset

In [None]:
# Pull one batch from the training generator and show up to 16 images
# in a 4x4 grid, each titled with its class name.
g_dict = train_gen.class_indices
classes = list(g_dict)
images, labels = next(train_gen)
num_samples = len(images)

plt.figure(figsize=(20, 20))

for position, (picture, target) in enumerate(zip(images[:16], labels[:16]), start=1):
    plt.subplot(4, 4, position)
    # Divide by 255 so imshow receives float pixel values in [0, 1].
    plt.imshow(picture / 255)
    # Labels come back as floats (class_mode='binary'); cast to index the class list.
    plt.title(classes[int(target)], color='blue', fontsize=12)
    plt.axis('off')
plt.show()

# EfficientNetB5 Architecture

In [None]:
img_shape = (img_size[0], img_size[1], 3)

# Pretrained EfficientNetB5 backbone without its classification head.
# pooling='max' requests global max pooling on the backbone output, so the
# Dense head below can be attached without a Flatten layer.
base_model = tf.keras.applications.efficientnet.EfficientNetB5(
    include_top=False,
    weights='imagenet',
    input_shape=img_shape,
    pooling='max',
)
# Freeze the backbone: only the layers added below are trained.
base_model.trainable = False

num_classes = len(classes)

# Binary classification head: a single sigmoid unit paired with
# binary_crossentropy.
model = Sequential([
    base_model,
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    Dense(
        256,
        # The factor is passed positionally: `l=` is a legacy keyword alias
        # that newer Keras releases may reject (their parameter is `l2`),
        # while the positional form is accepted either way.
        kernel_regularizer=regularizers.l2(0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
        activation='relu',
    ),
    Dropout(rate=0.4, seed=40),
    Dense(1, activation='sigmoid'),
])
model.compile(Adamax(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model

In [None]:
# Train for 10 epochs on the training generator, validating on the validation
# generator after each epoch; the returned History object is kept in `history`.
history = model.fit(
    x=train_gen,
    validation_data=valid_gen,
    epochs=10,
    verbose=1,
    validation_steps=None,  # None: no explicit cap on validation batches
    shuffle=False,  # NOTE(review): Keras documents `shuffle` as ignored for generator input -- confirm
)

# Evaluate the model

In [None]:
# Evaluate on the complete splits. No `steps` cap is passed, so each generator
# is consumed in full. The previous steps=32 scored only 32 batches per split;
# because the test generator is unshuffled, those batches were the first files
# in directory order (presumably all from the 'cats' folder), so the reported
# test accuracy did not cover both classes.
train_score = model.evaluate(train_gen, verbose=1)
val_score = model.evaluate(valid_gen, verbose=1)
test_score = model.evaluate(test_gen, verbose=1)

# Each score is [loss, accuracy], matching the loss and metrics given to compile().
print(f'Train loss = {train_score[0]}')
print(f'Train Accuracy = {train_score[1]}')
print(f'Validation loss = {val_score[0]}')
print(f'Validation Accuracy = {val_score[1]}')
print(f'Test loss = {test_score[0]}')
print(f'Test Accuracy = {test_score[1]}')

# Predict the class of a single image

In [None]:
def predict_image_using_model(model, image_path, img_size=(224, 224)):
    """Classify one image file as cat or dog and print the result.

    Args:
        model: Object whose ``predict`` method takes a batch of shape
            (1, height, width, 3) and returns a (1, 1) array holding the
            sigmoid output; values above 0.5 are reported as "dog".
        image_path: Path to the image file to classify.
        img_size: Target size the image is resized to, as (height, width).

    Raises:
        FileNotFoundError: If ``image_path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it as an image.
    """
    # Fail early with a clear message; cv2.imread signals failure by returning
    # None, which would otherwise surface as an obscure error in cvtColor.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Preprocess the image
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB
    # cv2.resize expects (width, height), whereas img_size is (height, width).
    img = cv2.resize(img, (img_size[1], img_size[0]))
    img = np.expand_dims(img, axis=0)  # Add the batch dimension expected by the model

    # Make a prediction; index out the scalar instead of calling int() on a
    # (1, 1) array, which newer NumPy versions deprecate.
    predictions = model.predict(img)
    dog_probability = predictions[0][0]

    # Interpret the prediction: above 0.5 means "dog", otherwise "cat"
    if dog_probability > 0.5:
        label = "dog"
        confidence = dog_probability * 100
    else:
        label = "cat"
        confidence = (1 - dog_probability) * 100

    print(f"The image is predicted to be a {label} with {confidence:.2f}% confidence.")

# Usage example: classify one image file with the model built in this notebook.
# The function prints its result rather than returning it.
image_path = "/kaggle/input/cat-image/download.jpg"
predict_image_using_model(model, image_path)