## Loading Libraries

In [None]:
# Basic
import os
from os import makedirs
from os import listdir
from shutil import copyfile
from random import seed
from random import random
import numpy as np
import pandas as pd
import zipfile

# visuals
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.image import imread
from PIL import Image

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,ConfusionMatrixDisplay

# Tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense,MaxPooling2D,Dropout,Flatten,BatchNormalization,Conv2D
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping

## Data Extraction

#### Unzip the File

In [None]:
# Archive that holds the train/val/test image folders
datasetzipfile = "datasets.zip"

if not os.path.exists(datasetzipfile):
    # Nothing to unpack: report it and carry on
    print(f"{datasetzipfile} does not exist.")
else:
    with zipfile.ZipFile(datasetzipfile, 'r') as archive:
        for member in archive.infolist():
            # Leave anything that is already on disk untouched
            if os.path.exists(member.filename):
                continue
            archive.extract(member)

#### Set Paths

In [None]:
# All image folders live under one extracted root directory
_dataset_root = "datasets"

# Labelled training images, one folder per class
train_path_dog = f"{_dataset_root}/train/dog"
train_path_cat = f"{_dataset_root}/train/cat"

# Labelled validation images, one folder per class
valid_path_dog = f"{_dataset_root}/val/dog"
valid_path_cat = f"{_dataset_root}/val/cat"

# Test images
test_path = f"{_dataset_root}/test"

## Data Exploration

#### Function to check path (Unused)

In [None]:
def list_files_in_directory(path):
    """Return the names of the regular files directly inside ``path``.

    Sub-directories are left out of the result. If listing the directory
    fails for any reason, the error message is returned as a string instead
    of a list.
    """
    try:
        return [
            entry
            for entry in os.listdir(path)
            if os.path.isfile(os.path.join(path, entry))
        ]
    except Exception as err:
        # Best-effort helper: hand back the error text rather than raising
        return str(err)

#files = list_files_in_directory(train_path_dog)
#print("Files in directory:", files)

#### Function to Display Head of Files

In [None]:
def display_files_head(image_dir):
    """Return the head of a filename/label table for ``image_dir``.

    Every entry of the directory becomes one row. The ``label`` column holds
    the part of the entry name that precedes its first ``.`` (for example
    ``dog`` for ``dog.12.jpg``).
    """
    entries = os.listdir(image_dir)

    table = pd.DataFrame(
        {
            "filename": entries,
            "label": [name.split(".")[0] for name in entries],
        }
    )
    return table.head()

#display_files_head(train_path_dog)
#display_files_head(train_path_cat)

#### Visualisation of Dogs from Train Dataset

In [None]:
# Show up to ten sample dog images from the training set in a single row

plt.figure(figsize=(20, 20))
plt.subplots_adjust(hspace=0.4)

# Number of images drawn so far; doubles as the 1-based slot of the latest subplot
count = 0

for idx in range(10):
    dog_file = f'datasets/train/dog/dog.{idx}.jpg'

    # Skip indices with no matching file so the row has no gaps
    if not os.path.exists(dog_file):
        continue

    count += 1
    plt.subplot(1, 10, count)
    plt.imshow(imread(dog_file))
    plt.title('Dog' + str(idx), fontsize=12)
    plt.axis('off')

# Render the figure
plt.show()

#### Visualisation of Cats from Train Dataset

In [None]:
# Show up to ten sample cat images from the training set in a single row

plt.figure(figsize=(20, 20))
plt.subplots_adjust(hspace=0.4)

# Number of images drawn so far; doubles as the 1-based slot of the latest subplot
count = 0

for idx in range(10):
    cat_file = f'datasets/train/cat/cat.{idx}.jpg'

    # Skip indices with no matching file so the row has no gaps
    if not os.path.exists(cat_file):
        continue

    count += 1
    plt.subplot(1, 10, count)
    plt.imshow(imread(cat_file))
    plt.title('Cat' + str(idx), fontsize=12)
    plt.axis('off')

# Render the figure
plt.show()

#### Training, Validation and Test Data Count

In [None]:
#Count the Data Provided
def count_files_in_directory(path):
    """Return how many regular files sit directly inside ``path``.

    Sub-directories are not counted. If the directory cannot be listed, the
    error message is returned as a string instead of a number.
    """
    try:
        regular_files = [
            name
            for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        ]
        return len(regular_files)
    except Exception as err:
        # Best-effort helper: hand back the error text rather than raising
        return str(err)

# Report the number of files in each dataset folder.
# A ``None`` row stands for the blank line separating the splits.
count_report = [
    ("Total number of dog images in training data :", train_path_dog),
    ("Total number of cat images in training data :", train_path_cat),
    None,
    ("Total number of dog images in validation data :", valid_path_dog),
    ("Total number of cat images in validation data :", valid_path_cat),
    None,
    ("Total number of unknown images in test data :", test_path),
]

for row in count_report:
    if row is None:
        print()
    else:
        caption, directory = row
        print(caption, count_files_in_directory(directory))

### Data Generators, CNN Model, Training and Evaluation

In [None]:
# Parameters
image_size = 150  # Images are resized to image_size x image_size pixels
batch_size = 32   # Number of images to be yielded from the generator per batch

# Data Generators
# Training images have their pixel values multiplied by 1/255 and are randomly
# augmented (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1./255,
                                    rotation_range=15,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    shear_range=0.1,
                                    zoom_range=0.2,
                                    horizontal_flip=True,
                                    fill_mode='nearest')

# Validation and test images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load datasets
train_generator = train_datagen.flow_from_directory(
    'datasets/train/',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='binary'  # Binary classification (dog vs. cat)
)

val_generator = val_datagen.flow_from_directory(
    'datasets/val/',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False  # Keep file order stable so predictions line up with filenames
)

# flow_from_directory only indexes images found inside a class sub-folder of
# `directory`. The test images are counted as plain files directly under
# datasets/test (see test_path), so pointing the generator at 'datasets/test/'
# would find 0 images. Treating 'test' as the single "class" folder of
# 'datasets/' picks the images up whether they are flat or nested.
# NOTE(review): assumes the extracted layout is datasets/test/<images> - confirm.
test_generator = test_datagen.flow_from_directory(
    'datasets/',
    classes=['test'],
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode=None,  # No labels expected for test set
    shuffle=False
)

# CNN Model
# Four convolution + max-pooling stages (32, 64, 128 and 256 filters) feed a
# dense classifier head that ends in a single sigmoid unit.
model = Sequential()

# The first stage also declares the input shape: image_size x image_size, 3 channels
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(image_size, image_size, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining stages differ only in their number of filters
for n_filters in (64, 128, 256):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Use sigmoid for binary classification

# Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Callbacks
# Halve the learning rate (down to 0.00001) after 2 epochs without
# val_accuracy improvement
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)
# Stop after 3 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Train the model
cat_dog = model.fit(
    train_generator,
    validation_data=val_generator,
    callbacks=[early_stopping, learning_rate_reduction],
    epochs=30,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)

# Evaluate the model
# The test generator is created with class_mode=None, so it yields images
# without labels and no loss/accuracy can be computed on it. Report the
# metrics on the labelled validation set instead.
val_loss, val_accuracy = model.evaluate(val_generator, steps=len(val_generator))
print(f'Validation accuracy: {val_accuracy:.2f}')

# Predict on the unlabelled test set.
# Each row of test_probabilities is the sigmoid output for one test image, in
# generator order (the test generator is not shuffled).
test_probabilities = model.predict(test_generator, steps=len(test_generator))

# Threshold at 0.5 to get a 0/1 class index per image;
# train_generator.class_indices tells which class name each index stands for.
test_predicted_classes = (test_probabilities > 0.5).astype('int32').ravel()