# <div style="text-align: center; background-color: #595964; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">Dog vs Cat classification</div>

<a id="1"></a>
# <div style="text-align: center; background-color: #00BFFF; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">1. Import Necessary Libraries</div>

In [None]:
!pip install visualkeras

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go 
import itertools
import plotly.express as px
import cv2
import tensorflow as tf
import random
import zipfile
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import visualkeras

<a id="2"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">2. Unzip and extract data </div>

In [None]:
# Unpack the training archive into the current working directory; the images
# are then read from the `train/` folder that the archive is expected to create.
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/train.zip', 'r') as train_zip:
    train_zip.extractall('.')

# Address the extracted folder relative to the working directory instead of
# assuming that the working directory itself is called "working".
image_dir = "train/"

# os.listdir returns entries in arbitrary order; sorting keeps the DataFrame,
# and therefore the seeded train/validation/test split, reproducible.
filenames = sorted(os.listdir(image_dir))

# The class is taken from the text before the first dot of each file name
# (assumes names of the form "<label>.<id>.<ext>" -- TODO confirm).
labels = [x.split(".")[0] for x in filenames]
df = pd.DataFrame({"filename": filenames, "label": labels})
df.head()

<a id="3"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">3. Check for data imbalance and number of categories </div>

In [None]:
# Per-class image totals, used to check the dataset for imbalance.
label_column = df['label']
count = label_column.value_counts()
count


In [None]:
# Horizontal bar chart: number of images per class.
fig = go.Figure(go.Bar(
            x=count.values,
            y=count.index,
            orientation='h'))
fig.update_layout(title='Data Distribution in Bars', font_size=15, title_x=0.45)
fig.show()

# Donut chart of the same totals (at most the ten largest classes).
# Slice sizes AND slice names are both read from `count`, so every name is
# paired with its own total. Taking the names from df['label'].unique() would
# use order of first appearance, which need not match value_counts() order.
top_counts = count.head(10)
fig = px.pie(values=top_counts.values, names=top_counts.index, hole=0.425)
fig.update_layout(title='Data Distribution of Data',font_size=15,title_x=0.45,annotations=[dict(text='Cat vs Dog',font_size=18, showarrow=False,height=800,width=700)])
fig.update_traces(textfont_size=15, textinfo='percent')
fig.show()

<a id="4"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">4. Visualize the data for each category </div>

In [None]:
# Show a few randomly chosen images for every class, one class per row.
grouped_data = df.groupby("label")

num_images_per_category = 5

# squeeze=False keeps `axes` two-dimensional even when there is only a single
# category, so the axes[i, j] indexing below is always valid.
fig, axes = plt.subplots(len(grouped_data), num_images_per_category,
                         figsize=(20, 20), squeeze=False)

for i, (category, group) in enumerate(grouped_data):
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of images in this category.
    sample_size = min(num_images_per_category, len(group))
    random_indices = random.sample(range(len(group)), sample_size)

    for j, index in enumerate(random_indices):
        row = group.iloc[index]
        image_path = os.path.join(image_dir, row["filename"])

        # The context manager closes the file handle once the pixels are drawn.
        with Image.open(image_path) as image:
            axes[i, j].imshow(image)
        axes[i, j].set_title("Label: " + row["label"], fontsize=30)

plt.tight_layout()
plt.show()

<a id="5"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">5. Create train, validation and test datasets </div>

In [None]:
# Stratified split: 80% for training, then the held-out 20% is halved into
# test and validation parts. Both calls use the same fixed random_state.
labels = df['label']
X_train, X_temp = train_test_split(
    df,
    test_size=0.2,
    random_state=23,
    stratify=labels,
)

label_test_val = X_temp['label']
X_test, X_val = train_test_split(
    X_temp,
    test_size=0.5,
    random_state=23,
    stratify=label_test_val,
)

# Report the class balance of every split.
for split_name, split_df in (('X_train:', X_train), ('X_val:', X_val), ('X_test:', X_test)):
    print(split_name, split_df['label'].value_counts())

<a id="6"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">6. Create image data generator</div>

In [None]:
# Mini-batch size and target image resolution (height, width).
batch_size, size = 64, (370, 370)

# Generator whose only transformation is VGG16's own input preprocessing.
vgg16_preprocess = tf.keras.applications.vgg16.preprocess_input
idg = ImageDataGenerator(preprocessing_function=vgg16_preprocess)

In [None]:
# Batches of training images with one-hot labels, produced by the shared `idg`
# generator (VGG16 preprocessing). The former
# `train_idg = ImageDataGenerator(rescale=1/255)` line was dropped: it was
# overwritten immediately and wrongly suggested that 1/255 rescaling is applied.
train_idg = idg.flow_from_dataframe(X_train, image_dir,
                                    x_col="filename", y_col="label",
                                    class_mode="categorical",
                                    batch_size=batch_size,
                                    target_size=size)

In [None]:
# Validation batches from the shared `idg` generator (VGG16 preprocessing).
# shuffle=False keeps the batches in DataFrame order. The former
# `val_idg = ImageDataGenerator(rescale=1/255)` line was dropped: it was
# overwritten immediately and wrongly suggested that 1/255 rescaling is applied.
val_idg = idg.flow_from_dataframe(X_val, image_dir,
                                  x_col="filename", y_col="label",
                                  class_mode="categorical",
                                  batch_size=batch_size,
                                  target_size=size, shuffle=False)

In [None]:
# Held-out test batches from the shared `idg` generator (VGG16 preprocessing).
# shuffle=False keeps predictions aligned with `test_idg.labels`. The former
# `test_idg = ImageDataGenerator(rescale=1/255)` line was dropped: it was
# overwritten immediately and wrongly suggested that 1/255 rescaling is applied.
test_idg = idg.flow_from_dataframe(X_test, image_dir,
                                   x_col="filename", y_col="label",
                                   class_mode="categorical",
                                   batch_size=batch_size,
                                   target_size=size, shuffle=False)

<a id="7"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">7. Download the VGG16 model and build the model </div>

In [None]:
# VGG16 convolutional base without its fully connected classifier
# (include_top=False), with ImageNet weights (the library default, spelled out).
# The input shape is derived from `size` so the network always matches the
# resolution the generators produce, instead of repeating the numbers here.
vgg16_model = tf.keras.applications.vgg16.VGG16(include_top=False,
                                                weights="imagenet",
                                                input_shape=(*size, 3))

In [None]:
# Print the layer-by-layer summary of the VGG16 base.
vgg16_model.summary()

In [None]:
# Freeze every layer of the VGG16 base so that its weights are not updated
# during training.
for base_layer in vgg16_model.layers:
    base_layer.trainable = False

In [None]:
# Classification head stacked on the VGG16 output:
# Flatten -> Dropout(0.2) -> Dense(128, relu) -> Dropout(0.2) -> Dense(2, softmax).
keras_layers = tf.keras.layers

head = keras_layers.Flatten()(vgg16_model.output)
head = keras_layers.Dropout(0.2, name="Dropout1")(head)
head = keras_layers.Dense(128, activation="relu")(head)
head = keras_layers.Dropout(0.2, name="Dropout2")(head)
output = keras_layers.Dense(2, activation="softmax")(head)

# Full network: VGG16 input through to the two-unit softmax output.
final_model = tf.keras.models.Model(
    inputs=[vgg16_model.input],
    outputs=[output],
)

In [None]:
# Draw the layer graph of the full model, annotated with tensor shapes and layer names.
tf.keras.utils.plot_model(final_model, show_shapes = True, show_layer_names=True)


In [None]:
# Render the visualkeras layered view of the model, with a legend.
visualkeras.layered_view(final_model, legend=True)

In [None]:
# Alternative architecture kept for reference only -- nothing in this notebook
# uses it: a small CNN built from scratch. It is stored as comments rather than
# as a bare string literal so that running the cell no longer echoes the whole
# snippet as output. The explanatory notes were corrected to match the code
# (370x370 input, two output units).
#
# model = tf.keras.models.Sequential([
#     # Input is a 370x370 image with 3 colour channels
#     # This is the first convolution
#     tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(370, 370, 3)),
#     tf.keras.layers.MaxPooling2D(2, 2),
#     # The second convolution
#     tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
#     # The third convolution
#     tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
#     # The fourth convolution
#     tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
#     # The fifth convolution
#     tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
#     tf.keras.layers.MaxPooling2D(2,2),
#     # Flatten the results to feed into a DNN
#     tf.keras.layers.Flatten(),
#     # 512 neuron hidden layer
#     tf.keras.layers.Dense(512, activation='relu'),
#     # Two output units, one per class.
#     # NOTE(review): 'softmax' would be the usual activation to pair with the
#     # categorical cross-entropy loss used in this notebook -- confirm before reuse.
#     tf.keras.layers.Dense(2, activation='sigmoid')
# ])

<a id="8"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">8. Compile the model</div>

In [None]:
# Training configuration: Adam optimizer, categorical cross-entropy loss and
# accuracy tracked under the key "acc".
compile_options = dict(
    optimizer='adam',
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=["acc"],
)
final_model.compile(**compile_options)

<a id="9"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">9. Create a Callback</div>

In [None]:
# Checkpoint callback: writes the model to "DogCat" and, because of
# save_best_only=True, only when the monitored validation loss improves.
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    filepath="DogCat",
    monitor="val_loss",
    save_best_only=True,
)

<a id="10"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">10. Train the model </div>

In [None]:
# Train for 4 epochs, validating after each one and checkpointing through
# `model_ckpt`. `batch_size` is intentionally NOT passed to fit(): the
# generators already yield batches of `batch_size` images, and Keras documents
# that the argument must not be given for generator/Sequence inputs.
history = final_model.fit(train_idg,
                          validation_data=val_idg,
                          epochs=4,
                          callbacks=[model_ckpt])

<a id="11"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">11. Evaluate accuracy and loss for the model</div>

In [None]:
# Per-epoch metrics recorded by fit().
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history instead of hard-coding the epoch
# count, so the plot stays correct if the number of epochs changes.
epochs_range = range(len(acc))

# Left panel: accuracy curves; right panel: loss curves.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(15, 10))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()

<a id="12"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">12. Check the accuracy of the model on test subset</div>

In [None]:
# Reload the model stored at "DogCat" by the checkpoint callback.
cat_dog_model = tf.keras.models.load_model("DogCat")

# Class probabilities for the held-out split, then the most probable class index.
result = cat_dog_model.predict(test_idg)
result_argmax = result.argmax(axis=1)

# Ground-truth class indices come from the iterator; predictions from the argmax.
y_true, y_pred = test_idg.labels, result_argmax

# Fraction of predictions that match the ground truth.
accuracy = np.mean(y_pred == y_true)
print("Test Accuracy:", accuracy)

In [None]:
# Evaluate the model
report = classification_report(y_true, y_pred)

# Split the classification report into lines
report_lines = report.split('\n')

# Create header row
header_row = report_lines[0].split()

# Create rows for each class
table_rows = [row.split() for row in report_lines[2:-5]]

# Create the table using Plotly
table = go.Figure(data=[go.Table(header=dict(values=header_row),
                 cells=dict(values=list(zip(*table_rows))))
                 ])

# Show the table
table.show()

<a id="13"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">13. Make predictions on kaggle test data for submission</div>

In [None]:
# Unpack the competition test archive into the current working directory; the
# images are then read from the `test1/` folder the archive is expected to create.
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/test1.zip', 'r') as test1_zip:
    test1_zip.extractall('.')

# Address the extracted folder relative to the working directory instead of
# assuming that the working directory itself is called "working".
test_dir = "test1/"

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and hence the previews further down) stable between runs.
filenames = sorted(os.listdir(test_dir))
test_data = pd.DataFrame({"filename": filenames})

# Placeholder label column: these images have no ground truth; the column is
# overwritten with the model's predictions later on.
test_data['label'] = 'unknown'
test_data.head()

In [None]:
# Iterator over the unlabeled competition images. class_mode=None makes it
# yield image batches only (no targets), which is what predict() needs; the
# placeholder 'label' column is therefore not read here. shuffle=False keeps
# the predictions in the same order as the rows of `test_data`.
test1_idg = idg.flow_from_dataframe(test_data, test_dir,
                                    x_col="filename", y_col=None,
                                    class_mode=None,
                                    batch_size=batch_size,
                                    target_size=size, shuffle=False)

In [None]:
# Class probabilities for every competition image.
test1_predict = cat_dog_model.predict(test1_idg)

# Index of the most probable class per image.
test1_predict_argmax = test1_predict.argmax(axis=1)
y_test_pred = test1_predict_argmax

# Store the predicted class indices in the 'label' column.
test_data['label'] = y_test_pred
test_data.head()

In [None]:
# Translate predicted class indices into class names. The mapping is inverted
# from the training iterator's own `class_indices` rather than hard-coded, so
# it cannot drift from the encoding the model was actually trained with.
label_mapping = {index: name for name, index in train_idg.class_indices.items()}

# Map from the raw predictions (`y_test_pred`) instead of the current contents
# of the 'label' column: this keeps the cell idempotent, whereas re-mapping the
# column a second time would turn every already-translated label into NaN.
test_data['label'] = [label_mapping[int(class_index)] for class_index in y_test_pred]
test_data.head()

<a id="14"></a>
# <div style="text-align: center; background-color: #6495ED; font-family:Times New Roman; color: white; padding: 14px; line-height: 1;border-radius:20px">14. Visualize the predicted results</div>

In [None]:
# Preview the first (up to) ten competition images with their predicted label.
num_preview = min(10, len(test_data))

if num_preview:
    # squeeze=False keeps `axes` two-dimensional even for a single image.
    fig, axes = plt.subplots(1, num_preview, figsize=(20, 10), squeeze=False)
    for idx in range(num_preview):
        row = test_data.iloc[idx]
        image_path = os.path.join(test_dir, row['filename'])

        # The context manager closes the file handle once the pixels are drawn.
        with Image.open(image_path) as image:
            axes[0, idx].imshow(image)

        # str() keeps the title working even if the label is not a string yet
        # (e.g. still a class index).
        axes[0, idx].set_title("Label: " + str(row['label']))
        axes[0, idx].axis('off')
    plt.show()