# Setting Up

In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
data_directory = os.path.join(os.getcwd(), 'drive/My Drive/Dataset')

# Question 1 - Binary Image Classification

In [22]:
# Define directories for chickens and ducks
train_chicken_dir = data_directory+'/chicken-images/data/train'
test_chicken_dir = data_directory+'/chicken-images/data/test'
val_chicken_dir = data_directory+'/chicken-images/data/val'

train_duck_dir = data_directory+'/duck-images/data/train'
test_duck_dir = data_directory+'/duck-images/data/test'
val_duck_dir = data_directory+'/duck-images/data/val'


In [14]:
os.listdir(chicken_train_dir)[0:5]

['1LRILIIFI626.jpg',
 '36BT5ZUKCAX6.jpg',
 '1FWMJVFXLG6Y.jpg',
 '08OZ5SPN1U9F.jpg',
 '1BD9DQ8ZT4UR.jpg']

# Utility Functions

In [15]:
img_size = (224, 224)
batch_size = 32

In [20]:
def load_images_from_directory(directory, target_size):
    images = []
    labels = []
    for subdir, _, files in os.walk(directory):
        for file in files:
            filepath = os.path.join(subdir, file)
            try:
                img = Image.open(filepath)
                img = img.resize(target_size)
                img = img.convert('RGB')
                images.append(np.array(img))
                labels.append(os.path.basename(subdir))
            except Exception as e:
                print(f"Error loading image {filepath}: {e}")
    return np.array(images), np.array(labels)

## Loading and Preprocessing the Dataset

In [40]:
# Load train images
chicken_images, chicken_labels = load_images_from_directory(train_chicken_dir, img_size)
duck_images, duck_labels = load_images_from_directory(train_duck_dir, img_size)

# Load test images
test_chicken_images, test_chicken_labels = load_images_from_directory(test_chicken_dir, img_size)
test_duck_images, test_duck_labels = load_images_from_directory(test_duck_dir, img_size)

In [41]:
# Convert labels to binary (0 for chicken, 1 for duck)
chicken_labels = np.zeros(len(chicken_labels))
duck_labels = np.ones(len(duck_labels))

test_chicken_labels = np.zeros(len(test_chicken_labels))
test_duck_labels = np.ones(len(test_duck_labels))

In [42]:
# Concatenate chicken and duck images and labels
X_train = np.concatenate([chicken_images, duck_images], axis=0)
y_train = np.concatenate([chicken_labels, duck_labels], axis=0)

X_test = np.concatenate([test_chicken_images, test_duck_images], axis=0)
y_test = np.concatenate([test_chicken_labels, test_duck_labels], axis=0)

In [26]:
print("Loaded", len(X_train), "training images.")
print("Loaded", len(y_train), "training labels.")

Loaded 897 training images.
Loaded 897 training labels.


In [29]:
# Load the pre-trained VGG16 model without the top (classification) layer
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_size[0], img_size[1], 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [30]:
# Freeze the weights of the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

In [31]:
# Create a new model on top of the pre-trained model
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [34]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


In [35]:
# Compile the model
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
# Evaluate the model on validation set
val_loss, val_acc = model.evaluate(X_val, y_val)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_acc)

Validation Loss: 0.5237137079238892
Validation Accuracy: 0.9833333492279053


In [None]:
# Predict on test set
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)

In [43]:
# Evaluate the predictions
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Test Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Test Accuracy: 0.9232365145228216
Precision: 0.941747572815534
Recall: 0.9387096774193548
F1 Score: 0.9402261712439419
