1 CNN for image classification

1. Set up your environment: Ensure that you have installed all the required software packages.

In [1]:
import tensorflow as tf
import pathlib
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import matplotlib.pyplot as plt


2. Prepare your dataset: Choose a dataset from the UCI Machine Learning Repository that is appropriate for classification. Download the selected dataset.

In [4]:
!git clone https://github.com/lakshithagnk/CNN-image-classification.git


Cloning into 'CNN-image-classification'...
remote: Enumerating objects: 9070, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 9070 (delta 0), reused 6 (delta 0), pack-reused 9064 (from 1)[K
Receiving objects: 100% (9070/9070), 146.66 MiB | 24.86 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Updating files: 100% (9117/9117), done.


In [5]:
data_dir = pathlib.Path('CNN-image-classification/defungi')

# Load the full dataset without any split (splitting is done later on the
# NumPy arrays). tf.keras.utils is the documented home of this loader; the
# tf.keras.preprocessing namespace is deprecated for new code.
dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    image_size=(128, 128),   # resize every image to 128x128
    batch_size=None,         # yield single (image, label) samples, not batches
    color_mode='grayscale',  # Convert to grayscale (single channel)
    seed=42                  # fixed seed so the shuffled order is repeatable
)

# Separate images and labels.
# as_numpy_iterator() yields NumPy values directly, which avoids the slow
# np.array() conversion of a Python list of eager tensors.
image_list, label_list = [], []
for image, label in dataset.as_numpy_iterator():
    image_list.append(image)
    label_list.append(label)

images = np.stack(image_list)   # one stacked array of all images
labels = np.array(label_list)   # one label per image, same order as images
del image_list, label_list      # drop the per-sample copies to free memory


Found 9114 files belonging to 5 classes.


In [6]:
# Normalize pixel values to [0, 1].
# The guard keeps this cell idempotent: without it, re-running the cell would
# divide already-normalized data by 255 again and silently shrink every pixel.
if images.size and images.max() > 1.0:
    images = images / 255.0
# Make the (samples, height, width, channels) layout explicit for the CNN.
images = images.reshape(-1, 128, 128, 1)

3. Split the dataset into training, validation, and testing subsets using a ratio of 60% for training and 20% each for validation and testing sets.

In [7]:
# 60% Train, 40% Temp (to split into validation and test).
# stratify=... keeps the class proportions the same in every subset, so a
# class that is rare in the full dataset is not under-represented in the
# validation or test set purely by chance.
x_train, x_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.4, random_state=42, stratify=labels
)

# 50% Validation, 50% Test from the temp set (20% each of the total data)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)


4. Build the CNN model: A common CNN design consists of interleaved convolutional and max-pooling layers, ending with a linear classification layer.

In [8]:
# CNN classifier: four Conv2D + MaxPooling2D stages followed by a small dense
# head with dropout and a 5-way softmax output.
model = tf.keras.Sequential([
    # Declare the input shape once with an explicit Input layer. Passing
    # input_shape= to a Conv2D layer triggers a Keras warning, and on any
    # layer other than the first it has no effect at all.
    layers.Input(shape=(128, 128, 1)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Collapse the feature maps into one vector per image for the dense head.
    layers.Flatten(),

    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # randomly drop half of the activations during training

    # 5 classes for output
    layers.Dense(5, activation='softmax')
])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


7. Train the model: Train the model using the training data for 20 epochs and plot
the training and validation loss with respect to epoch. For the optimizer,
you may use Adam, with sparse categorical cross-entropy as the loss function. Set
a suitable learning rate.

In [9]:
# Step size handed to the Adam optimizer below.
learning_rate = 5e-5

# Alternative optimizer kept for experimentation:
#   keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# Integer class labels -> sparse categorical cross-entropy; track accuracy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Train for 20 epochs in mini-batches of 128, scoring the validation split
# after each epoch. The returned History object holds the per-epoch metrics.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=128,
    epochs=20,
)


Epoch 1/20
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 3s/step - accuracy: 0.4483 - loss: 1.4959 - val_accuracy: 0.4723 - val_loss: 1.3577


In [None]:
# Training vs. validation loss, one point per epoch, drawn on a single axes.
fig, ax = plt.subplots()
for metric_key, curve_name in (('loss', 'Training Loss'),
                               ('val_loss', 'Validation Loss')):
    ax.plot(history.history[metric_key], label=curve_name)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss Over Epochs')
ax.legend()
plt.show()


10. Evaluate the Model: After training, evaluate the model’s performance on the testing
dataset. Record the train/test accuracy, confusion matrix, precision and recall.

In [None]:
# Evaluate on the training dataset as well, so that train and test accuracy
# can be recorded side by side (a large gap indicates over-fitting).
train_loss, train_accuracy = model.evaluate(x_train, y_train, verbose=0)
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Train Loss: {train_loss:.4f}")

# Evaluate on the test dataset
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict class labels on the test dataset: the softmax output has one
# probability per class, so argmax over axis 1 gives the predicted class index.
y_pred = np.argmax(model.predict(x_test), axis=1)
y_true = y_test

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Calculate precision and recall, averaged over classes weighted by support.
# zero_division=0 scores a class that is never predicted as 0 explicitly
# instead of emitting an UndefinedMetricWarning.
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
