In [14]:
# CNN + KNN dataset handled.

In [15]:
import numpy as np
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [16]:
def load_and_preprocess_images(folder_path):
    images = []
    labels = []
    for label_folder in os.listdir(folder_path):
        label_folder_path = os.path.join(folder_path, label_folder)
        if os.path.isdir(label_folder_path):
            for img_file in os.listdir(label_folder_path):
                img_path = os.path.join(label_folder_path, img_file)
                if img_path.lower().endswith('.pgm'):
                    # img = Image.open(img_path).convert('L')  # Convert to grayscale
                    img = Image.open(img_path)
                    img = img.resize((112, 92))  # Resize to your desired input shape
                    img_array = np.array(img)
                    images.append(img_array)
                    labels.append(label_folder)
    return np.array(images), np.array(labels)

# Load data
folder_path = './dataset'
images, labels = load_and_preprocess_images(folder_path)

# Normalize images and add channel dimension
# images = images / 255.0
images = np.expand_dims(images, axis=-1)  # Add channel dimension

# Encode labels
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)


In [17]:
from sklearn.model_selection import train_test_split
import numpy as np

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(images, encoded_labels, test_size=0.4, random_state=42, stratify=encoded_labels)

# Verify the proportions
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

Training set size: 240 samples
Test set size: 160 samples


In [18]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np

# Define input shape
input_shape = (92, 112, 1)  # Grayscale images with shape (height, width, channels)

def build_cnn_model(input_shape: tuple, num_classes: int) -> Model:
    inputs = Input(shape=input_shape)
    x = Conv2D(32, (3, 3), activation='relu')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)  # Change output layer to num_classes
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Determine number of classes
num_classes = len(np.unique(encoded_labels))

# Build and compile the CNN model
cnn_model = build_cnn_model(input_shape, num_classes)
cnn_model.compile(optimizer=Adam(learning_rate=0.0003), loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
cnn_model.summary()


In [19]:
from tensorflow.keras.utils import to_categorical

# Encode labels as one-hot vectors
y_train_encoded = to_categorical(y_train, num_classes=num_classes)
# y_val_encoded = to_categorical(y_val, num_classes=num_classes)
y_test_encoded = to_categorical(y_test, num_classes=num_classes)


In [20]:
from tensorflow.keras.models import Model

# Define a model for feature extraction (remove final classification layer)
feature_extractor = Model(inputs=cnn_model.input, outputs=cnn_model.layers[-2].output)

# Extract features from the training, validation, and test sets
X_train_features = feature_extractor.predict(X_train)
# X_val_features = feature_extractor.predict(X_val)
X_test_features = feature_extractor.predict(X_test)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize and train the KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_features, y_train)

# Make predictions
# y_val_pred = knn.predict(X_val_features)
y_test_pred = knn.predict(X_test_features)

# Evaluate the KNN classifier
# print("Validation Accuracy:", accuracy_score(y_val, y_val_pred))
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))

# Detailed classification report
# print("Validation Classification Report:")
# print(classification_report(y_val, y_val_pred))

print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))


Test Accuracy: 0.7625
Test Classification Report:
              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      1.00      1.00         4
           3       0.50      1.00      0.67         4
           4       0.57      1.00      0.73         4
           5       0.80      1.00      0.89         4
           6       0.80      1.00      0.89         4
           7       1.00      0.50      0.67         4
           8       0.67      1.00      0.80         4
           9       0.50      0.75      0.60         4
          10       0.75      0.75      0.75         4
          11       1.00      1.00      1.00         4
          12       1.00      1.00      1.00         4
          13       0.67      1.00      0.80         4
          14       0.57      1.00      0.73         4
          15       0.60      0.75      0.67         4
          16       1.00      1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Initialize and train the KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_features, y_train)

# Make predictions on the test set
y_test_pred = knn.predict(X_test_features)

# Evaluate the KNN classifier
accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')  # Use 'weighted' for multi-class
recall = recall_score(y_test, y_test_pred, average='weighted')  # Use 'weighted' for multi-class
f1 = f1_score(y_test, y_test_pred, average='weighted')  # Use 'weighted' for multi-class

# Print metrics
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

# Detailed classification report
print("\nTest Classification Report:")
print(classification_report(y_test, y_test_pred))


Test Accuracy: 0.7625
Test Precision: 0.7716
Test Recall: 0.7625
Test F1 Score: 0.7322

Test Classification Report:
              precision    recall  f1-score   support

           0       0.57      1.00      0.73         4
           1       1.00      0.50      0.67         4
           2       1.00      1.00      1.00         4
           3       0.50      1.00      0.67         4
           4       0.57      1.00      0.73         4
           5       0.80      1.00      0.89         4
           6       0.80      1.00      0.89         4
           7       1.00      0.50      0.67         4
           8       0.67      1.00      0.80         4
           9       0.50      0.75      0.60         4
          10       0.75      0.75      0.75         4
          11       1.00      1.00      1.00         4
          12       1.00      1.00      1.00         4
          13       0.67      1.00      0.80         4
          14       0.57      1.00      0.73         4
          15       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
