In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Path to dataset
dataset_dir = './drive/MyDrive/thesis/kaggle-dataset'

# Preprocessing function
def preprocess_image(image_path, img_size=(128, 128)):
    img = cv2.imread(image_path)
    img = cv2.resize(img, img_size)
    img = img_to_array(img) / 255.0
    return img

# Load dataset
def load_dataset(dataset_dir, img_size=(128, 128)):
    images = []
    labels = []
    class_names = sorted(os.listdir(dataset_dir))

    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(dataset_dir, class_name)
        if os.path.isdir(class_dir):
            for image_name in os.listdir(class_dir):
                image_path = os.path.join(class_dir, image_name)
                img = preprocess_image(image_path, img_size)
                images.append(img)
                labels.append(class_idx)

    images = np.array(images)
    labels = np.array(labels)

    return images, labels, class_names

# Load and split dataset
images, labels, class_names = load_dataset(dataset_dir)
labels = to_categorical(labels)

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Build CNN model
def create_cnn_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

input_shape = X_train.shape[1:]
num_classes = len(class_names)

cnn_model = create_cnn_model(input_shape, num_classes)

# Train CNN model
cnn_model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=32)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 937ms/step - accuracy: 0.3366 - loss: 1.8237 - val_accuracy: 0.5128 - val_loss: 1.1520
Epoch 2/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 940ms/step - accuracy: 0.5349 - loss: 1.1136 - val_accuracy: 0.6694 - val_loss: 0.8894
Epoch 3/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 928ms/step - accuracy: 0.6664 - loss: 0.8313 - val_accuracy: 0.7428 - val_loss: 0.7255
Epoch 4/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 890ms/step - accuracy: 0.7805 - loss: 0.6146 - val_accuracy: 0.7766 - val_loss: 0.6276
Epoch 5/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 913ms/step - accuracy: 0.8350 - loss: 0.4554 - val_accuracy: 0.8038 - val_loss: 0.5919
Epoch 6/10
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 921ms/step - accuracy: 0.8834 - loss: 0.3229 - val_accuracy: 0.7906 - val_loss: 0.5928
Epoc

<keras.src.callbacks.history.History at 0x7c7c6bfa4e20>

In [5]:
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

# Example of using SVM with self-training
def semi_supervised_learning(X_train, y_train, X_unlabeled):
    # Convert one-hot labels to integer class labels
    y_train_int = np.argmax(y_train, axis=1)

    # SVM for supervised learning
    base_model = SVC(probability=True)

    # Self-Training Classifier
    self_training_model = SelfTrainingClassifier(base_model)

    # Train with labeled data
    self_training_model.fit(X_train.reshape(X_train.shape[0], -1), y_train_int)

    # Predict on unlabeled data and fine-tune
    pseudo_labels = self_training_model.predict(X_unlabeled.reshape(X_unlabeled.shape[0], -1))

    # Optionally, combine pseudo-labeled data with labeled data and retrain
    return pseudo_labels

# Assume X_unlabeled contains unlabeled data
pseudo_labels = semi_supervised_learning(X_train, y_train, X_test)




In [6]:
from sklearn.metrics import classification_report, confusion_matrix

# Supervised model evaluation
y_pred_supervised = cnn_model.predict(X_test)
print("Supervised Model Classification Report:")
print(classification_report(np.argmax(y_test, axis=1), np.argmax(y_pred_supervised, axis=1)))

# Semi-supervised model evaluation
print("Semi-Supervised Model Classification Report:")
print(confusion_matrix(np.argmax(y_test, axis=1), pseudo_labels))


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 281ms/step
Supervised Model Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.77      0.81       210
           1       0.85      0.87      0.86       365
           2       0.92      0.75      0.83       296
           3       0.78      0.95      0.86       342

    accuracy                           0.85      1213
   macro avg       0.86      0.83      0.84      1213
weighted avg       0.85      0.85      0.84      1213

Semi-Supervised Model Classification Report:
[[104  28  29  49]
 [  4 297  31  33]
 [  6  55 207  28]
 [  5  33  17 287]]
