In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten, Input, Lambda, Dropout
from keras.models import Model, Sequential
from tensorflow.keras.optimizers import AdamW
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
import tensorflow as tf
import cv2
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import random
import warnings
# Suppress every Python warning for the rest of the session (this also hides
# deprecation notices emitted by the imported libraries).
warnings.filterwarnings("ignore")

In [2]:
# One seed shared by Python's `random`, NumPy's legacy global RNG and TensorFlow.
Seed = 22
random.seed(Seed)
np.random.seed(Seed)
tf.random.set_seed(Seed)

In [3]:
# Notebook-wide configuration.
IMAGE_SIZE = [224, 224]  # NOTE(review): the visible cells hard-code (224, 224) instead of reading this
CHANNEL = 3  # NOTE(review): not referenced in the visible cells
BATCH_SIZE = 32  # batch size handed to the flow_from_dataframe iterators
EPOCHS = 20  # NOTE(review): the visible fit() call uses epochs=15, not this value
CLASS_MODE = 'categorical'  # passed as class_mode to flow_from_dataframe
DENSE_LAYER_ACTIVATION = 'softmax'  # activation of the final Dense layer
OBJECTIVE_FUNCTION = 'categorical_crossentropy'  # NOTE(review): not referenced in the visible cells

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset archive is read from there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import zipfile

# Location of the dataset archive on the mounted Drive -- update this to your own zip path.
zip_path = '/content/drive/MyDrive/Colab Notebooks/Research/Image proccessing/fruit_vegetable dataset.zip'
# Local directory that receives the unpacked files.
extract_path = '/content/fruit_dataset/'

# Unpack the whole archive; the context manager closes the file handle afterwards.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

In [None]:
# Shell out to list the top-level contents of the extraction directory.
!ls "/content/fruit_dataset/"

In [None]:
import os

# Re-declare the extraction directory and print the names of its top-level entries.
extract_path = '/content/fruit_dataset/'
print(os.listdir(extract_path))


In [None]:
# Split directories under an `images/` sub-folder.
# NOTE(review): train_path / test_path / val_path are reassigned in a later cell before
# any visible use, so these values appear to be superseded -- confirm and remove if so.
train_path = '/content/fruit_dataset/images/fruit_vegetable dataset/train'
test_path  = '/content/fruit_dataset/images/fruit_vegetable dataset/test'
val_path   = '/content/fruit_dataset/images/fruit_vegetable dataset/validation'


In [None]:
# Rename the extracted dataset folder so its path contains no space.
# Done in Python instead of `!mv` so the cell can be re-run safely: `mv` onto an existing
# destination directory moves the source *into* it rather than renaming it.
_dataset_src = "/content/fruit_dataset/fruit_vegetable dataset"
_dataset_dst = "/content/fruit_dataset/fruit_vegetable_dataset"

if os.path.isdir(_dataset_dst):
    # A previous run already did the rename; nothing to do.
    print("Dataset folder already renamed:", _dataset_dst)
elif os.path.isdir(_dataset_src):
    os.rename(_dataset_src, _dataset_dst)
    print("Renamed dataset folder to:", _dataset_dst)
else:
    # Like the shell command, report the problem without stopping the notebook.
    print("Dataset folder not found:", _dataset_src)

In [None]:
# Directories of the three dataset splits inside the renamed (space-free) dataset folder.
train_path = '/content/fruit_dataset/fruit_vegetable_dataset/train'
test_path  = '/content/fruit_dataset/fruit_vegetable_dataset/test'
val_path   = '/content/fruit_dataset/fruit_vegetable_dataset/validation'

In [None]:
def get_path_img(path):
    """Collect image file paths and class labels from a class-per-folder directory.

    Every immediate sub-directory of ``path`` is treated as one class, and its name is
    used as the label of each image file found directly inside it. Files located in
    ``path`` itself and files whose extension is not a recognised image extension
    (matched case-insensitively) are ignored.

    Args:
        path: Directory whose sub-directories each contain the images of one class.

    Returns:
        pandas.DataFrame with the columns ``Image_Path`` (path of the image file) and
        ``Class`` (name of the containing folder), ordered by folder name and then by
        file name. The frame is empty, but still has both columns, when no image is
        found.

    Raises:
        OSError: propagated from ``os.listdir`` when ``path`` cannot be listed
            (for example when it does not exist).
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
    img_path = []
    label = []

    # os.listdir returns entries in arbitrary order; sorting makes the resulting frame
    # (and every seeded shuffle applied to it later) identical across runs and machines.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)
        if not os.path.isdir(folder_path):
            continue
        for img in sorted(os.listdir(folder_path)):
            if img.lower().endswith(image_extensions):
                img_path.append(os.path.join(folder_path, img))
                label.append(folder)

    data = pd.DataFrame({
        "Image_Path": img_path,
        "Class": label
    })
    return data

In [None]:
# Build one (Image_Path, Class) DataFrame per dataset split.
train_df = get_path_img(train_path)
test_df = get_path_img(test_path)
val_df = get_path_img(val_path)

# Check
# Report how many images each split contains, then display the first training rows.
print("Train samples:", len(train_df))
print("Test samples:", len(test_df))
print("Validation samples:", len(val_df))
train_df.head()

In [None]:
# Number of training images per class.
train_df['Class'].value_counts()


In [None]:
# Shuffle the rows of every split and renumber the index from 0.
# The notebook-wide Seed is used instead of a repeated literal so that changing the seed
# in the configuration cell changes every source of randomness together.
train_df = train_df.sample(frac=1, random_state=Seed).reset_index(drop=True)
test_df  = test_df.sample(frac=1, random_state=Seed).reset_index(drop=True)
val_df   = val_df.sample(frac=1, random_state=Seed).reset_index(drop=True)


In [None]:
# Number of distinct labels in the training split (a later cell recomputes it with nunique()).
NumberOfClasses = len(train_df['Class'].unique())

In [None]:
# Number of training images per class (row order does not affect these counts).
train_df['Class'].value_counts()

In [None]:
def show_sample_images(data, n_classes_to_show=36, n_cols=6):
    """Display a grid with one example image for each class.

    Args:
        data: DataFrame with an ``Image_Path`` column (image file path) and a ``Class``
            column (label).
        n_classes_to_show: Maximum number of classes to draw. When ``data`` holds more
            classes, a random subset of this size is chosen with ``random.sample``.
        n_cols: Number of columns in the grid.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    classes = list(data['Class'].unique())
    if len(classes) > n_classes_to_show:
        classes = random.sample(classes, n_classes_to_show)

    n_classes = len(classes)
    # Ceiling division. `n_classes // n_cols + 1` reserved an extra, empty row whenever
    # n_classes was an exact multiple of n_cols (e.g. 36 classes in 6 columns).
    # At least one row is kept so the figure height never becomes zero.
    n_rows = max(1, -(-n_classes // n_cols))

    plt.figure(figsize=(n_cols*3, n_rows*3))

    for i, cls in enumerate(classes):
        # Fixed random_state: the same example is picked for a class on every call.
        sample = data[data['Class'] == cls].sample(1, random_state=42).iloc[0]
        img = cv2.imread(sample['Image_Path'])

        ax = plt.subplot(n_rows, n_cols, i+1)
        if img is None:
            # cv2.imread returns None (it does not raise) for missing or undecodable
            # files; mark the cell instead of crashing inside cvtColor.
            ax.text(0.5, 0.5, 'unreadable', ha='center', va='center', fontsize=8, transform=ax.transAxes)
        else:
            # OpenCV loads images as BGR, matplotlib expects RGB.
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.axis('off')
        # Class name centred just below the image, in axes coordinates.
        ax.text(0.5, -0.15, cls, ha='center', va='center', fontsize=8, transform=ax.transAxes)

    plt.tight_layout()
    plt.show()



In [None]:
# Determine the number of classes correctly
NumberOfClasses = train_df['Class'].nunique()
print("Number of classes:", NumberOfClasses)

# Show sample images
show_sample_images(train_df, n_classes_to_show=NumberOfClasses, n_cols=5)


In [None]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function = tf.keras.applications.mobilenet_v2.preprocess_input
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function = tf.keras.applications.mobilenet_v2.preprocess_input
)

In [None]:
train_set = train_generator.flow_from_dataframe(
    dataframe = train_df,
    x_col = 'Image_Path',
    y_col = 'Class',
    target_size = (224 , 224),
    color_mode = 'rgb',
    class_mode =CLASS_MODE,
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = Seed,
    rotation_range = 30,
    zoom_range = 0.15,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.15,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

In [None]:
val_set = train_generator.flow_from_dataframe(
    dataframe = val_df,
    x_col = 'Image_Path',
    y_col = 'Class',
    target_size = (224 , 224),
    color_mode = 'rgb',
    class_mode = CLASS_MODE,
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = Seed,
    rotation_range = 30,
    zoom_range = 0.15,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.15,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

In [None]:
test_set = train_generator.flow_from_dataframe(
    dataframe = test_df,
    x_col = 'Image_Path',
    y_col = 'Class',
    target_size = (224 , 224),
    color_mode = 'rgb',
    class_mode = CLASS_MODE,
    batch_size = BATCH_SIZE,
    shuffle = False
)

In [None]:
# MobileNetV2 backbone initialised with ImageNet weights, built without its original
# classification layers, for 224x224 three-channel inputs.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    weights='imagenet',
    include_top=False,
)

# Freeze the backbone: its weights are not updated during training.
base_model.trainable = False

In [None]:
# Classification head stacked on top of the MobileNetV2 backbone.
inputs = base_model.input
x = base_model.output
# Collapse each spatial feature map to a single value per channel.
x = tf.keras.layers.GlobalAveragePooling2D(name='gap')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.25)(x)
# One output unit per class.
outputs = tf.keras.layers.Dense(NumberOfClasses, activation=DENSE_LAYER_ACTIVATION, name='predictions')(x)

# Create model
# `tf.keras.Model` is used directly (no second mid-notebook import of `Model`), and the
# `inputs` tensor defined above is reused instead of reading base_model.input again.
model = tf.keras.Model(inputs=inputs, outputs=outputs)



In [None]:
def compile_model(model, initial_lr=1e-3, weight_decay=0.0):
    """Compile ``model`` for multi-class classification with one-hot labels.

    The model is compiled with categorical cross-entropy and reports accuracy and
    top-5 categorical accuracy.

    Args:
        model: Keras model to compile; it is compiled in place.
        initial_lr: Learning rate given to the optimizer.
        weight_decay: Weight-decay coefficient. With the default ``0.0`` the plain Adam
            optimizer is used; any non-zero value selects AdamW with that coefficient.
            (Previously this argument was accepted but ignored.)

    Returns:
        The same ``model`` object, compiled.
    """
    if weight_decay:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=initial_lr, weight_decay=weight_decay)
    else:
        # Keeps the default call identical to the previous behaviour.
        optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)
    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.0)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=5)])
    return model

model = compile_model(model)

In [None]:
# Train the model. `train_set` and `val_set` already yield batches of BATCH_SIZE images,
# and `batch_size` must not be passed to fit() for generator-style inputs, so it is omitted.
# NOTE(review): the EPOCHS constant in the configuration cell is 20, but training runs
# for 15 epochs here -- confirm which value is intended.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=15
)

In [None]:
# Directory that receives the saved history and plots; created if it does not exist yet.
output_dir = "/content/results"
os.makedirs(output_dir, exist_ok=True)
print("Results folder created at:", output_dir)


In [None]:
def _save_metric_plot(df, metric, ylabel, short_name, output_dir):
    """Save the per-epoch curve(s) of one metric as ``<metric>.png`` in ``output_dir``."""
    if metric not in df.columns:
        print(f"No '{metric}' column in history; skipping plot.")
        return

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(df[metric], label=f'train_{short_name}')
    # The validation curve only exists when fit() was given validation data.
    val_column = f'val_{metric}'
    if val_column in df.columns:
        ax.plot(df[val_column], label=f'val_{short_name}')
    ax.set_xlabel("Epochs")
    ax.set_ylabel(ylabel)
    ax.set_title(f"Training vs Validation {ylabel}")
    ax.legend()
    fig.tight_layout()

    out_path = os.path.join(output_dir, f'{metric}.png')
    fig.savefig(out_path)
    # Close the figure so it is neither displayed inline nor kept in memory.
    plt.close(fig)
    print(f"{ylabel} plot saved at:", out_path)


def plot_history(history, output_dir="/content/results"):
    """Save a training history as a CSV file plus loss and accuracy plots.

    Writes ``history.csv``, ``loss.png`` and ``accuracy.png`` into ``output_dir`` and
    prints the path of every file written. The figures are saved and closed, not shown.

    Args:
        history: Object whose ``history`` attribute maps metric names to per-epoch
            values (as returned by ``model.fit``). ``val_*`` entries are plotted when
            present and skipped otherwise.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None.
    """
    # Make sure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save history as CSV
    df = pd.DataFrame(history.history)
    csv_path = os.path.join(output_dir, 'history.csv')
    df.to_csv(csv_path, index=False)
    print("History CSV saved at:", csv_path)

    # Plot Loss
    _save_metric_plot(df, 'loss', "Loss", 'loss', output_dir)

    # Plot Accuracy
    _save_metric_plot(df, 'accuracy', "Accuracy", 'acc', output_dir)

In [None]:
# `history` is the object returned by model.fit(); write its CSV and plots to the results folder.
plot_history(history, output_dir="/content/results")


In [None]:
# Sorted label names of the training split, and each name's position in that ordering.
class_names = sorted(train_df['Class'].unique())
class_indices = dict(zip(class_names, range(len(class_names))))

In [None]:
# Class probabilities for every test image (the model's last layer is a softmax).
preds = model.predict(test_set, verbose=1)

# The model's output columns follow the label -> index mapping of the generator it was
# trained on. Decoding them with the test generator's mapping is only valid when both
# mappings agree, so verify that instead of silently reporting wrong labels/accuracy.
if test_set.class_indices != train_set.class_indices:
    raise ValueError(
        "train_set and test_set map class names to different indices; "
        "predicted and true class indices are not comparable."
    )

# Predicted class index = position of the highest probability
pred_classes = np.argmax(preds, axis=1)

# True class indices from the test generator (in file order, since shuffle is off)
true_classes = test_set.classes

# Class names ordered by their index, so class_labels[i] is the name of class i
class_labels = [
    name for name, _ in sorted(test_set.class_indices.items(), key=lambda item: item[1])
]

# Side-by-side table of true and predicted labels for every test image
results_df = pd.DataFrame({
    'Image_Path': test_set.filenames,
    'True_Label': [class_labels[i] for i in true_classes],
    'Predicted_Label': [class_labels[i] for i in pred_classes],
    'Confidence': preds.max(axis=1)  # maximum probability for each prediction
})

# Save the results as a CSV file
results_df.to_csv('predictions_results.csv', index=False)

In [None]:
from sklearn.metrics import accuracy_score

# Share of test images whose predicted class index equals the true class index.
acc = accuracy_score(y_true=true_classes, y_pred=pred_classes)
print(f"Test Accuracy: {acc * 100:.2f}%")