In [1]:
# Step 0 — Imports
import os
import shutil
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Fix random seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


2025-11-16 04:16:32.919667: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763266593.216724      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763266593.317965      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [2]:
# Step 1 — Paths
DATASET_PATHS = [
    '/kaggle/input/fruit-recognition',
    '/kaggle/input/fruits-fresh-and-rotten-for-classification',
    
]

MERGED_ROOT = '/kaggle/working/merged_fruits_dataset'
IMG_SIZE = (128, 128)
os.makedirs(MERGED_ROOT, exist_ok=True)


In [3]:
# Step 2 — Merge datasets (recursive)
def copy_images(src_folder, dest_root):
    for root, dirs, files in os.walk(src_folder):
        # class name = top-level folder under src_folder
        # We get relative path to src_folder
        rel_path = os.path.relpath(root, src_folder)
        if rel_path == '.':
            continue  # skip root itself
        class_name = rel_path.replace(" ", "_")  # remove spaces
        dest_dir = os.path.join(dest_root, class_name)
        os.makedirs(dest_dir, exist_ok=True)
        for f in files:
            if f.lower().endswith(('.jpg', '.jpeg', '.png')):
                src_path = os.path.join(root, f)
                dest_path = os.path.join(dest_dir, f)
                if os.path.exists(dest_path):
                    base, ext = os.path.splitext(f)
                    dest_path = os.path.join(dest_dir, base + f"_{random.randint(0,9999)}" + ext)
                shutil.copy(src_path, dest_path)

# Merge all datasets
for dataset in DATASET_PATHS:
    copy_images(dataset, MERGED_ROOT)

# Check classes and number of images
print("Classes and counts after merging:")
for cls in os.listdir(MERGED_ROOT):
    print(cls, len(os.listdir(os.path.join(MERGED_ROOT, cls))))


Classes and counts after merging:
Carambola 2080
muskmelon 2078
Banana 3027
Pear 3012
Mango 4154
Apple 7
Persimmon 2072
Tomatoes 2171
Kiwi 4
Pomegranate 2167
Plum 2298
Peach 2629
Pitaya 2501
Orange 3012
dataset 3
Guava 4


In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

IMG_SIZE = (128, 128)
BATCH_SIZE = 32


# STEP 3 — Create dataframe of image paths and labels

image_paths = []
labels = []

for root, dirs, files in os.walk(MERGED_ROOT):
    for file in files:
        if file.lower().endswith(("jpg","png","jpeg")):
            full_path = os.path.join(root, file)
            label = os.path.basename(os.path.dirname(full_path))
            labels.append(label)
            image_paths.append(full_path)

df = pd.DataFrame({
    "path": image_paths,
    "label": labels
})

print("Total images:", len(df))
print("Sample labels:", df['label'].unique())


# STEP 4 — Clean wrong classes like 'dataset'

classes = [c for c in df['label'].unique() if c.lower() not in ['dataset', 'unknown', 'misc']]
df = df[df['label'].isin(classes)]
num_classes = len(classes)

print("VALID CLASSES:", classes)
print("Number of classes:", num_classes)


# STEP 5 — Train/Val/Test split

train_df, test_df = train_test_split(df, test_size=0.15, stratify=df['label'], random_state=42)
train_df, val_df  = train_test_split(train_df, test_size=0.15, stratify=train_df['label'], random_state=42)

print("Train:", len(train_df), " | Val:", len(val_df), " | Test:", len(test_df))


# STEP 6 — Image Generators (DEFINE train_datagen here!)

train_datagen = ImageDataGenerator(
    rescale=1/255.,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1/255.)

train_gen = train_datagen.flow_from_dataframe(
    train_df,
    x_col='path',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    classes=classes
)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    x_col='path',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    classes=classes
)

print("GENERATOR CLASS MAP:", train_gen.class_indices)

# STEP 7 — Build CNN (Correct Output = num_classes)

cnn = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')   # IMPORTANT!!!
])

cnn.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

cnn.summary()




In [None]:
# STEP 8 — Train CNN

history = cnn.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10)

Total images: 97747
Sample labels: ['Carambola' 'muskmelon' 'Banana' 'Pear' 'Mango' 'Total_Number_of_Apples'
 'Apple_D' 'Apple_A' 'Apple_F' 'Apple_E' 'Apple_C' 'Apple_B' 'Persimmon'
 'Tomatoes' 'Kiwi_B' 'kiwi_A' 'Total_Number_of_Kiwi_fruit' 'Kiwi_C'
 'Pomegranate' 'Plum' 'Peach' 'Pitaya' 'Orange' 'rottenoranges'
 'freshbanana' 'freshapples' 'rottenapples' 'rottenbanana' 'freshoranges'
 'guava_B' 'Guava_total' 'guava_total_final' 'guava_A']
VALID CLASSES: ['Carambola', 'muskmelon', 'Banana', 'Pear', 'Mango', 'Total_Number_of_Apples', 'Apple_D', 'Apple_A', 'Apple_F', 'Apple_E', 'Apple_C', 'Apple_B', 'Persimmon', 'Tomatoes', 'Kiwi_B', 'kiwi_A', 'Total_Number_of_Kiwi_fruit', 'Kiwi_C', 'Pomegranate', 'Plum', 'Peach', 'Pitaya', 'Orange', 'rottenoranges', 'freshbanana', 'freshapples', 'rottenapples', 'rottenbanana', 'freshoranges', 'guava_B', 'Guava_total', 'guava_total_final', 'guava_A']
Number of classes: 33
Train: 70621  | Val: 12463  | Test: 14663
Found 70621 validated image filenames bel

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-11-16 04:28:16.061223: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


  self._warn_if_super_not_called()


Epoch 1/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1489s[0m 673ms/step - accuracy: 0.5080 - loss: 1.6121 - val_accuracy: 0.7529 - val_loss: 0.6496
Epoch 2/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1446s[0m 655ms/step - accuracy: 0.7439 - loss: 0.6667 - val_accuracy: 0.8005 - val_loss: 0.4966
Epoch 3/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1451s[0m 657ms/step - accuracy: 0.7781 - loss: 0.5593 - val_accuracy: 0.7888 - val_loss: 0.5133
Epoch 4/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1525s[0m 691ms/step - accuracy: 0.7921 - loss: 0.5108 - val_accuracy: 0.8284 - val_loss: 0.4034
Epoch 5/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1462s[0m 662ms/step - accuracy: 0.8031 - loss: 0.4709 - val_accuracy: 0.7867 - val_loss: 0.5296
Epoch 6/10
[1m2207/2207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1472s[0m 667ms/step - accuracy: 0.8155 - loss: 0.4402 - val_accuracy: 0.8297 - val

In [5]:
 print(history.history.keys())

print("Final Training Accuracy:", history.history['accuracy'][-1])
print("Final Validation Accuracy:", history.history['val_accuracy'][-1])


dict_keys(['accuracy', 'loss', 'val_accuracy', 'val_loss'])
Final Training Accuracy: 0.8305603265762329
Final Validation Accuracy: 0.7639412879943848


In [None]:
test_loss, test_acc = cnn.evaluate(test_gen)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)



Test Accuracy : 78.932

In [6]:
!pip install scikit-image




In [7]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog, local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns


In [8]:

#STEP 3 — HOG + LBP Feature Extraction

IMG_SIZE = (128, 128)

def extract_features(img_path):
    try:
        img = cv2.imread(img_path)
        img = cv2.resize(img, IMG_SIZE)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # HOG
        hog_features = hog(gray, 
                           orientations=9, 
                           pixels_per_cell=(8, 8),
                           cells_per_block=(2, 2), 
                           block_norm='L2-Hys')

        # LBP
        lbp = local_binary_pattern(gray, P=8, R=1, method="uniform")
        (hist, _) = np.histogram(lbp.ravel(),
                                bins=np.arange(0, 8 + 3),
                                range=(0, 8 + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        # Final combined feature
        return np.concatenate([hog_features, hist])

    except:
        return None


In [9]:
#Extract Features for Whole Dataset

feature_list = []
label_list = []

for i, row in df.iterrows():
    f = extract_features(row['path'])
    if f is not None:
        feature_list.append(f)
        label_list.append(row['label'])

X = np.array(feature_list)
y = np.array(label_list)

print("Feature shape:", X.shape)
print("Labels:", len(y))


Feature shape: (97747, 8110)
Labels: 97747


In [10]:
#Split Data + Scale
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
