In [1]:
import os

# Define the path where you want to store the combined dataset
base_dir = "/mnt/data/papaya_dataset"

# Create the directory (if it doesn't exist)
os.makedirs(base_dir, exist_ok=True)

print(f"Directory '{base_dir}' created successfully!")


Directory '/mnt/data/papaya_dataset' created successfully!


In [3]:
from google.colab import files
uploaded = files.upload()

ModuleNotFoundError: No module named 'google.colab'

In [2]:
import os
import zipfile
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Define dataset paths
zip_files = {
    "/content/Papaya_Ripe.zip": "Ripe",
    "/content/Papaya_Classification.zip": "Classification",
    "/content/Papaya_Leaf.zip": "Leaf"
}

# Create a directory to store the combined dataset
base_dir = "/mnt/data/papaya_dataset"
os.makedirs(base_dir, exist_ok=True)

# Extract images and assign labels
for zip_path, label in zip_files.items():
    extract_path = os.path.join(base_dir, label)
    os.makedirs(extract_path, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

print("Extraction complete. Images are labeled and organized.")

# Splitting data into train, validation, and test sets
data_dir = base_dir
target_size = (150, 150)
batch_size = 32

# Data Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80-20 train-validation split
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Build a CNN Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(len(zip_files), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs
)

print("Model training complete!")


FileNotFoundError: [Errno 2] No such file or directory: '/content/Papaya_Ripe.zip'

In [None]:
import os
print(os.listdir(base_dir))  # Check class folders
for category in os.listdir(base_dir):
    print(f"{category}: {len(os.listdir(os.path.join(base_dir, category)))} images")


In [None]:
image_files = [f for f in os.listdir(category_path) if f.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif'))]


In [None]:
import os

base_dir = "/mnt/data/papaya_dataset"

for category in os.listdir(base_dir):
    category_path = os.path.join(base_dir, category)
    print(f"\n🔹 Checking category: {category}")

    if os.path.isdir(category_path):
        sub_items = os.listdir(category_path)
        print(f"  Contains {len(sub_items)} items")
        print("  Sample items:", sub_items[:5])  # Show first 5 items


In [None]:
import os
import matplotlib.pyplot as plt
import random
from PIL import Image

# Define dataset path
base_dir = "/mnt/data/papaya_dataset"
categories = ["Classification", "Ripe", "Leaf"]

# Create a dictionary to store image paths per category
category_images = {}

for category in categories:
    category_path = os.path.join(base_dir, category)
    image_files = []

    # Walk through directories to find images
    for root, _, files in os.walk(category_path):
        for file in files:
            if file.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif')):
                image_files.append(os.path.join(root, file))

    # Store images (if available)
    if image_files:
        category_images[category] = image_files

# Check if we found images
if not category_images:
    print("No images found in any category.")
else:
    # Define the number of images per category to display
    images_per_category = 5

    # Create a subplot for each category
    fig, axes = plt.subplots(len(category_images), images_per_category, figsize=(15, 5 * len(category_images)))

    # Ensure axes are iterable for a single category case
    if len(category_images) == 1:
        axes = [axes]

    for ax_row, (category, images) in zip(axes, category_images.items()):
        selected_images = random.sample(images, min(images_per_category, len(images)))

        for ax, img_path in zip(ax_row, selected_images):
            img = Image.open(img_path)
            ax.imshow(img)
            ax.set_title(category)
            ax.axis("off")

    plt.tight_layout()
    plt.show()


In [None]:
import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define dataset path
base_dir = "/mnt/data/papaya_dataset"

# Define batch size and image size
batch_size = 32
img_height = 224
img_width = 224

# Create ImageDataGenerator for training (with augmentation)
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Normalize pixel values
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Use 20% of the data for validation
)

# Load training data
train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",  # Use 'binary' if only 2 classes
    subset="training"
)

# Load validation data
val_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
    subset="validation"
)

# Get class indices (to verify labels)
print("Class labels:", train_generator.class_indices)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),  # Prevent overfitting
    Dense(len(train_generator.class_indices), activation="softmax")  # Output layer
])

# Compile the model
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()


In [None]:
# Train the model
epochs = 10  # Increase for better results
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs
)


In [None]:
# Evaluate model on validation data
loss, accuracy = model.evaluate(val_generator)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Save model for future use
model.save("/mnt/data/papaya_model.h5")


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D

# Load pre-trained MobileNetV2
base_model = MobileNetV2(input_shape=(img_height, img_width, 3), include_top=False, weights="imagenet")

# Freeze base layers
base_model.trainable = False

# Add custom layers
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation="softmax")
])

# Compile and train (same steps as before)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(train_generator, validation_data=val_generator, epochs=10)


In [None]:
# Evaluate model on validation data
loss, accuracy = model.evaluate(val_generator)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Save model for future use
model.save("/mnt/data/papaya_model.h5")
