#### Organizing the data

For TensorFlow to load the data correctly, it needs to be organized like this:

In [None]:
# Script for consolidating the original data set (optional)

import os
import shutil

# Base directory containing the Training and Test folders.
# Assembled with os.path.join so the relative path resolves on every OS, not
# only where "\" is the path separator.
base_dir = os.path.join("fruits-360_dataset_100x100", "fruits-360")
# Root folder that receives the consolidated copy of the dataset.
output_dir = "consolidated_fruits"

# Maps every original Fruits-360 class folder name to the consolidated class
# it is merged into. Folder names that are missing here keep their original
# name (the consolidation loop falls back to the folder name itself).
class_mapping = {
    "Apple 6": "Apple",
    "Apple Braeburn 1": "Apple",
    "Apple Crimson Snow 1": "Apple",
    "Apple Golden 1": "Apple",
    "Apple Golden 2": "Apple",
    "Apple Golden 3": "Apple",
    "Apple Granny Smith 1": "Apple",
    "Apple hit 1": "Apple",
    "Apple Pink Lady 1": "Apple",
    "Apple Red 1": "Apple",
    "Apple Red 2": "Apple",
    "Apple Red 3": "Apple",
    "Apple Red Delicious 1": "Apple",
    "Apple Red Yellow 1": "Apple",
    "Apple Red Yellow 2": "Apple",
    "Apricot 1": "Apricot",
    "Avocado 1": "Avocado",
    "Avocado ripe 1": "Avocado",
    "Banana 1": "Banana",
    "Banana Lady Finger 1": "Banana",
    "Banana Red 1": "Banana",
    "Beetroot 1": "Beetroot",
    "Blueberry 1": "Blueberry",
    "Cabbage white 1": "Cabbage",
    "Cactus fruit 1": "Cactus fruit",
    "Cantaloupe 1": "Melon",
    "Cantaloupe 2": "Melon",
    "Carambula 1": "Carambula",
    "Carrot 1": "Carrot",
    "Cauliflower 1": "Cauliflower",
    "Cherry 1": "Cherry",
    "Cherry 2": "Cherry",
    "Cherry Rainier 1": "Cherry",
    "Cherry Wax Black 1": "Cherry",
    "Cherry Wax Red 1": "Cherry",
    "Cherry Wax Yellow 1": "Cherry",
    "Chestnut 1": "Chestnut",
    "Clementine 1": "Clementine",
    "Cocos 1": "Cocos",
    "Corn 1": "Corn",
    "Corn Husk 1": "Corn Husk",
    "Cucumber 1": "Cucumber",
    "Cucumber 3": "Cucumber",
    "Cucumber Ripe 1": "Cucumber",
    "Cucumber Ripe 2": "Cucumber",
    "Dates 1": "Dates",
    "Eggplant 1": "Eggplant",
    "Eggplant long 1": "Eggplant",
    "Fig 1": "Fig",
    "Ginger Root 1": "Ginger Root",
    "Granadilla 1": "Granadilla",
    "Grape Blue 1": "Grape",
    "Grape Pink 1": "Grape",
    "Grape White 1": "Grape",
    "Grape White 2": "Grape",
    "Grape White 3": "Grape",
    "Grape White 4": "Grape",
    "Grapefruit Pink 1": "Grapefruit",
    "Grapefruit White 1": "Grapefruit",
    "Guava 1": "Guava",
    "Hazelnut 1": "Hazelnut",
    "Huckleberry 1": "Huckleberry",
    "Kaki 1": "Kaki",
    "Kiwi 1": "Kiwi",
    "Kohlrabi 1": "Kohlrabi",
    "Kumquats 1": "Kumquats",
    "Lemon 1": "Lemon",
    "Lemon Meyer 1": "Lemon",
    "Limes 1": "Limes",
    "Lychee 1": "Lychee",
    "Mandarine 1": "Mandarine",
    "Mango 1": "Mango",
    "Mango Red 1": "Mango",
    "Mangostan 1": "Mangostan",
    "Maracuja 1": "Maracuja",
    "Melon Piel de Sapo 1": "Melon",
    "Mulberry 1": "Mulberry",
    "Nectarine 1": "Nectarine",
    "Nectarine Flat 1": "Nectarine",
    # Fixed: the target name was truncated to "Nut Fores".
    "Nut Forest 1": "Nut Forest",
    "Nut Pecan 1": "Nut Pecan",
    "Onion Red 1": "Onion",
    "Onion Red Peeled 1": "Onion",
    "Onion White 1": "Onion",
    "Orange 1": "Orange",
    "Papaya 1": "Papaya",
    "Passion Fruit 1": "Passion Fruit",
    "Peach 1": "Peach",
    "Peach 2": "Peach",
    "Peach Flat 1": "Peach",
    "Pear 1": "Pear",
    "Pear 2": "Pear",
    # Fixed: "Pear 3" used to map to itself, leaving it as the only pear
    # variety outside the consolidated "Pear" class.
    "Pear 3": "Pear",
    "Pear Abate 1": "Pear",
    "Pear Forelle 1": "Pear",
    "Pear Kaiser 1": "Pear",
    "Pear Monster 1": "Pear",
    "Pear Red 1": "Pear",
    "Pear Stone 1": "Pear",
    "Pear Williams 1": "Pear",
    "Pepino 1": "Pepino",
    "Pepper Green 1": "Pepper",
    "Pepper Orange 1": "Pepper",
    "Pepper Red 1": "Pepper",
    "Pepper Yellow 1": "Pepper",
    "Physalis 1": "Physalis",
    "Physalis with Husk 1": "Physalis",
    "Pineapple 1": "Pineapple",
    "Pineapple Mini 1": "Pineapple",
    "Pitahaya Red 1": "Pitahaya",
    "Plum 1": "Plum",
    "Plum 2": "Plum",
    "Plum 3": "Plum",
    "Pomegranate 1": "Pomegranate",
    "Pomelo Sweetie 1": "Pomelo Sweetie",
    "Potato Red 1": "Potato",
    "Potato Red Washed 1": "Potato",
    "Potato Sweet 1": "Potato",
    "Potato White 1": "Potato",
    "Quince 1": "Quince",
    "Rambutan 1": "Rambutan",
    "Raspberry 1": "Raspberry",
    "Redcurrant 1": "Redcurrant",
    "Salak 1": "Salak",
    "Strawberry 1": "Strawberry",
    "Strawberry Wedge 1": "Strawberry",
    "Tamarillo 1": "Tamarillo",
    "Tangelo 1": "Tangelo",
    "Tomato 1": "Tomato",
    "Tomato 2": "Tomato",
    "Tomato 3": "Tomato",
    "Tomato 4": "Tomato",
    "Tomato Cherry Red 1": "Tomato",
    "Tomato Heart 1": "Tomato",
    "Tomato Maroon 1": "Tomato",
    "Tomato not Ripened 1": "Tomato",
    "Tomato Yellow 1": "Tomato",
    "Walnut 1": "Walnut",
    "Watermelon 1": "Melon",
    "Zucchini 1": "Zucchini",
    "Zucchini dark 1": "Zucchini",
}



# Build the consolidated dataset: for each split (Training and Test), copy the
# images of every original class folder into the folder of its consolidated
# class. The source dataset is left untouched.
for subfolder in ["Training", "Test"]:
    subfolder_path = os.path.join(base_dir, subfolder)
    # isdir (not exists): a plain file with this name could not be listed below.
    if not os.path.isdir(subfolder_path):
        print(f"Subfolder '{subfolder}' not found in '{base_dir}'. Skipping...")
        continue

    # Target folder for consolidated data
    output_subfolder = os.path.join(output_dir, subfolder)
    os.makedirs(output_subfolder, exist_ok=True)

    # Iterate through class folders in the subfolder
    for class_folder in os.listdir(subfolder_path):
        class_folder_path = os.path.join(subfolder_path, class_folder)

        # Only directories are class folders; ignore stray files.
        if not os.path.isdir(class_folder_path):
            continue

        # Determine the consolidated class name
        # (the original name is kept when the folder is not in the mapping).
        target_class = class_mapping.get(class_folder, class_folder)
        target_folder = os.path.join(output_subfolder, target_class)

        # Ensure the target class folder exists
        os.makedirs(target_folder, exist_ok=True)

        print(f"{class_folder} -> {target_class}")

        # Copy the images into the consolidated folder
        for image_name in os.listdir(class_folder_path):
            src_image_path = os.path.join(class_folder_path, image_name)

            # shutil.copy handles regular files only and raises on a
            # directory, so skip anything that is not a file.
            if not os.path.isfile(src_image_path):
                continue

            # Prefix the original class name so that files with the same name
            # coming from different source classes do not overwrite each other.
            new_image_name = f"{class_folder}_{image_name}"
            dst_image_path = os.path.join(target_folder, new_image_name)

            shutil.copy(src_image_path, dst_image_path)

# Print confirmation
print(f"Dataset has been reorganized. Consolidated dataset is saved in: {output_dir}")

## Training the initial model

In [None]:
import os

# Training and test image folders of the Fruits-360 dataset, assembled with
# os.path.join so they resolve regardless of the OS path separator.
# NOTE(review): these point at the original dataset, not at the
# "consolidated_fruits" output of the optional consolidation script - confirm
# which of the two is meant to be used for training.
_dataset_root = os.path.join("fruits-360_dataset_100x100", "fruits-360")
train_dir = os.path.join(_dataset_root, "Training")
test_dir = os.path.join(_dataset_root, "Test")

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training input pipeline: images are read from the class sub-folders of
# train_dir, multiplied by 1/255, resized to 100x100 and served in batches of
# 32 with categorical labels.
train_gen = ImageDataGenerator(rescale=1.0 / 255)
train_data = train_gen.flow_from_directory(
    directory=train_dir,
    class_mode='categorical',
    batch_size=32,
    target_size=(100, 100),
)

We will use a pretrained model, MobileNetV2, as the base of our model. Because our dataset is relatively small and generalizable, building on a pretrained network is easier and more effective than training one from scratch.

In [None]:
# Feature extractor: MobileNetV2 with ImageNet weights and without its
# original classification top, for 100x100 RGB inputs.
base_model = MobileNetV2(
    input_shape=(100, 100, 3),
    include_top=False,
    weights='imagenet',
)

# Mark the whole base model as non-trainable.
base_model.trainable = False

# Classifier stacked on the base: pooled features -> 128-unit ReLU layer ->
# one softmax output per class found by the training generator.
classifier_layers = [
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dense(train_data.num_classes, activation='softmax'),
]
model = Sequential(classifier_layers)

# Compile with Adam and categorical cross-entropy, tracking accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the model on the training generator for 7 epochs.
model.fit(x=train_data, epochs=7)

In [None]:
# Persist the trained model to disk. The ".keras" extension selects the native
# Keras file format (this is not an HDF5 file).
model.save(filepath='my_model_notuning.keras')

In [None]:
from tensorflow.keras.models import load_model

# Load the model back from the .keras file written by the save cell.
# The file name has to match the one passed to model.save(); the previously
# used 'my_model.keras' is not written by any cell shown in this notebook.
loaded_model = load_model('my_model_notuning.keras')
loaded_model

## Evaluating the model

In [None]:
# Evaluate on test data.
# shuffle=False keeps the generator in directory order. The per-class metrics
# computed later compare model.predict(test_data) against test_data.classes,
# and those two only line up sample-by-sample when the generator does not
# shuffle (flow_from_directory shuffles by default).
test_gen = ImageDataGenerator(rescale=1./255)
test_data = test_gen.flow_from_directory(
    test_dir,
    target_size=(100, 100),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)
test_loss, test_accuracy = model.evaluate(test_data)
print(f"Test Accuracy: {test_accuracy}")

In [None]:
import matplotlib.pyplot as plt

# Metric values recorded by the model's training history.
training_history = model.history.history

# Draw accuracy and loss side by side in ONE figure so the saved PDF contains
# both charts. With two separate figures, a single savefig call only captures
# the figure that is current at that moment (the loss chart).
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 5))

# Training accuracy
accuracy_ax.plot(training_history['accuracy'], label='Training Accuracy')
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

# Training loss
loss_ax.plot(training_history['loss'], label='Training Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()

# Save the charts to a PDF, then display them
fig.savefig('training_charts.pdf')
plt.show()


In [None]:
# Inspect the raw training history: the same dict whose 'accuracy' and 'loss'
# entries are plotted in the training charts.
model.history.history

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import pandas as pd

# Predict over the whole test generator and reduce every output row to the
# index of its highest-scoring class.
# NOTE(review): comparing against test_data.classes assumes the generator
# yields samples in the same order as test_data.classes (i.e. it does not
# shuffle) - confirm how test_data was created.
test_data.reset()
y_pred = model.predict(test_data)
y_pred_classes = y_pred.argmax(axis=1)
y_true = test_data.classes
class_labels = list(test_data.class_indices)

# Per-class metrics as a dict, then as a table with one row per report entry.
report = classification_report(
    y_true,
    y_pred_classes,
    target_names=class_labels,
    output_dict=True,
)
report_df = pd.DataFrame(report).T

# Drop the last three rows ('accuracy', 'macro avg' and 'weighted avg') so
# only the per-class F1 scores remain.
f1_scores = report_df.iloc[:-3]['f1-score']

# Bar chart with one bar per class.
plt.figure(figsize=(16, 8))
f1_scores.plot.bar(color='skyblue')
plt.title('F1 Scores for Each Class')
plt.xlabel('Classes')
plt.ylabel('F1 Score')
plt.xticks(rotation=90)
plt.tight_layout()

# Write the chart to a PDF, then display it.
plt.savefig('f1_scores_bar_chart.pdf')
plt.show()


In [None]:
# Export the per-class F1 scores twice: as a CSV table with an 'F1 Score'
# column, and as a plain-text "<class>: <score>" listing with four decimals.
f1_scores.to_csv('f1_scores.csv', header=['F1 Score'])

f1_lines = [
    f"{class_name}: {score:.4f}\n"
    for class_name, score in f1_scores.items()
]
with open('f1_scores.txt', 'w') as file:
    file.writelines(f1_lines)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Calculate support (number of test samples per class) directly from y_true
class_labels = list(test_data.class_indices.keys())
unique, counts = np.unique(y_true, return_counts=True)
support_dict = dict(zip(unique, counts))
# A class without any sample never shows up in `unique`; count it as 0
# instead of failing with a KeyError.
support = [support_dict.get(i, 0) for i in range(len(class_labels))]

# Plot support as a bar chart
plt.figure(figsize=(12, 8))
plt.bar(class_labels, support, color='lightcoral', alpha=0.7)
plt.title('Class Support (Number of Samples)', fontsize=16)
plt.xlabel('Class', fontsize=14)
plt.ylabel('Support (Number of Samples)', fontsize=14)
plt.xticks(rotation=90, fontsize=10)
plt.tight_layout()

# Save the plot as a PDF
plt.savefig('class_support.pdf')
plt.show()

In [None]:
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

import os

# Load a sample image.
# The path is assembled with os.path.join so it resolves regardless of the OS
# path separator (the backslash-only form only works on Windows).
img_path = os.path.join(
    "Project-D6-FruitIP-main",
    "Project-D6-FruitIP-main",
    "realfruits100x100",
    "real_apple1.jpg",
)
img = image.load_img(img_path, target_size=(100, 100))
# Divide by 255 like the data generators do (rescale=1./255), then add a
# leading batch axis because predict() works on batches.
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)

# Make a prediction
predictions = model.predict(img_array)
predicted_class = np.argmax(predictions, axis=1)[0]
# Despite its name, this is the collection of ALL class names; it is indexed
# with predicted_class below to obtain the single predicted label.
# NOTE(review): the label order is taken from test_data; presumably it matches
# the class order of the generator the model was trained on - confirm.
predicted_label = test_data.class_indices.keys()

# Display the image with the predicted label
plt.imshow(img)
plt.title(f"Predicted: {list(predicted_label)[predicted_class]}")
plt.axis('off')
plt.savefig('example_prediction.pdf')
plt.show()

## Fine tuning the model

In [None]:
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight

import numpy as np

# Load Pretrained Model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(100, 100, 3))

# Fine-tune only the last 50 layers of the base model.
# The base model itself has to stay trainable: re-enabling child layers
# underneath a parent whose `trainable` flag is False is not a reliable way to
# unfreeze them. So keep the parent trainable and freeze every layer except
# the last 50 instead.
base_model.trainable = True
for layer in base_model.layers[:-50]:
    layer.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dense(train_data.num_classes, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=1e-5),  # Lower learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Class weights that balance the training set: classes with fewer samples get
# a proportionally larger weight. Every class index starts at 1.0 so the dict
# covers all outputs even if a class has no training sample.
train_labels = train_data.classes
present_classes = np.unique(train_labels)
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_labels,
)
class_weights = {index: 1.0 for index in range(train_data.num_classes)}
class_weights.update(
    (int(class_index), float(weight))
    for class_index, weight in zip(present_classes, balanced_weights)
)

# Pass class weights during training
model.fit(train_data, epochs=7, class_weight=class_weights)