***We mount Google Drive to access the dataset.***


In [None]:
# Colab helper module that exposes Google Drive inside the notebook
from google.colab import drive
# Mount Drive at /content/drive; later cells read and write under '/content/drive/My Drive'
drive.mount('/content/drive')

***We iterate over every subfolder of the data folder and build a CSV file that contains the path of each image together with its label.***

In [None]:
import os
import pandas as pd

# Root of the dataset on the mounted Drive (adjust the path as needed)
base_dir = '/content/drive/My Drive/data'

# Shared accumulator of [file_path, label] rows, filled by walk_through_folders
file_data = []


def walk_through_folders(base_dir):
    """Append a [file_path, label] row to ``file_data`` for every file under ``base_dir``.

    The tree is traversed recursively with ``os.walk``. The label of a file is
    the name of the folder that directly contains it.

    Args:
        base_dir: directory whose files (at any depth) are recorded.

    Returns:
        None. Results are appended in place to the module-level ``file_data``.
    """
    # Lazily enumerate the full path of every file in walk order
    all_paths = (
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(base_dir)
        for name in names
    )
    # Label = name of the immediate parent folder of each file
    file_data.extend(
        [path, os.path.basename(os.path.dirname(path))] for path in all_paths
    )

# Fill file_data with one [path, label] row per file found under base_dir
walk_through_folders(base_dir)

# Turn the collected rows into a two-column table: image path and its label
df = pd.DataFrame(file_data, columns=['filename', 'label'])

# Persist the table to Drive; index=False keeps the pandas row index out of the file
df.to_csv('/content/drive/My Drive/image_path.csv', index=False)

print('CSV file created successfully!')


***We resize the images to 400x400 pixels with OpenCV and save all the resized images to another folder so that we do not have to repeat the process. We also create a CSV file of the resized images with their file paths and labels.***

In [None]:
import os
import pandas as pd
import cv2  # Import OpenCV

# Source dataset on Drive (adjust the path as needed); an image's label is its parent folder name
base_dir = '/content/drive/My Drive/data'

# Single flat folder on Drive that receives every resized image
output_dir = '/content/drive/My Drive/resized_images'

# Create the output folder (and any missing parents) if it is not there yet
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Rows of [resized_image_path, label]; filled by walk_through_folders_and_resize
file_data = []

# Function to walk through folders and subfolders and resize images
def walk_through_folders_and_resize(base_dir):
    """Resize every readable image under ``base_dir`` to 400x400 and save it.

    The tree is traversed recursively. Each image that OpenCV can read is
    resized, written to the module-level ``output_dir`` as
    ``<label><n>.jpg`` (``n`` counts the images saved for that label, starting
    at 1), and recorded as a ``[output_path, label]`` row in the module-level
    ``file_data``. The label is the name of the folder that directly contains
    the source file.

    Files that cannot be read, and images that cannot be written, are reported
    with ``print`` and skipped, so ``file_data`` only lists files that were
    actually saved.

    Args:
        base_dir: root directory of the source images.

    Returns:
        None. Results are appended in place to ``file_data``.
    """
    image_counter = {}  # label -> number of images successfully saved so far
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            file_path = os.path.join(root, file)
            # Label based on the last folder name
            label = os.path.basename(os.path.dirname(file_path))

            # cv2.imread signals an unreadable / non-image file by returning None
            img = cv2.imread(file_path)
            if img is None:
                print(f"Failed to load image: {file_path}")
                continue

            # Resize the image to a fixed size of 400x400 pixels
            resized_img = cv2.resize(img, (400, 400))

            # Name the output after the label and the next free counter value
            next_index = image_counter.get(label, 0) + 1
            new_filename = f"{label}{next_index}.jpg"
            output_file_path = os.path.join(output_dir, new_filename)

            # cv2.imwrite returns False when the file could not be written;
            # do not record a path that does not exist on disk
            if not cv2.imwrite(output_file_path, resized_img):
                print(f"Failed to save image: {output_file_path}")
                continue

            # Only consume the counter value once the file is really on disk
            image_counter[label] = next_index
            file_data.append([output_file_path, label])

# Resize every image under base_dir into output_dir and collect [path, label] rows in file_data
walk_through_folders_and_resize(base_dir)

# Turn the collected rows into a two-column table: resized image path and its label
df = pd.DataFrame(file_data, columns=['filename', 'label'])

# Persist the table to Drive; index=False keeps the pandas row index out of the file
df.to_csv('/content/drive/My Drive/resized_image_path.csv', index=False)

print('Images resized and CSV file created successfully!')


Which label has the minimum number of images?

***We use only 20 images per label so that our model does not become biased toward the few labels that have more images.***

In [None]:
import os
import pandas as pd
import shutil

# CSV with one row per resized image; columns are 'filename' and 'label'
csv_file = '/content/drive/My Drive/resized_image_path.csv'
df = pd.read_csv(csv_file)

# Destination root for the per-label subset; one subfolder per label is created beneath it
new_output_dir = '/content/drive/My Drive/filtered_images'

# Create the destination root (and any missing parents) if it is not there yet
if not os.path.exists(new_output_dir):
    os.makedirs(new_output_dir)

# Function to copy only the first 20 images per label
def copy_first_20_images_per_label(df, max_per_label=20, output_dir=None):
    """Copy at most ``max_per_label`` images of every label into per-label folders.

    Rows are grouped by the 'label' column and, for each label, the first
    ``max_per_label`` rows (in DataFrame order) are copied to
    ``<output_dir>/<label>/<original file name>``.

    The limit used to be hard-coded as 25, which contradicted the function
    name, the surrounding comments and the printed message (all say 20); it
    is now a parameter that defaults to 20.

    Args:
        df: DataFrame with a 'filename' column (path of the image to copy)
            and a 'label' column.
        max_per_label: maximum number of images copied for each label.
        output_dir: destination root. Defaults to the module-level
            ``new_output_dir``.

    Returns:
        None. Files are copied on disk.
    """
    if output_dir is None:
        output_dir = new_output_dir

    for label, group in df.groupby('label'):
        # str() keeps numeric folder names (read back as ints by pandas) usable as paths
        label_folder = os.path.join(output_dir, str(label))
        # Create the label folder once per label rather than once per row
        os.makedirs(label_folder, exist_ok=True)

        for src_path in group['filename'].head(max_per_label):
            dest_path = os.path.join(label_folder, os.path.basename(src_path))
            shutil.copy(src_path, dest_path)

# Copy the per-label subset of the images listed in df into new_output_dir
copy_first_20_images_per_label(df)

# NOTE(review): the message hard-codes 20; keep it in sync with the limit
# actually applied inside copy_first_20_images_per_label
print('Images copied successfully (max 20 per label)!')


***We create a CSV file of the filtered images with their file paths and labels.***

In [None]:
import os
import pandas as pd

# Root of the per-label folders that hold the filtered images
new_output_dir = '/content/drive/My Drive/filtered_images'

# Rows of [image_path, label] destined for the new CSV
filtered_file_data = []

# Each folder contributes all of its files, labelled with that folder's own name
for folder, _subfolders, names in os.walk(new_output_dir):
    folder_label = os.path.basename(folder)
    filtered_file_data.extend(
        [os.path.join(folder, name), folder_label] for name in names
    )

# Two-column table: image path and label
filtered_df = pd.DataFrame(filtered_file_data, columns=['filename', 'label'])

# Write the table to Drive without the pandas row index
filtered_csv_file = '/content/drive/My Drive/filtered_images_path.csv'
filtered_df.to_csv(filtered_csv_file, index=False)

print('Filtered CSV file created successfully!')


***We split the filtered images into training (80%), validation (10%) and testing (10%) sets to train our model, and encode the labels as integers.***

In [None]:
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.preprocessing import LabelEncoder

# Load the CSV file that lists the filtered images ('filename') and their labels ('label')
df = pd.read_csv('/content/drive/My Drive/filtered_images_path.csv')


def load_images(image_paths, return_mask=False):
    """Read images with OpenCV and divide their pixel values by 255.

    Paths that OpenCV cannot read are reported with ``print`` and skipped, so
    the returned array can be shorter than ``image_paths``. Pass
    ``return_mask=True`` to learn which inputs were kept.

    Args:
        image_paths: iterable of image file paths.
        return_mask: when True, also return a boolean array with one entry per
            input path (True where the image was read successfully).

    Returns:
        ``np.ndarray`` stacking the successfully read images, or the tuple
        ``(images, mask)`` when ``return_mask`` is True.
    """
    images = []
    loaded = []
    for img_path in image_paths:
        # cv2.imread returns None when the file is missing or not a readable image
        img = cv2.imread(img_path)
        was_read = img is not None
        loaded.append(was_read)
        if was_read:
            # float32 halves the memory of the default float64 division result
            images.append(img.astype(np.float32) / 255.0)
        else:
            print(f"Error loading image: {img_path}")

    stacked = np.array(images)
    if return_mask:
        return stacked, np.array(loaded, dtype=bool)
    return stacked


# Load the images, then drop the rows whose file could not be read so that
# images[i] and the label of row i always refer to the same file
images, loaded_mask = load_images(df['filename'], return_mask=True)
df = df[loaded_mask].reset_index(drop=True)

# Encode the labels as integers (fitted on the rows that actually have an image)
label_encoder = LabelEncoder()
df['encoded_label'] = label_encoder.fit_transform(df['label'])

# Convert labels to one-hot encoding
labels = to_categorical(df['encoded_label'], num_classes=len(label_encoder.classes_))

# Print label to number mapping
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Label to Number Mapping:")
print(label_mapping)

# Split the data into training, validation, and test sets (80% train, 10% validation, 10% test).
# First hold out 20%, then cut that hold-out in half; both splits are stratified by label.
train_images, temp_images, train_labels, temp_labels = train_test_split(
    images, labels, test_size=0.2, stratify=labels, random_state=42
)
val_images, test_images, val_labels, test_labels = train_test_split(
    temp_images, temp_labels, test_size=0.5, stratify=temp_labels, random_state=42
)


***We define the model layers and use early stopping to prevent the model from overfitting.***

In [None]:
import numpy as np  # Import numpy
import tensorflow as tf
from tensorflow.keras import datasets, layers,models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Number of output classes = number of distinct labels in `df`
num_classes = len(np.unique(df['label']))

# Stop training once val_loss has not improved for 7 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# Build the CNN model: four conv + max-pool stages followed by two dense layers
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(400, 400, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dropout(0.01),
    Dense(num_classes, activation='softmax'),  # one probability per class
])

# categorical_crossentropy matches the one-hot encoded labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_images, train_labels,
    epochs=50,  # upper bound; early stopping may end training sooner
    batch_size=32,
    validation_data=(val_images, val_labels),
    callbacks=[early_stopping],
)

# Persist the trained model: the TFLite conversion cell loads 'my_model.h5',
# which was previously never written anywhere
model.save('my_model.h5')

# Evaluate on test data
test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_accuracy}')
print(f'Test Loss: {test_loss}')


***We predict the class of an image with the trained model.***

In [None]:
# This cell previously used load_and_preprocess_image, reverse_mapping,
# image_path, true_label and plt without defining or importing any of them.
import matplotlib.pyplot as plt


def load_and_preprocess_image(image_path, target_size=(400, 400)):
    """Read one image and prepare it the same way as the training images.

    The image is read with OpenCV (BGR channel order, as in training),
    resized to ``target_size``, divided by 255 and given a leading batch
    axis so it can be passed straight to ``model.predict``.

    Args:
        image_path: path of the image file to classify.
        target_size: (width, height) expected by the model input.

    Returns:
        ``np.ndarray`` of shape ``(1, height, width, 3)``.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, target_size)
    img = img.astype(np.float32) / 255.0
    return np.expand_dims(img, axis=0)


# Class index -> string label, in the order used by the fitted LabelEncoder
reverse_mapping = dict(enumerate(label_encoder.classes_))

# NOTE(review): the sample to classify was not defined in this notebook; the
# first row of `df` is used as a default. Replace these two values to test
# any other image.
sample_row = df.iloc[0]
image_path = sample_row['filename']
true_label = sample_row['label']

# Load and preprocess the image
preprocessed_image = load_and_preprocess_image(image_path)

# Make a prediction and take the most probable class
predictions = model.predict(preprocessed_image)
predicted_class_index = np.argmax(predictions, axis=1)[0]
predicted_class_label = reverse_mapping[predicted_class_index]  # Get the string label

# Display the result
print(f"Image: {image_path}, True Label: {true_label}, Predicted Class: {predicted_class_label}")

# Visualize the image with the prediction
img_display = cv2.imread(image_path)
img_display = cv2.cvtColor(img_display, cv2.COLOR_BGR2RGB)  # Convert to RGB for displaying
plt.imshow(img_display)
plt.title(f"True Label: {true_label}, Predicted Class: {predicted_class_label}")
plt.axis('off')
plt.show()

***We convert the TensorFlow model to TFLite format for the Android app.***

In [None]:
import tensorflow as tf
from google.colab import files
from tensorflow.keras.models import load_model


# Restore the trained Keras model; 'my_model.h5' must already exist in the working directory
model = load_model('my_model.h5')

# Build a converter from the Keras model, then serialize it to the TFLite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the serialized TFLite model to Drive
tflite_model_path = '/content/drive/My Drive/my_cnn_model.tflite'
with open(tflite_model_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

print(f"Model has been converted to TFLite and saved as '{tflite_model_path}'.")

# To download the converted model from Colab, call files.download(tflite_model_path)