In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
import os
from sklearn.model_selection import train_test_split


In [None]:
# Data source: https://www.kaggle.com/datasets/phucthaiv02/butterfly-image-classification
# Load the CSV index of training images. Later cells read its 'filename' and
# 'label' columns. The path is relative to the current working directory.
csv_file = 'training_set.csv'
data = pd.read_csv(csv_file)
data.head()

In [None]:
# Add a 'filepath' column by joining the image directory with each file name,
# so downstream code can open images directly from the dataframe.
image_dir = 'train'
data['filepath'] = data['filename'].apply(lambda x: os.path.join(image_dir, x))
data.head()

In [None]:
# Show the distinct values currently stored in the 'label' column.
data['label'].unique()

In [None]:
# Encode labels as integer class indices, numbered in order of first appearance.
#
# The raw labels are preserved in 'label_name' the first time this cell runs and
# the encoding is always derived from that column. This makes the cell safe to
# re-run: without it, a second run would rebuild `class_names` from the already
# encoded integers and the original class names would be lost.
if 'label_name' not in data.columns:
    data['label_name'] = data['label']

class_names = data['label_name'].unique()
class_to_index = {class_name: i for i, class_name in enumerate(class_names)}
data['label'] = data['label_name'].map(class_to_index)  # integer index per row

data.head()

In [None]:
# Show the distinct values in 'label' again, after the mapping step above it.
data['label'].unique()

In [18]:
# Hold out 20% of the rows for validation. Stratifying on 'label' keeps the
# class proportions the same in both splits; the fixed seed makes the split
# reproducible.
train_data, val_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data['label'],
)


In [None]:
# Preview the first rows of the training split.
train_data.head()

In [None]:
# Preview the first rows of the validation split.
val_data.head()

In [23]:
def process_image(file_path, label):
    """Load one image file and prepare it as model input.

    Parameters:
        file_path: Path of the image file to read (decoded as JPEG).
        label: Label associated with the image; passed through unchanged.

    Returns:
        tuple: ``(image, label)`` where ``image`` has been decoded to 3 channels,
        resized to 64x64 and divided by 255.
    """
    raw_bytes = tf.io.read_file(file_path)
    rgb = tf.image.decode_jpeg(raw_bytes, channels=3)  # force 3 colour channels
    resized = tf.image.resize(rgb, [64, 64])  # must match the model's input size
    return resized / 255.0, label  # scale pixel values by 1/255


In [24]:
def create_dataset(dataframe, batch_size=32, shuffle=True):
    """Build a batched ``tf.data`` pipeline from a dataframe of image paths.

    Parameters:
        dataframe (pd.DataFrame): Must contain a 'filepath' column (image paths)
            and a 'label' column (the label for each image).
        batch_size (int): Number of examples per batch. Defaults to 32.
        shuffle (bool): Whether to shuffle the examples (reshuffled on every
            pass over the dataset). Defaults to True; pass False for
            evaluation data, where order does not matter.

    Returns:
        tf.data.Dataset: Yields ``(images, labels)`` batches produced by
        ``process_image``.
    """
    file_paths = dataframe['filepath'].values
    labels = dataframe['label'].values
    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))

    # Shuffle the (path, label) pairs *before* decoding. Shuffling after `map`
    # with a buffer the size of the whole dataset forces every decoded image to
    # be loaded into the buffer before the first batch can be produced.
    # Skipped for an empty frame because `shuffle` rejects a buffer size of 0.
    if shuffle and len(dataframe) > 0:
        dataset = dataset.shuffle(buffer_size=len(dataframe))

    dataset = dataset.map(process_image, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_dataset = create_dataset(train_data)
# Validation data is only evaluated, so shuffling it is unnecessary work.
val_dataset = create_dataset(val_data, shuffle=False)


In [None]:
# Sanity check: display the Python type of the object create_dataset returned.
type(train_dataset)

In [None]:
# Small CNN classifier: two convolution + max-pooling stages followed by a
# fully connected head.
model = models.Sequential([
    # Stage 1: input is a 64x64 image with 3 channels, the size process_image produces.
    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Stage 2: more filters on the down-sampled feature maps.
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # Classifier head.
    layers.Flatten(),
    layers.Dense(units=128, activation='relu'),
    # One softmax output per class in class_names.
    layers.Dense(units=len(class_names), activation='softmax'),
])


In [27]:
# Configure training. The 'sparse' cross-entropy variant is used because the
# labels are integer class indices (not one-hot vectors) and the model's last
# layer outputs a softmax distribution over the classes.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Print the model's layer-by-layer summary.
model.summary()


In [None]:
# Train for 10 passes over the training set, evaluating on the validation set
# after each epoch. The returned object is kept in `history` for later use.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset
)


In [None]:
# Evaluate the trained model on the validation set.
# NOTE: despite the `test_` prefix, these numbers come from `val_dataset`, the
# same data passed as `validation_data` during fit(); this is not a score on a
# separate held-out test set.
test_loss, test_accuracy = model.evaluate(val_dataset)
print(f"Validation Accuracy: {test_accuracy * 100:.2f}%")


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def predict_label(image_path, model=model, class_names=class_names):
    """
    Predict the label of an input image using the trained model.

    The image is loaded at its original size and then resized with
    ``tf.image.resize`` and scaled by 1/255, the same resize and normalisation
    steps ``process_image`` applies to training data. Letting ``load_img``
    resize via ``target_size`` would use its default nearest-neighbour
    interpolation instead, so the model would see inputs prepared differently
    from those it was trained on.

    Parameters:
        image_path (str): Path to the input image.
        model (tf.keras.Model): Trained TensorFlow/Keras model. The default is
            the notebook's ``model`` as it was when this cell was executed;
            re-run this cell (or pass ``model=``) after rebuilding the model.
        class_names (sequence): Class names indexed by their encoded label.
            The default is bound the same way as ``model``.

    Returns:
        The entry of ``class_names`` for the class with the highest predicted
        probability.
    """
    # Load as RGB without resizing; resizing is done below with TensorFlow.
    img = load_img(image_path)
    img_array = img_to_array(img)  # float array of shape (height, width, 3)

    # Same resize + normalisation as the training pipeline.
    img_array = tf.image.resize(img_array, [64, 64]) / 255.0
    batch = tf.expand_dims(img_array, axis=0)  # add batch dimension -> (1, 64, 64, 3)

    # Make predictions: one row of class probabilities per input image.
    predictions = model.predict(batch)
    predicted_index = int(np.argmax(predictions[0]))  # only one image in the batch

    return class_names[predicted_index]

# Build the path with os.path.join instead of a hard-coded backslash: the
# previous literal contained the invalid escape sequence "\I" and only resolved
# to a real file on Windows.
predict_label(os.path.join(image_dir, 'Image_4.jpg'))