In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image

# The dataset path. Defaults to the Kaggle input location; set the
# BOMBUS_DATASET_PATH environment variable to point the notebook at a copy of
# the dataset elsewhere without editing this cell.
dataset_path = os.environ.get('BOMBUS_DATASET_PATH', '/kaggle/input/bombus/bombus_project')

# Define image size (used both when resizing images and as the model input shape)
img_height, img_width = 224, 224

# Function to load and preprocess TIFF images
def load_images(base_path):
    """Load every TIFF image under ``base_path`` and derive labels from folder names.

    ``base_path`` is expected to contain one sub-folder per class whose name
    ends in ``_<gender>`` (e.g. ``sic_alt_male``). Everything before the last
    underscore becomes the species label; the last part becomes the gender
    label. Hidden entries (names starting with ``.``) are skipped.

    Each image is converted to 3-channel RGB, resized to
    ``(img_width, img_height)`` and passed through ResNet50's
    ``preprocess_input``.

    Args:
        base_path: Directory containing the per-class sub-folders.

    Returns:
        A tuple ``(images, species_labels, gender_labels)``. ``images`` is a
        NumPy array of shape ``(n_images, img_height, img_width, 3)`` (an
        empty array when no image is found); the two label lists are aligned
        with it index by index.

    Raises:
        FileNotFoundError: If ``base_path`` does not exist (from ``os.listdir``).
    """
    images = []
    species_labels = []
    gender_labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded train/test split) reproducible.
    for folder in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder)
        if folder.startswith('.') or not os.path.isdir(folder_path):
            continue  # Ignore hidden files/folders and plain files

        # Assuming folder names are like 'sic_alt_male': split on the LAST
        # underscore so multi-part species names stay intact.
        species, _, gender = folder.rpartition('_')

        for file in sorted(os.listdir(folder_path)):
            # Accept '.tif' and '.tiff' in any letter case.
            if file.startswith('.') or not file.lower().endswith(('.tif', '.tiff')):
                continue

            img_path = os.path.join(folder_path, file)
            # The context manager closes the file handle once the pixel data
            # has been copied out by convert()/resize().
            with Image.open(img_path) as img:
                # convert('RGB') normalises grayscale, palette and RGBA images
                # to exactly three channels, as the model input requires.
                # NOTE(review): RGB mode is 8 bits per channel; confirm this is
                # acceptable if the dataset contains 16-bit TIFFs.
                rgb = img.convert('RGB').resize((img_width, img_height))

            pixels = np.array(rgb)
            pixels = tf.keras.applications.resnet50.preprocess_input(pixels)  # Adjust preprocessing for your model

            images.append(pixels)
            species_labels.append(species)
            gender_labels.append(gender)

    return np.array(images), species_labels, gender_labels

# Read the whole dataset into memory together with its per-image labels
images, species_labels, gender_labels = load_images(dataset_path)

# Species labels: string -> integer id -> one-hot row
species_encoder = LabelEncoder().fit(species_labels)
species_encoded = species_encoder.transform(species_labels)
species_categorical = to_categorical(species_encoded)

# Gender labels: same encoding pipeline, with its own encoder
gender_encoder = LabelEncoder().fit(gender_labels)
gender_encoded = gender_encoder.transform(gender_labels)
gender_categorical = to_categorical(gender_encoded)

# Hold out 20% of the samples; a single call keeps the images and both label
# sets aligned, and the fixed seed keeps the shuffle repeatable.
(
    X_train, X_test,
    y_species_train, y_species_test,
    y_gender_train, y_gender_test,
) = train_test_split(
    images,
    species_categorical,
    gender_categorical,
    test_size=0.2,
    random_state=42,
)

# Define two separate models for species and gender classification
def create_model(num_classes):
    """Build a softmax classifier on top of a frozen ImageNet ResNet50.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model: ResNet50 backbone
        (without its top, not trainable) -> global average pooling -> dense
        softmax layer.
    """
    backbone = tf.keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(img_height, img_width, 3),
    )
    # Freeze the base model so only the new head is trained
    backbone.trainable = False

    classifier = tf.keras.Sequential()
    classifier.add(backbone)
    classifier.add(tf.keras.layers.GlobalAveragePooling2D())
    classifier.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return classifier

# One classifier per task, each sized by the classes its encoder has seen
model_species = create_model(len(species_encoder.classes_))
model_gender = create_model(len(gender_encoder.classes_))

# Both models use the same optimizer, loss and metric
for classifier in (model_species, model_gender):
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

# Train the models; the held-out split is used for per-epoch validation
print("Species Model Accuracy")
history_species = model_species.fit(
    X_train,
    y_species_train,
    validation_data=(X_test, y_species_test),
    epochs=10,
)

print("Gender Model Accuracy")
history_gender = model_gender.fit(
    X_train,
    y_gender_train,
    validation_data=(X_test, y_gender_test),
    epochs=10,
)


2024-05-01 12:20:12.927798: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-01 12:20:12.927933: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-01 12:20:13.074877: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/bombus/bombus_project'

In [None]:
import matplotlib.pyplot as plt

def plot_history(history, title):
    """Show training vs. validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences.
        title: Text prefixed to both subplot titles.
    """
    curves = history.history
    # Look up every series before any figure is created.
    train_acc, val_acc = curves['accuracy'], curves['val_accuracy']
    train_loss, val_loss = curves['loss'], curves['val_loss']
    epochs = range(1, len(train_acc) + 1)

    _, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: accuracy (dots = training, line = validation)
    acc_ax.plot(epochs, train_acc, 'bo', label='Training acc')
    acc_ax.plot(epochs, val_acc, 'b', label='Validation acc')
    acc_ax.set_title(f'{title} Training and Validation Accuracy')
    acc_ax.legend()

    # Right panel: loss (dots = training, line = validation)
    loss_ax.plot(epochs, train_loss, 'ro', label='Training loss')
    loss_ax.plot(epochs, val_loss, 'r', label='Validation loss')
    loss_ax.set_title(f'{title} Training and Validation Loss')
    loss_ax.legend()

    plt.show()

# Plot the performance of each trained model, species first, then gender
for banner, history, model_title in (
    ("Plots of Species Model", history_species, "Species Model"),
    ("Plots of Gender Model", history_gender, "Gender Model"),
):
    print(banner)
    plot_history(history, model_title)
