In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from sklearn import preprocessing

from data_cleaning import load_metadata, filter_low_counts, load_images_and_labels

In [2]:
# Set Variables
IMAGE_SIZE = (300, 225)  # The size images should be rescaled to. If None, defaults to original size
MIN_SAMPLES = 10  # The minimum number of samples needed to be included in a prediction
DATA_PATH = Path("./data/fungi-clef-2025")
MD_PATH = DATA_PATH / "metadata/FungiTastic-FewShot"
IMAGE_PATH = DATA_PATH / "images/FungiTastic-FewShot"

In [4]:
# Load the metadata
md_df = load_metadata(metadata_path=MD_PATH, image_path=IMAGE_PATH)
    
# Filter out all the fungi that don't have the min number of samples
# This might have been dropping the full test set oops
# md_df = filter_low_counts(md_df, MIN_SAMPLES)

# Drop all images where the class is nan
print(f"Num Rows: {md_df.shape[0]}")
print(f"Rows missing a class: {md_df['class'].isna().sum()}")
md_df = md_df[~md_df["class"].isna()]
print(f"Num Rows: {md_df.shape[0]}")

# Map the class to an ID
le = preprocessing.LabelEncoder()
le.fit(md_df["class"])
md_df["class_label"] = md_df["class"]
md_df["class_idx"] = le.transform(md_df["class"])

# Load all of the images and labels from the metadata
# This function currently resizes and rescales the images
images, labels = load_images_and_labels(md_df, IMAGE_SIZE)

Num Rows: 12015
Rows missing a class: 1961
Num Rows: 10054


KeyboardInterrupt: 

In [5]:
le.classes_

array(['Agaricomycetes', 'Arthoniomycetes', 'Atractiellomycetes',
       'Blastocladiomycetes', 'Candelariomycetes', 'Chytridiomycetes',
       'Coniocybomycetes', 'Cystobasidiomycetes', 'Dacrymycetes',
       'Dothideomycetes', 'Entomophthoromycetes', 'Eurotiomycetes',
       'Exobasidiomycetes', 'Geoglossomycetes', 'Glomeromycetes',
       'Laboulbeniomycetes', 'Lecanoromycetes', 'Leotiomycetes',
       'Lichinomycetes', 'Microbotryomycetes', 'Mucoromycetes',
       'Myxomycetes', 'Orbiliomycetes', 'Peronosporea', 'Pezizomycetes',
       'Pucciniomycetes', 'Sareomycetes', 'Sordariomycetes',
       'Taphrinomycetes', 'Tremellomycetes', 'Ustilaginomycetes',
       'Zoopagomycetes'], dtype=object)

In [6]:
# Re-split the images and their labels
train_idx = md_df["split"] == "train"
val_idx = md_df["split"] == "val"
test_idx = md_df["split"] == "test"

train_images = images[train_idx]
train_labels = labels[train_idx]

val_images = images[val_idx]
val_labels = labels[val_idx]

test_images = images[test_idx]
test_labels = labels[test_idx]

In [7]:
print(f"Shape train images: {train_images.shape}")
print(f"Shape val images: {val_images.shape}")
print(f"Shape test images: {test_images.shape}")

Shape train images: (7779, 300, 225, 3)
Shape val images: (2275, 300, 225, 3)
Shape test images: (0, 300, 225, 3)


In [8]:
# Add some data augmentation!
# Some horizontal flips? Random crops?
horizontal_flips = tf.image.flip_left_right(train_images)

In [None]:
train_images = np.append(train_images, horizontal_flips, axis=0)
train_labels = np.append(train_labels, train_labels.copy())

: 

In [None]:
train_images.shape

In [1]:
train_label.shape

NameError: name 'train_label' is not defined

In [8]:
# Set a random seed and clear back end
tf.keras.backend.clear_session()
tf.random.set_seed(1234)

# Convolutional Layer
conv_layer = tf.keras.layers.Conv2D(32, kernel_size=4, padding="same", activation="relu")

# Pooling Layer
pooling_layer = tf.keras.layers.MaxPool2D()

# Dropout Layer
dropout_layer = tf.keras.layers.Dropout(0.25)

# Flattening
flat_layer = tf.keras.layers.Flatten()

# Dense (Multiclassification Layer)
num_classes = len(set(labels))
softmax_layer = tf.keras.layers.Dense(num_classes)


In [9]:
model = tf.keras.Sequential([
    conv_layer,
    pooling_layer,
    dropout_layer,
    flat_layer,
    softmax_layer
])

In [10]:
images.shape

(12015, 300, 225, 3)

In [11]:
model.build(input_shape=(None, 300, 225, 3))

In [12]:
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [13]:
model.summary()

In [14]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    verbose=1,
    patience=5,
    mode='max',
    restore_best_weights=True
)

In [15]:
history = model.fit(train_images, train_labels, epochs=10, validation_data=(val_images, val_labels), callbacks=[early_stopping])

Epoch 1/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 551ms/step - accuracy: 0.3985 - loss: 7.6699 - val_accuracy: 0.1523 - val_loss: 11.9671
Epoch 2/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 523ms/step - accuracy: 0.4101 - loss: 7.5777 - val_accuracy: 0.1523 - val_loss: 11.9670
Epoch 3/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 586ms/step - accuracy: 0.4101 - loss: 7.7260 - val_accuracy: 0.1523 - val_loss: 13.6634
Epoch 4/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 586ms/step - accuracy: 0.4101 - loss: 9.5075 - val_accuracy: 0.1523 - val_loss: 13.6634
Epoch 5/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 530ms/step - accuracy: 0.4101 - loss: 9.5075 - val_accuracy: 0.1523 - val_loss: 13.6634
Epoch 6/10
[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 496ms/step - accuracy: 0.4101 - loss: 9.5075 - val_accuracy: 0.1523 - val_loss: 13.663

In [16]:
model.evaluate(test_images, test_labels)

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 96ms/step - accuracy: 0.0000e+00 - loss: 17.5044


[17.504392623901367, 0.0]