In [1]:
# LOAD MODEL WEIGHTS FOR FAMILY, ORDER, AND SUBCLASS CNNS,
# AND GET PREDICTIONS FROM EACH FOR THE SAME IMAGE SET

from tensorflow.python.keras.applications.resnet_v2 import preprocess_input, ResNet50V2
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.python.keras.models import Sequential, save_model, load_model, optimizers
from tensorflow.python.keras.layers import Activation, Dense, Flatten, GlobalAveragePooling2D 

from tensorflow.python.keras.models import load_model
from tensorflow.keras.models import Model
#from tensorflow.keras import backend as K
from tensorflow import keras

from sklearn.preprocessing import LabelBinarizer

from glob import glob

import tensorflow as tf
import pandas as pd
import numpy as np

import os, pathlib

# Instantiate the ResNet50V2 backbone with ImageNet weights, without its
# classification top and with global average pooling on the output.
resnet = ResNet50V2(weights='imagenet', include_top=False, pooling="avg")

# Report the runtime environment so a run can be reproduced/diagnosed later.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Number of GPUs available:", len(gpu_devices))
cuda_build = tf.test.is_built_with_cuda()
print("Built with CUDA:", cuda_build)
print("Tensorflow version:", tf.__version__)

Num GPUs Available: 0
Built with CUDA: True
Tensorflow version: 2.3.1


In [3]:
# SET FILE PATHS AND ASSESS IMAGES FOR PREDICTIONS
# path to working directory
base_path = "your/working/directory/"

# this directory needs to contain:
# 1) the weights of the three models, named "weights_family.hdf5", "weights_order.hdf5", "weights_clade.hdf5"
# 2) the class names of the three models, named "classnames_family.txt", "classnames_order.txt", "classnames_clade.txt"
# 3) a directory with the prediction images, named "IMAGES_for_prediction";
#     the images have to be sorted into directories named after their respective families,
#     and each image file has to be named with its family name followed by a unique identifier:
#     familyname_uniqueIdentifier.jpg. Example:
#     "your/working/directory/IMAGES_for_prediction/Nuculidae/Nuculidae_idigbio001.jpg"
#     "your/working/directory/IMAGES_for_prediction/Nuculidae/Nuculidae_idigbio002.jpg"
#     "your/working/directory/IMAGES_for_prediction/Pectinidae/Pectinidae_gbif001.jpg"
#     "your/working/directory/IMAGES_for_prediction/Pectinidae/Pectinidae_gbif002.jpg"

# path to image folder; os.path.join makes this work whether or not base_path
# ends with a separator, and the trailing "" keeps the trailing separator so the
# value can still be used as a string prefix
pred_path = os.path.join(base_path, "IMAGES_for_prediction", "")
# edge length (in pixels) used as the default height and width when images are loaded
image_size = 224

# sorted, because glob order depends on the file system; a fixed order keeps
# the row order of everything derived from img_paths reproducible between runs
img_paths = sorted(pathlib.Path(pred_path).glob('*/*.jpg'))
img_names = [img.name for img in img_paths]
print("Found",len(img_names),"images for prediction.")

# get class names of those images: one class per sub-directory.
# Stray files in the image folder (e.g. ".DS_Store") are skipped so they are
# not counted as classes.
img_dirs = pathlib.Path(pred_path)
class_names = np.array(sorted(item.name for item in img_dirs.glob('*') if item.is_dir()))
num_classes = len(class_names)

print('\nThey belong to',num_classes,'CLASSES:\n',class_names)

# the predictions-image import function
def read_and_prep_images(img_paths, img_height=image_size, img_width=image_size):
    """Load images from disk, resize them, and apply the ResNetV2 preprocessing.

    Parameters
    ----------
    img_paths : iterable of path-like
        Locations of the image files to load.
    img_height, img_width : int
        Target size passed to `load_img`; both default to `image_size`.

    Returns
    -------
    The result of `preprocess_input` applied to the stacked image arrays
    (presumably shaped (n_images, img_height, img_width, channels) -- TODO confirm).
    """
    target = (img_height, img_width)
    pixel_arrays = []
    for path in img_paths:
        resized = load_img(path, target_size=target)
        pixel_arrays.append(img_to_array(resized))
    # stack the per-image arrays into one batch before preprocessing
    batch = np.array(pixel_arrays)
    return preprocess_input(batch)



In [4]:
# taxonomic levels to loop through
tax_levels = ["family", "order", "clade"]

# number of images that are loaded into memory and predicted at a time
pred_batch_size = 1000

for taxonomic_level in tax_levels:

    #___ get class names
    classnames_filename = os.path.join(base_path, "classnames_" + taxonomic_level + ".txt")
    # 'with' closes the file handle again once the names are read
    with open(classnames_filename) as classnames_file:
        model_classnames = classnames_file.read().splitlines()
    # NOTE: this rebinds the notebook-level `num_classes` to the number of
    # classes of the current model
    num_classes = len(model_classnames)
    print('\nThe', taxonomic_level, 'model has',num_classes,'classes:\n',model_classnames)

    #___ create model
    # a fresh dense + softmax head is attached to the shared ResNet backbone
    logits = Dense(num_classes)(resnet.layers[-1].output)
    output = Activation('softmax')(logits)
    model = Model(resnet.input, output)
    model.compile(loss = "categorical_crossentropy", metrics=["accuracy"])

    #___ load weights
    weights_filename = os.path.join(base_path, "weights_" + taxonomic_level + ".hdf5")
    model.load_weights(weights_filename)
    print('Model created and weights loaded.')

    #___ get predictions
    # load images for predictions in batches of `pred_batch_size`; stepping
    # through the start offsets never produces an empty batch, so predict() is
    # not called on empty input when the image count is a multiple of the
    # batch size (or zero)
    batch_preds = []
    for start in range(0, len(img_paths), pred_batch_size):

        # get images
        test_data = read_and_prep_images(img_paths[start:start + pred_batch_size])

        # get predictions
        preds = model.predict(test_data)
        batch_preds.append(preds)

    # join all batches once; the cast to float64 keeps the dtype (and thus the
    # CSV number formatting) the same as before
    if batch_preds:
        res_preds = np.concatenate(batch_preds, axis=0).astype(float)
    else:
        res_preds = np.zeros(shape=(0, num_classes), dtype=float)
    print("Made", str(len(res_preds)),"predictions with", taxonomic_level, "model.")

    # put predictions into Pandas df and add family names
    df = pd.DataFrame(res_preds, columns=model_classnames)
    df['image'] = img_names
    # the family name is the part of the file name before the first underscore
    df['family'] = [name.split('_')[0] for name in img_names]

    # save averages by family; only the prediction columns are averaged,
    # because the text column 'image' cannot be averaged
    families_avg = df.groupby('family')[model_classnames].mean()
    families_avg.to_csv(os.path.join(base_path, taxonomic_level + '_predictions_averaged_by_family.csv'), index=True)


