In [1]:
import os
import json
import numpy as np
import pandas as pd
from PIL import Image
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input

def load_cryptovision_model(model_path: str):
    """
    Load model and attributes from the specified path.

    Args:
        model_path (str): The path to the model directory.

    Returns:
        dict: A dictionary of model attributes including the loaded model.
    """
    attributes = {}
    try:
        model_filename = os.path.basename(model_path)
        prefix, tp, base_model, acc, code = model_filename.split('_')

        attributes['prefix'] = prefix
        attributes['tp'] = tp
        attributes['base_model'] = base_model
        attributes['acc'] = acc
        attributes['code'] = code
        attributes['model_path'] = model_path

        class_indices_path = os.path.join(model_path, 'class_indices.json')
        with open(class_indices_path, 'r') as file:
            attributes['class_mapping'] = json.load(file)

        attributes['model'] = load_model(os.path.join(model_path, 'model.h5'))

        return attributes

    except FileNotFoundError as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: {e}")
        return None

def predict(model_attributes: dict, img_path: str, top_n: int = None, min_prob: float = 0.00):
    """
    Predict the class of an image using the model in model.

    Args:
        model_attributes (dict): A dictionary containing the model and class mapping.
        img_path (str): The path to the image file.
        top_n (int, optional): The number of top predictions to return. Defaults to None.
        min_prob (float, optional): The minimum probability to consider. Defaults to 0.20.

    Returns:
        dict: A dictionary with species predictions and their probabilities.
    """
    model = model_attributes['model']
    class_mapping = model_attributes['class_mapping']
    
    img = Image.open(img_path)
    if img.mode != "RGB":
        img = img.convert("RGB")
    img = img.resize((224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    probs = model.predict(img_array, verbose=False)[0]
    
    # Create a dictionary of class names and probabilities
    predictions = {class_mapping[str(i)]: float(probs[i]) for i in range(len(probs))}
    
    # Filter predictions by minimum probability
    filtered_predictions = {k: v for k, v in predictions.items() if v >= min_prob}
    
    # Sort the filtered predictions by probability in descending order and get the top N
    sorted_predictions = dict(sorted(filtered_predictions.items(), key=lambda item: item[1], reverse=True)[:top_n])
    
    return sorted_predictions

def ensamble_prediction(models_path_folder: str, img_path: str, top_n: int = None, min_prob: float = 0.00):

    ensamble_predictions = {}
    
    # for loop to load all models
    for model_name in os.listdir(models_path_folder):
        model_attributes = load_cryptovision_model(os.path.join(models_path_folder, model_name))

        # predict
        predictions = predict(model_attributes, img_path, top_n, min_prob)

        # add to ensamble_predictions
        ensamble_predictions[model_name] = predictions

    # Create a DataFrame from the ensamble_predictions dictionary
    df_predictions = pd.DataFrame(ensamble_predictions)
    df_predictions = df_predictions.reset_index().rename(columns={'index': 'labels'})

    # Split the 'labels' column into 'family', 'genus', and 'species'
    df_predictions[['family', 'genus', 'species']] = df_predictions['labels'].str.split('_', expand=True)

    # Combine genus and species for the 'species' column
    df_predictions['species'] = df_predictions['genus'] + ' ' + df_predictions['species']

    # Reorder columns to have 'family', 'genus', and 'species' at the beginning
    cols = ['family', 'genus', 'species'] + [col for col in df_predictions.columns if col not in ['family', 'genus', 'species', 'labels']]
    df_predictions = df_predictions[cols]

    # Select the columns to average
    model_columns = [col for col in df_predictions.columns if col not in ['family', 'genus', 'species', 'labels']]

    # Create a new column for the average of the selected columns
    df_predictions['ensamble_prediction_prob'] = df_predictions[model_columns].mean(axis=1)

    return df_predictions

In [10]:

model_path = '/Users/leonardo/Documents/Projects/crypto_vision/models/species/BV_label_EFFV2B0_S912_202407311711'
cv_model = load_cryptovision_model(model_path)

img_path = '/Users/leonardo/Library/CloudStorage/GoogleDrive-leonardofonseca.r@gmail.com/My Drive/04_projects/CryptoVision/Data/training/train/Chaenopsidae_Acanthemblemaria_aspera/web_Acanthemblemaria_aspera_8.jpg'
prediction_results = predict(cv_model, img_path, top_n=5, min_prob=0.0)

print(json.dumps(prediction_results, indent=4))



{
    "Gobiidae_Nemateleotris_magnifica": 0.13943737745285034,
    "Creediidae_Limnichthys_nitidus": 0.11863453686237335,
    "Pomacentridae_Pomacentrus_pavo": 0.04709126427769661,
    "Gobiidae_Istigobius_decoratus": 0.04586803913116455,
    "Syngnathidae_Corythoichthys_flavofasciatus": 0.04030103236436844
}


In [5]:
results = ensamble_prediction(
    models_path_folder='/Users/leonardo/Documents/Projects/crypto_vision/models/species',
    img_path='/Users/leonardo/Library/CloudStorage/GoogleDrive-leonardofonseca.r@gmail.com/My Drive/04_projects/CryptoVision/Data/training/train/Chaenopsidae_Acanthemblemaria_aspera/web_Acanthemblemaria_aspera_8.jpg',
    top_n=10,
    min_prob=0.10,
)

results




Unnamed: 0,family,genus,species,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFv2B0_S911_202407170557,BV_label_EFFV2B0_S921_202408011035,BV_label_EFFV2B0_S910_202408010200,ensamble_prediction_prob
0,Gobiidae,Nemateleotris,Nemateleotris magnifica,0.139437,,,,,0.114542,0.12699
1,Creediidae,Limnichthys,Limnichthys nitidus,0.118635,,,,,,0.118635
2,Gobiidae,Istigobius,Istigobius decoratus,,,,,0.12311,,0.12311


In [None]:
results[['family', 'genus', 'species']] = results['labels'].str.split('_', expand=True)

results.head()

Unnamed: 0,labels,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFv2B0_S911_202407170557,BV_label_EFFV2B0_S921_202408011035,BV_label_EFFV2B0_S910_202408010200,family,genus,species
0,Gobiidae_Nemateleotris_magnifica,0.139437,,,,,0.114542,Gobiidae,Nemateleotris,magnifica
1,Creediidae_Limnichthys_nitidus,0.118635,,,,,,Creediidae,Limnichthys,nitidus
2,Gobiidae_Istigobius_decoratus,,,,,0.12311,,Gobiidae,Istigobius,decoratus


In [None]:

species_predictions = {}
genera_predictions = {}

for model_name in models_list:
    model_path = f'/Users/leonardo/Documents/Projects/crypto_vision/models/select/{model_name}/model.h5'
    class_names_path = f'/Users/leonardo/Documents/Projects/crypto_vision/models/select/{model_name}/class_indices.json'

    model, class_index = load_class_names_and_model(model_path, class_names_path)

    if 'genus' in model_name:
        genera_predictions[model_name] = predict(model, image_path, class_index)
    elif 'species' or 'label' in model_name:
        species_predictions[model_name] = predict(model, image_path, class_index)


In [None]:
model, class_index = load_class_names_and_model(
    '/Users/leonardo/Documents/Projects/crypto_vision/models/species/BV_label_EFFV2B0_S921_202408011035/model.h5',
    '/Users/leonardo/Documents/Projects/crypto_vision/models/species/BV_label_EFFV2B0_S921_202408011035/class_indices.json'
)

results = predict(model, image_path, class_index)

print(json.dumps(results, indent=4))

2024-08-03 10:27:38.169742: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


{
    "Chaenopsidae_Acanthemblemaria_aspera": 0.4779396653175354,
    "Tripterygiidae_Ucla_xenogrammus": 0.28954923152923584
}


In [None]:
df_species = pd.DataFrame(species_predictions)
df_species = df_species.reset_index().rename(columns={'index': 'labels'})
df_species.head()

Unnamed: 0,labels,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFV2B0_S921_202408011035
0,Chaenopsidae_Acanthemblemaria_aspera,0.956761,0.996712,,0.47794
1,Tripterygiidae_Ucla_xenogrammus,,,,0.289549


In [None]:

df = pd.DataFrame(species_predictions)

df = df.reset_index().rename(columns={'index': 'labels'})

# Split the 'labels' column into 'family', 'genus', and 'species'
df[['family', 'genus', 'species']] = df['labels'].str.split('_', expand=True)

# Combine genus and species for the 'species' column
df['species'] = df['genus'] + ' ' + df['species']

# Reorder columns to have 'family', 'genus', and 'species' at the beginning
cols = ['family', 'genus', 'species'] + [col for col in df.columns if col not in ['family', 'genus', 'species', 'labels']]
df = df[cols]



Unnamed: 0,family,genus,species,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFV2B0_S921_202408011035
0,Chaenopsidae,Acanthemblemaria,Acanthemblemaria aspera,0.956761,0.996712,,0.47794
1,Tripterygiidae,Ucla,Ucla xenogrammus,,,,0.289549


In [None]:
# Select the columns to average
model_columns = [col for col in df.columns if col not in ['family', 'genus', 'species', 'labels']]

# Create a new column for the average of the selected columns
df['average_prediction'] = df[model_columns].mean(axis=1)

df.head()


Unnamed: 0,family,genus,species,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFV2B0_S921_202408011035,average_prediction
0,Chaenopsidae,Acanthemblemaria,Acanthemblemaria aspera,0.956761,0.996712,,0.47794,0.810471
1,Tripterygiidae,Ucla,Ucla xenogrammus,,,,0.289549,0.289549


In [None]:
df_genus = df.groupby('genus')['average_prediction'].mean()
df_genus.head()

genus
Gobiodon     0.694220
Priolepis    0.305307
Name: average_prediction, dtype: float64

In [None]:
max_index = df['average_prediction'].idxmax()

df.loc[max_index, 'species']

'Gobiodon histrio'

In [None]:
df.head()

Unnamed: 0,family,genus,species,BV_label_EFFV2B0_S912_202407311711,BV_label_EFFv2B0_S910_202407182355,BV_label_EFFV2B2_S931_202408011557,BV_label_EFFV2B0_S921_202408011035,average_prediction
0,Gobiidae,Gobiodon,Gobiodon histrio,0.868777,0.970986,0.684482,0.252636,0.69422
1,Gobiidae,Priolepis,Priolepis semidoliatus,,,0.202638,0.407977,0.305307


In [None]:
df[['genus','average_prediction']]

Unnamed: 0,genus,average_prediction
0,Gobiodon,0.69422
1,Priolepis,0.305307


In [None]:
df_genus = pd.DataFrame(genera_predictions)
df_genus = df_genus.reset_index().rename(columns={'index': 'genus'})
df_genus.head()

Unnamed: 0,genus,BV_genus_EFFV2B0_S801_202407240835
0,Acanthemblemaria,0.971109


In [None]:
df_genus = df[['genus','average_prediction']].merge(df_genus, on='genus')
df_genus.head()

Unnamed: 0,genus,average_prediction,BV_genus_EFFV2B0_S801_202407240835
0,Acanthemblemaria,0.810471,0.971109


In [None]:
model_columns = [col for col in df_genus.columns if col not in ['family', 'genus', 'species', 'labels']]

# Create a new column for the average of the selected columns
df_genus['average_prediction'] = df_genus[model_columns].mean(axis=1)

df_genus.head()

Unnamed: 0,genus,average_prediction,BV_genus_EFFV2B0_S801_202407240835
0,Acanthemblemaria,0.89079,0.971109


In [None]:
import tensorflow as tf

class CriptoVisionPredictor:
    def __init__(self, models_list, models_path, class_mapping_path):

        self.models_list = models_list
        self.models_path = models_path
        self.class_mapping_path = class_mapping_path
        self.models = {}
        self.class_mappings = {}
        self.load_models_and_mappings()

    
    def load_models_and_mappings(self):
        for model_name in self.models_list:
            model_path = os.path.join(self.models_path, model_name, 'model.h5')
            class_mapping_path = os.path.join(self.models_path, model_name, 'class_indices.json')
            
            with open(class_mapping_path, 'r') as f:
                class_names = json.load(f)
            
            model = load_model(model_path)
            
            self.models[model_name] = model
            self.class_mappings[model_name] = class_names


In [None]:
results = {
    'model_type': 'model_1',
    'model_version': '2024.07.31.1711',
    'prediction_type': 'species',
    'best_model': 'model_name',
    'best_model_confidence': 0.99,
    'prediction': {
        'genus': 'Gobiodon',
        'species': 'Gobiodon histrio',
        'spe_confidence': 0.95,
        'gen_confidence': 0.95,
    },
}

In [None]:
results['prediction'].keys()

dict_keys(['Gobiidae_Gobiodon_histrio', 'Gobiidae_Priolepis_semidoliatus'])

In [None]:
def load_class_names_and_model(model_path: str, class_names_path: str):
    # Load class names
    with open(class_names_path, 'r') as f:
        class_names = json.load(f)

    # Load model
    model = load_model(model_path)

    return model, class_names

In [4]:
import pandas as pd


df = pd.read_csv('/Users/leonardo/Documents/Projects/crypto_vision/models/species/BV_label_EFFv2B0_S910_202407182355/test_data.csv', )

df.head()

Unnamed: 0.1,Unnamed: 0,path,label,family,genus,species
0,6504,/Volumes/T7_shield/CryptoVision/Misc/google_im...,Pomacentridae_Chromis_margaritifer,Pomacentridae,Chromis,margaritifer
1,2299,/Volumes/T7_shield/CryptoVision/Clustering/ID2...,Blenniidae_Enchelyurus_ater,Blenniidae,Enchelyurus,ater
2,7612,/Volumes/T7_shield/CryptoVision/Misc/google_im...,Apogonidae_Ostorhinchus_angustatus,Apogonidae,Ostorhinchus,angustatus
3,8609,/Volumes/T7_shield/CryptoVision/Misc/google_im...,Gobiidae_Istigobius_decoratus,Gobiidae,Istigobius,decoratus
4,2543,/Volumes/T7_shield/CryptoVision/Clustering/ID2...,Blenniidae_Cirripectes_variolosus,Blenniidae,Cirripectes,variolosus
