In [37]:
# Import standard libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

# Import TensorFlow and Keras modules
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Import scikit-learn utilities
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Import Keras backend
import tensorflow.keras.backend as K

# Suppress TensorFlow warnings for cleaner output (optional)
import logging
# Raise the TensorFlow logger threshold so only errors are emitted
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# One shared seed for the NumPy and TensorFlow global random generators
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)


In [38]:
# Root folder holding both image datasets and the CSV split files
base_dir = r'C:\Users\bheja\OneDrive\Desktop\Dataset'

# Image folders for the PlantVillage and PlantDoc datasets
plant_vil_dir = os.path.join(base_dir, 'plantvillage')
plant_doc_dir = os.path.join(base_dir, 'plantdoc')

# CSV split files (all located directly under base_dir)
train_csv_path = os.path.join(base_dir, 'PV_train.csv')
test_seen_csv_path = os.path.join(base_dir, 'PV_test_seen.csv')
test_unseen_csv_path = os.path.join(base_dir, 'PV_test_unseen.csv')
doc_unseen_csv_path = os.path.join(base_dir, 'PD_test_unseen.csv')

# Saved ResNet50V2 model used later as the feature extractor
trained_model_path = r'C:\Users\bheja\OneDrive\Desktop\models\resnet50v2_crop.h5'

# The files are read with header=None, so the first row is treated as data
# and the three columns are named explicitly.
csv_columns = ["image_name", "crop_class", "disease_class"]
train_data = pd.read_csv(train_csv_path, header=None, names=csv_columns)
test_seen_data = pd.read_csv(test_seen_csv_path, header=None, names=csv_columns)
test_unseen_data = pd.read_csv(test_unseen_csv_path, header=None, names=csv_columns)
doc_unseen_data = pd.read_csv(doc_unseen_csv_path, header=None, names=csv_columns)

# Quick sanity check of the training split: first rows, then column dtypes
print("Sample Training Data:")
print(train_data.head())

print("\nData Types:")
print(train_data.dtypes)


Sample Training Data:
                                          image_name  crop_class  \
0  4b22a1e7-745b-4c78-a49e-14ca8cfba26a___RS_HL-8...           0   
1  9ee2b5e6-46cf-400f-a82f-660a2ac05157___RS_HL-5...           0   
2  181ce194-fdc9-4f0b-ae8d-0c621f723279___RS_HL-7...           0   
3  3e8eb97c-b2ad-4aeb-82ff-2f1334801c9b___RS_HL-6...           0   
4  4e817ac6-818a-431f-af6d-e44477f9b649___RS_HL-7...           0   

   disease_class  
0              0  
1              0  
2              0  
3              0  
4              0  

Data Types:
image_name       object
crop_class        int64
disease_class     int64
dtype: object


In [39]:
# Load the trained ResNet50V2 model (compile=False is passed to load_model,
# so no optimizer/loss configuration is restored).
try:
    model = load_model(trained_model_path, compile=False)
except Exception as e:
    print(f"Error loading model: {e}")
    # Re-raise: without this, `model` stays undefined and the summary call
    # below fails with an unrelated NameError that hides the real cause.
    raise
print("ResNet50V2 model loaded successfully.")

# Display the model summary
model.summary()


ResNet50V2 model loaded successfully.
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_3[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                      

In [40]:
# Print "<index>: <layer name> - <output shape>" for every layer of the model,
# which makes it easy to look up the layer to tap for feature extraction.
for idx, layer in enumerate(model.layers):
    print("{}: {} - {}".format(idx, layer.name, layer.output_shape))


0: input_3 - [(None, 224, 224, 3)]
1: conv1_pad - (None, 230, 230, 3)
2: conv1_conv - (None, 112, 112, 64)
3: pool1_pad - (None, 114, 114, 64)
4: pool1_pool - (None, 56, 56, 64)
5: conv2_block1_preact_bn - (None, 56, 56, 64)
6: conv2_block1_preact_relu - (None, 56, 56, 64)
7: conv2_block1_1_conv - (None, 56, 56, 64)
8: conv2_block1_1_bn - (None, 56, 56, 64)
9: conv2_block1_1_relu - (None, 56, 56, 64)
10: conv2_block1_2_pad - (None, 58, 58, 64)
11: conv2_block1_2_conv - (None, 56, 56, 64)
12: conv2_block1_2_bn - (None, 56, 56, 64)
13: conv2_block1_2_relu - (None, 56, 56, 64)
14: conv2_block1_0_conv - (None, 56, 56, 256)
15: conv2_block1_3_conv - (None, 56, 56, 256)
16: conv2_block1_out - (None, 56, 56, 256)
17: conv2_block2_preact_bn - (None, 56, 56, 256)
18: conv2_block2_preact_relu - (None, 56, 56, 256)
19: conv2_block2_1_conv - (None, 56, 56, 64)
20: conv2_block2_1_bn - (None, 56, 56, 64)
21: conv2_block2_1_relu - (None, 56, 56, 64)
22: conv2_block2_2_pad - (None, 58, 58, 64)
23: con

In [6]:
# Define the feature extractor

In [41]:
from tensorflow.keras.models import Model

# Layer whose output is used as the feature vector
target_layer_name = 'global_average_pooling2d_2'

# Look the layer up by name; on failure list every layer name to help pick
# the right one, then propagate the original error.
try:
    target_layer_output = model.get_layer(name=target_layer_name).output
except ValueError:
    print(f"Layer {target_layer_name} not found. Please check the layer name.")
    for layer in model.layers:
        print(layer.name)
    raise
else:
    print(f"Successfully retrieved the output of layer: {target_layer_name}")

# Sub-model sharing the loaded model's input and ending at the target layer
feature_extractor = Model(inputs=model.input, outputs=target_layer_output)

print("Feature extractor model created successfully.")


Successfully retrieved the output of layer: global_average_pooling2d_2
Feature extractor model created successfully.


In [42]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ImageDataGenerator for feature extraction: pixel values are scaled by 1/255
# and no augmentation is configured.
feature_datagen = ImageDataGenerator(rescale=1./255)

def create_feature_generator_optimized(dataframe, directory, target_size=(224, 224), batch_size=128, shuffle=False):
    """
    Creates an unlabeled image iterator for feature extraction.

    The iterator is built from the module-level `feature_datagen` and reads
    file names from the "image_name" column of `dataframe`.

    Note: earlier versions passed `workers=4` and `use_multiprocessing=True`
    here. Those are not `flow_from_dataframe` parameters; they were absorbed
    by its `**kwargs` and had no effect, so they have been removed. Parallel
    loading, if wanted, has to be requested where the iterator is consumed.

    Args:
        dataframe (pd.DataFrame): DataFrame containing an "image_name" column.
        directory (str): Directory where images are stored.
        target_size (tuple): Desired image size.
        batch_size (int): Number of images per batch.
        shuffle (bool): Whether to shuffle the data.

    Returns:
        Iterator: Keras generator yielding batches of images (no labels).
    """
    return feature_datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=directory,
        x_col="image_name",
        y_col=None,  # No labels needed for feature extraction
        target_size=target_size,
        batch_size=batch_size,
        class_mode=None,  # Yield image batches only
        shuffle=shuffle,
        seed=42,
    )

# Build one non-shuffled generator per split, each with a batch size of 64.
# The three PlantVillage splits read from plant_vil_dir, PlantDoc from plant_doc_dir.
train_feature_gen_optimized = create_feature_generator_optimized(
    train_data, plant_vil_dir, batch_size=64, shuffle=False
)

test_seen_feature_gen_optimized = create_feature_generator_optimized(
    test_seen_data, plant_vil_dir, batch_size=64, shuffle=False
)

test_unseen_feature_gen_optimized = create_feature_generator_optimized(
    test_unseen_data, plant_vil_dir, batch_size=64, shuffle=False
)

doc_unseen_feature_gen_optimized = create_feature_generator_optimized(
    doc_unseen_data, plant_doc_dir, batch_size=64, shuffle=False
)

print("Optimized feature data generators created successfully.")


Found 38994 validated image filenames.
Found 10279 validated image filenames.
Found 216 validated image filenames.
Found 71 validated image filenames.
Optimized feature data generators created successfully.


In [43]:
import numpy as np

def extract_features_direct(generator, model, num_samples):
    """
    Runs `model.predict` over `generator` for just enough batches to cover
    `num_samples` items and returns the prediction result.

    Args:
        generator (Iterator): Batch iterator exposing a `batch_size` attribute.
        model (tf.keras.Model): Feature extractor model.
        num_samples (int): Total number of samples to process.

    Returns:
        np.array: Whatever `model.predict` returns for those batches.
    """
    # Round up so a final, partially filled batch is still processed
    batches_needed = np.ceil(num_samples / generator.batch_size)
    return model.predict(generator, steps=int(batches_needed), verbose=1)


In [10]:
# Sample counts as reported by each generator
num_train = train_feature_gen_optimized.n
num_test_seen = test_seen_feature_gen_optimized.n
num_test_unseen = test_unseen_feature_gen_optimized.n
num_doc_unseen = doc_unseen_feature_gen_optimized.n

# Training split
print("Extracting Training Features...")
train_features = extract_features_direct(train_feature_gen_optimized, feature_extractor, num_train)
print(f"Training features extracted: {train_features.shape}")

# Seen test split
print("\nExtracting Seen Test Features...")
test_seen_features = extract_features_direct(test_seen_feature_gen_optimized, feature_extractor, num_test_seen)
print(f"Seen Test features extracted: {test_seen_features.shape}")

# Unseen test split
print("\nExtracting Unseen Test Features...")
test_unseen_features = extract_features_direct(test_unseen_feature_gen_optimized, feature_extractor, num_test_unseen)
print(f"Unseen Test features extracted: {test_unseen_features.shape}")

# PlantDoc unseen test split
print("\nExtracting PlantDoc Unseen Test Features...")
doc_unseen_features = extract_features_direct(doc_unseen_feature_gen_optimized, feature_extractor, num_doc_unseen)
print(f"PlantDoc Unseen Test features extracted: {doc_unseen_features.shape}")


Extracting Training Features...
Training features extracted: (38994, 2048)

Extracting Seen Test Features...
Seen Test features extracted: (10279, 2048)

Extracting Unseen Test Features...
Unseen Test features extracted: (216, 2048)

Extracting PlantDoc Unseen Test Features...
PlantDoc Unseen Test features extracted: (71, 2048)


In [11]:
import os
import numpy as np

# Folder that receives the cached feature arrays (created if missing)
features_dir = r'C:\Users\bheja\OneDrive\Desktop\features'
os.makedirs(features_dir, exist_ok=True)

# Write each feature matrix to its own .npy file
for _file_name, _feature_array in (
    ('train_features.npy', train_features),
    ('test_seen_features.npy', test_seen_features),
    ('test_unseen_features.npy', test_unseen_features),
    ('doc_unseen_features.npy', doc_unseen_features),
):
    np.save(os.path.join(features_dir, _file_name), _feature_array)

print("All features saved successfully.")


All features saved successfully.


In [44]:
import os
import numpy as np

# Define directory where features are saved
features_dir = r'C:\Users\bheja\OneDrive\Desktop\features'

# Load features from .npy files
train_features = np.load(os.path.join(features_dir, 'train_features.npy'))
test_seen_features = np.load(os.path.join(features_dir, 'test_seen_features.npy'))
test_unseen_features = np.load(os.path.join(features_dir, 'test_unseen_features.npy'))
doc_unseen_features = np.load(os.path.join(features_dir, 'doc_unseen_features.npy'))

# Guard against a stale cache: later cells pair row i of each feature matrix
# with row i of the matching DataFrame, so the row counts must agree.
for _split_name, _cached, _frame in (
    ('train', train_features, train_data),
    ('test_seen', test_seen_features, test_seen_data),
    ('test_unseen', test_unseen_features, test_unseen_data),
    ('doc_unseen', doc_unseen_features, doc_unseen_data),
):
    if len(_cached) != len(_frame):
        raise ValueError(
            f"Cached {_split_name} features have {len(_cached)} rows but the "
            f"DataFrame has {len(_frame)}; re-run feature extraction."
        )

print("All features loaded successfully.")


All features loaded successfully.


In [45]:
# Verify shapes
print(f"Training Features Shape: {train_features.shape}")
print(f"Seen Test Features Shape: {test_seen_features.shape}")
print(f"Unseen Test Features Shape: {test_unseen_features.shape}")
print(f"PlantDoc Unseen Test Features Shape: {doc_unseen_features.shape}")

# Display a sample feature vector
print("\nSample Training Feature Vector (first 5 elements):")
print(train_features[0][:5])


Training Features Shape: (38994, 2048)
Seen Test Features Shape: (10279, 2048)
Unseen Test Features Shape: (216, 2048)
PlantDoc Unseen Test Features Shape: (71, 2048)

Sample Training Feature Vector (first 5 elements):
[0.         0.         0.02133459 0.         0.        ]


In [46]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training crop classes, then encode that same column
label_encoder = LabelEncoder().fit(train_data['crop_class'])
train_data['encoded_crop_class'] = label_encoder.transform(train_data['crop_class'])

# Show original vs. encoded labels and the set of encoded values
print("\nEncoded Labels:")
print(train_data[['crop_class', 'encoded_crop_class']].head())
print("\nUnique Encoded Classes:", train_data['encoded_crop_class'].unique())



Encoded Labels:
   crop_class  encoded_crop_class
0           0                   0
1           0                   0
2           0                   0
3           0                   0
4           0                   0

Unique Encoded Classes: [ 0  1  2  3  4  6  8  9 10 12 13 11  5  7]


In [48]:
# Reuse the encoder fitted on the training data for the seen-test (validation) split
_val_encoded = label_encoder.transform(test_seen_data['crop_class'])
test_seen_data['encoded_crop_class'] = _val_encoded

# Show original vs. encoded labels
print("\nEncoded Validation Labels:")
print(test_seen_data[['crop_class', 'encoded_crop_class']].head())

# Encoded values present in the validation split
print("\nUnique Encoded Classes in Validation Data:", test_seen_data['encoded_crop_class'].unique())



Encoded Validation Labels:
   crop_class  encoded_crop_class
0           0                   0
1           0                   0
2           0                   0
3           0                   0
4           0                   0

Unique Encoded Classes in Validation Data: [ 0  1  2  3  4  6  8  9 10 12 13 11  5  7]


In [49]:
# How many training pairs to request
num_train_pairs = 50000  # Adjust based on your dataset size and requirements

# Encoded training labels as a NumPy array
train_labels_array = train_data['encoded_crop_class'].to_numpy()

# NOTE(review): create_pairs is not defined in any visible cell of this
# notebook; it must already exist in the kernel for this cell to run.
# From its use here it is called as (features, labels, number_of_pairs) and
# returns a pair collection plus a label collection — confirm its definition.
train_pair_features, train_pair_labels = create_pairs(
    train_features, train_labels_array, num_train_pairs
)

# Report how many pairs and labels came back
print(f"Total Training Pairs Created: {len(train_pair_features)}")
print(f"Total Training Labels Created: {len(train_pair_labels)}")


Total Training Pairs Created: 50000
Total Training Labels Created: 50000


In [50]:
# How many validation pairs to request
num_val_pairs = 10000  # Adjust based on your dataset size and requirements

# Encoded seen-test labels as a NumPy array
val_labels_array = test_seen_data['encoded_crop_class'].to_numpy()

# NOTE(review): create_pairs is not defined in any visible cell of this
# notebook; it must already exist in the kernel for this cell to run.
val_pair_features, val_pair_labels = create_pairs(
    test_seen_features, val_labels_array, num_val_pairs
)

# Report how many pairs and labels came back
print(f"Total Validation Pairs Created: {len(val_pair_features)}")
print(f"Total Validation Labels Created: {len(val_pair_labels)}")


Total Validation Pairs Created: 10000
Total Validation Labels Created: 10000


In [51]:
# Training pairs: first members, second members, and labels as separate arrays
train_pair_features_1 = np.array([members[0] for members in train_pair_features])
train_pair_features_2 = np.array([members[1] for members in train_pair_features])
train_pair_labels = np.array(train_pair_labels)

for _title, _array in (
    ("Training Pair Features 1", train_pair_features_1),
    ("Training Pair Features 2", train_pair_features_2),
    ("Training Pair Labels", train_pair_labels),
):
    print(f"{_title} Shape: {_array.shape}")

# Validation pairs: same split
val_pair_features_1 = np.array([members[0] for members in val_pair_features])
val_pair_features_2 = np.array([members[1] for members in val_pair_features])
val_pair_labels = np.array(val_pair_labels)

for _title, _array in (
    ("Validation Pair Features 1", val_pair_features_1),
    ("Validation Pair Features 2", val_pair_features_2),
    ("Validation Pair Labels", val_pair_labels),
):
    print(f"{_title} Shape: {_array.shape}")


Training Pair Features 1 Shape: (50000, 2048)
Training Pair Features 2 Shape: (50000, 2048)
Training Pair Labels Shape: (50000,)
Validation Pair Features 1 Shape: (10000, 2048)
Validation Pair Features 2 Shape: (10000, 2048)
Validation Pair Labels Shape: (10000,)


In [52]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

def create_base_network(input_dim):
    """
    Builds the fully connected base network: three ReLU Dense layers of
    512, 256 and 128 units stacked on a flat input.

    Args:
        input_dim (int): Dimension of the input feature vector.

    Returns:
        keras.Model: Model named 'Base_Network' mapping the input to the
        128-unit layer's output.
    """
    base_input = Input(shape=(input_dim,), name='Base_Input')
    hidden = base_input
    for units in (512, 256, 128):
        hidden = Dense(units, activation='relu', name=f'Dense_{units}')(hidden)
    return Model(inputs=base_input, outputs=hidden, name='Base_Network')

# Feature vector length taken from the extracted training features
input_dim = train_features.shape[1]  # 2048

# Instantiate the base network and show its layer summary
base_network = create_base_network(input_dim)
base_network.summary()


Model: "Base_Network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Base_Input (InputLayer)     [(None, 2048)]            0         
                                                                 
 Dense_512 (Dense)           (None, 512)               1049088   
                                                                 
 Dense_256 (Dense)           (None, 256)               131328    
                                                                 
 Dense_128 (Dense)           (None, 128)               32896     
                                                                 
Total params: 1,213,312
Trainable params: 1,213,312
Non-trainable params: 0
_________________________________________________________________


In [53]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# NOTE(review): this cell used to repeat, character for character, the
# create_base_network definition from the preceding cell. The duplicate `def`
# silently shadowed the earlier one, so it has been dropped; the existing
# definition is reused below. The whole cell looks redundant and is probably
# a candidate for deletion — confirm before removing.

# Define input dimension
input_dim = train_features.shape[1]  # 2048

# Create the base network (rebinds `base_network` to a freshly built model)
base_network = create_base_network(input_dim)

# Display the base network summary
base_network.summary()


Model: "Base_Network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Base_Input (InputLayer)     [(None, 2048)]            0         
                                                                 
 Dense_512 (Dense)           (None, 512)               1049088   
                                                                 
 Dense_256 (Dense)           (None, 256)               131328    
                                                                 
 Dense_128 (Dense)           (None, 128)               32896     
                                                                 
Total params: 1,213,312
Trainable params: 1,213,312
Non-trainable params: 0
_________________________________________________________________


In [54]:
from tensorflow.keras.optimizers import Adam

# NOTE(review): siamese_network is not defined in any visible cell of this
# notebook, so this cell only works if the variable is already in the kernel.
# The cell that builds it appears to be missing — confirm and restore it.

# Adam optimizer with a 1e-3 learning rate; binary cross-entropy loss with accuracy as metric
_siamese_optimizer = Adam(learning_rate=0.001)
siamese_network.compile(
    optimizer=_siamese_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

print("Siamese Network compiled successfully.")


Siamese Network compiled successfully.


In [55]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# File that receives the best model seen during training
checkpoint_path = os.path.join(features_dir, 'siamese_network_best.h5')

# All three callbacks watch the validation loss and log their actions
_on_val_loss = dict(monitor='val_loss', verbose=1)

# Stop after 5 epochs without improvement and restore the best weights
early_stopping = EarlyStopping(patience=5, restore_best_weights=True, **_on_val_loss)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-6
reduce_lr = ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-6, **_on_val_loss)

# Save the model only when the monitored value improves
model_checkpoint = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, **_on_val_loss)

# Callback list handed to fit()
callbacks = [early_stopping, reduce_lr, model_checkpoint]

print("Callbacks defined successfully.")


Callbacks defined successfully.


In [56]:
# Training length and mini-batch size
epochs = 20  # You can adjust this based on your requirements
batch_size = 128  # Adjust based on your system's memory capacity

# The network takes the two members of each pair as a two-element input list
_train_inputs = [train_pair_features_1, train_pair_features_2]
_val_inputs = [val_pair_features_1, val_pair_features_2]

# Fit on the training pairs, validating on the validation pairs each epoch
history = siamese_network.fit(
    _train_inputs,
    train_pair_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(_val_inputs, val_pair_labels),
    callbacks=callbacks,
    verbose=1,  # Set to 1 to see progress bar, 2 for one line per epoch
)

print("Training completed successfully.")


Epoch 1/20
Epoch 1: val_loss improved from inf to 0.10421, saving model to C:\Users\bheja\OneDrive\Desktop\features\siamese_network_best.h5
Epoch 2/20
Epoch 2: val_loss did not improve from 0.10421
Epoch 3/20
Epoch 3: val_loss improved from 0.10421 to 0.09664, saving model to C:\Users\bheja\OneDrive\Desktop\features\siamese_network_best.h5
Epoch 4/20
Epoch 4: val_loss did not improve from 0.09664
Epoch 5/20
Epoch 5: val_loss did not improve from 0.09664
Epoch 6/20
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 6: val_loss did not improve from 0.09664
Epoch 7/20
Epoch 7: val_loss did not improve from 0.09664
Epoch 8/20

Epoch 8: val_loss did not improve from 0.09664
Epoch 8: early stopping
Training completed successfully.


In [57]:
from tensorflow.keras.models import load_model
import os

# Define the path to the best saved model
best_model_path = os.path.join(features_dir, 'siamese_network_best.h5')

# Fail fast when the checkpoint is missing instead of printing a message and
# then attempting the load anyway.
if not os.path.exists(best_model_path):
    raise FileNotFoundError(
        f"Best model not found at: {best_model_path}. Please ensure the model was saved correctly."
    )
print(f"Best model found at: {best_model_path}")

# Load the best saved Siamese Network model
try:
    best_siamese_model = load_model(best_model_path, compile=False)
except Exception as e:
    print(f"Error loading the best Siamese Network model: {e}")
    # Re-raise so `best_siamese_model` is never left undefined silently
    raise
print("Best Siamese Network model loaded successfully.")

# NOTE(review): no later visible cell references best_siamese_model; the
# evaluation cells classify with prototypes built from train_features
# directly. Confirm whether the Siamese model was meant to be used there.


Best model found at: C:\Users\bheja\OneDrive\Desktop\features\siamese_network_best.h5
Best Siamese Network model loaded successfully.


In [58]:
import numpy as np

def create_class_prototypes(features, labels, num_classes):
    """
    Computes one prototype per class as the mean of that class's feature rows.

    Classes 0 .. num_classes-1 are visited in order; a class with no rows in
    `features` triggers a printed warning and is left out of the result.

    Args:
        features (np.array): Array of feature vectors, one row per sample.
        labels (np.array): Array of encoded class labels aligned with `features`.
        num_classes (int): Total number of classes.

    Returns:
        prototypes (dict): Dictionary mapping class labels to prototype vectors.
    """
    prototypes = {}
    for cls in range(num_classes):
        members = features[labels == cls]
        if len(members) > 0:
            prototypes[cls] = members.mean(axis=0)
        else:
            print(f"Warning: No samples for class {cls}")
    return prototypes

# Class count as known to the fitted label encoder
num_classes = len(label_encoder.classes_)

# One prototype per class, computed from the training features
train_prototypes = create_class_prototypes(train_features, train_labels_array, num_classes)

# Show the first five components of the prototype for encoded class 0
first_class = label_encoder.inverse_transform([0])[0]
_first_prototype_head = train_prototypes[0][:5]
print(f"Prototype for class '{first_class}': {_first_prototype_head}...")


Prototype for class '0': [0.         0.00864174 0.02063849 0.00041282 0.00025575]...


In [59]:
from scipy.spatial.distance import cosine

def classify_image(feature, prototypes, top_k=5):
    """
    Ranks class prototypes by cosine similarity to `feature` and returns the best `top_k`.

    Encoded class ids are mapped back to original labels with the module-level
    `label_encoder`.

    Args:
        feature (np.array): Feature vector of the image.
        prototypes (dict): Dictionary of class prototypes keyed by encoded class id.
        top_k (int): Number of top similar classes to consider.

    Returns:
        top_classes (list): List of top_k predicted class labels, most similar first.
        similarity_scores (list): List of corresponding similarity scores.
    """
    # scipy's `cosine` is a distance, so similarity is 1 - distance
    scored = [(cls, 1 - cosine(feature, prototype)) for cls, prototype in prototypes.items()]

    # Highest similarity first
    scored.sort(key=lambda pair: pair[1], reverse=True)
    best = scored[:top_k]

    top_classes = [label_encoder.inverse_transform([cls])[0] for cls, _ in best]
    similarity_scores_sorted = [score for _, score in best]
    return top_classes, similarity_scores_sorted

def evaluate_model_on_dataset(features, labels, prototypes, top_k=5):
    """
    Evaluates prototype-based classification on a given dataset.

    Each sample is classified with `classify_image`; a hit is counted when the
    true label is the first prediction (top-1) or appears anywhere in the
    returned list (top-k).

    Labels outside the range known to the module-level `label_encoder`
    (e.g. a -1 sentinel for classes unseen at fit time) cannot match any
    prototype, so they are counted as misses instead of raising.

    Args:
        features (np.array): Array of feature vectors.
        labels (np.array): Array of true encoded class labels.
        prototypes (dict): Dictionary of class prototypes.
        top_k (int): Number of top similar classes to consider.

    Returns:
        top1_accuracy (float): Top-1 accuracy.
        top5_accuracy (float): Top-k accuracy (top-5 with the default top_k).

    Raises:
        ValueError: If `labels` is empty, since accuracy is undefined then.
    """
    total = len(labels)
    if total == 0:
        raise ValueError("Cannot evaluate on an empty dataset.")

    known_classes = label_encoder.classes_
    correct_top1 = 0
    correct_top5 = 0

    for i in range(total):
        encoded = labels[i]
        if not 0 <= encoded < len(known_classes):
            # Unknown/sentinel label: no prototype can be correct for it
            continue
        # Same lookup inverse_transform performs, without the per-call overhead
        true_label = known_classes[encoded]
        predicted_topk, _ = classify_image(features[i], prototypes, top_k=top_k)

        if not predicted_topk:
            continue
        if true_label == predicted_topk[0]:
            correct_top1 += 1
            correct_top5 += 1
        elif true_label in predicted_topk:
            correct_top5 += 1

    top1_accuracy = correct_top1 / total
    top5_accuracy = correct_top5 / total

    return top1_accuracy, top5_accuracy

print("Inference functions defined successfully.")


Inference functions defined successfully.


In [60]:
# Encoded training labels as a NumPy array
train_labels_eval = train_data['encoded_crop_class'].to_numpy()

# Score the training features against the training prototypes
train_top1_acc, train_top5_acc = evaluate_model_on_dataset(
    train_features, train_labels_eval, train_prototypes, top_k=5
)

print(f"Training Data - Top-1 Accuracy: {train_top1_acc * 100:.2f}%")
print(f"Training Data - Top-5 Accuracy: {train_top5_acc * 100:.2f}%")


Training Data - Top-1 Accuracy: 98.76%
Training Data - Top-5 Accuracy: 100.00%


In [61]:
# Encoded seen-test labels as a NumPy array
val_labels_eval = test_seen_data['encoded_crop_class'].to_numpy()

# Score the seen-test features against the training prototypes
val_top1_acc, val_top5_acc = evaluate_model_on_dataset(
    test_seen_features, val_labels_eval, train_prototypes, top_k=5
)

print(f"Test Seen Data (Validation) - Top-1 Accuracy: {val_top1_acc * 100:.2f}%")
print(f"Test Seen Data (Validation) - Top-5 Accuracy: {val_top5_acc * 100:.2f}%")


Test Seen Data (Validation) - Top-1 Accuracy: 98.33%
Test Seen Data (Validation) - Top-5 Accuracy: 99.93%


In [63]:
# Encode the unseen-test crop classes with the encoder fitted on training data.
# transform() raises ValueError for classes it was not fitted on; in that case
# report them and fall back to a per-row encoding that marks them with -1.
try:
    _unseen_encoded = label_encoder.transform(test_unseen_data['crop_class'])
    test_unseen_data['encoded_crop_class'] = _unseen_encoded
    print("\nEncoded Test Unseen Labels:")
    print(test_unseen_data[['crop_class', 'encoded_crop_class']].head())

    # Encoded values present in this split
    print("\nUnique Encoded Classes in Test Unseen Data:", test_unseen_data['encoded_crop_class'].unique())
except ValueError as e:
    print(f"Error encoding test_unseen_data: {e}")

    # Classes present in this split but unknown to the encoder
    unseen_classes = set(test_unseen_data['crop_class']).difference(label_encoder.classes_)
    print(f"Unseen classes in test_unseen_data: {unseen_classes}")

    # Known classes keep their encoded id; unknown ones get the sentinel -1.
    # NOTE(review): downstream consumers must be able to cope with -1 — verify.
    test_unseen_data['encoded_crop_class'] = test_unseen_data['crop_class'].map(
        lambda crop: label_encoder.transform([crop])[0] if crop in label_encoder.classes_ else -1
    )
    print("\nAfter handling unseen classes:")
    print(test_unseen_data[['crop_class', 'encoded_crop_class']].head())



Encoded Test Unseen Labels:
   crop_class  encoded_crop_class
0           7                   7
1           7                   7
2           7                   7
3           7                   7
4           7                   7

Unique Encoded Classes in Test Unseen Data: [7]


In [64]:
# Encoded unseen-test labels as a NumPy array
test_unseen_labels_eval = test_unseen_data['encoded_crop_class'].to_numpy()

# Score the unseen-test features against the training prototypes
test_unseen_top1_acc, test_unseen_top5_acc = evaluate_model_on_dataset(
    test_unseen_features, test_unseen_labels_eval, train_prototypes, top_k=5
)

print(f"Test Unseen Data - Top-1 Accuracy: {test_unseen_top1_acc * 100:.2f}%")
print(f"Test Unseen Data - Top-5 Accuracy: {test_unseen_top5_acc * 100:.2f}%")


Test Unseen Data - Top-1 Accuracy: 75.93%
Test Unseen Data - Top-5 Accuracy: 96.76%


In [65]:
# Encode the PlantDoc crop classes with the encoder fitted on training data.
# transform() raises ValueError for classes it was not fitted on; in that case
# report them and fall back to a per-row encoding that marks them with -1.
try:
    _doc_encoded = label_encoder.transform(doc_unseen_data['crop_class'])
    doc_unseen_data['encoded_crop_class'] = _doc_encoded
    print("\nEncoded PlantDoc Unseen Labels:")
    print(doc_unseen_data[['crop_class', 'encoded_crop_class']].head())

    # Encoded values present in this split
    print("\nUnique Encoded Classes in PlantDoc Unseen Data:", doc_unseen_data['encoded_crop_class'].unique())
except ValueError as e:
    print(f"Error encoding doc_unseen_data: {e}")

    # Classes present in this split but unknown to the encoder
    unseen_classes = set(doc_unseen_data['crop_class']).difference(label_encoder.classes_)
    print(f"Unseen classes in doc_unseen_data: {unseen_classes}")

    # Known classes keep their encoded id; unknown ones get the sentinel -1.
    # NOTE(review): downstream consumers must be able to cope with -1 — verify.
    doc_unseen_data['encoded_crop_class'] = doc_unseen_data['crop_class'].map(
        lambda crop: label_encoder.transform([crop])[0] if crop in label_encoder.classes_ else -1
    )
    print("\nAfter handling unseen classes:")
    print(doc_unseen_data[['crop_class', 'encoded_crop_class']].head())



Encoded PlantDoc Unseen Labels:
   crop_class  encoded_crop_class
0           7                   7
1           7                   7
2           7                   7
3           7                   7
4           7                   7

Unique Encoded Classes in PlantDoc Unseen Data: [7]


In [66]:
# Encoded PlantDoc labels as a NumPy array
doc_unseen_labels_eval = doc_unseen_data['encoded_crop_class'].to_numpy()

# Score the PlantDoc features against the training prototypes
doc_unseen_top1_acc, doc_unseen_top5_acc = evaluate_model_on_dataset(
    doc_unseen_features, doc_unseen_labels_eval, train_prototypes, top_k=5
)

print(f"PlantDoc Unseen Test Data - Top-1 Accuracy: {doc_unseen_top1_acc * 100:.2f}%")
print(f"PlantDoc Unseen Test Data - Top-5 Accuracy: {doc_unseen_top5_acc * 100:.2f}%")


PlantDoc Unseen Test Data - Top-1 Accuracy: 46.48%
PlantDoc Unseen Test Data - Top-5 Accuracy: 88.73%
