In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, precision_score, f1_score, recall_score
import seaborn as sns
import os
import datetime

In [2]:
def evaluate_model(model_path, test_dir, target_size=(224, 224), output_dir=r"C:\Users\priya\Desktop\Iris_Recognition\Gpt\Evaluation_Result"):
    """
    Evaluate a trained model on test data with comprehensive metrics and visualizations.
    Includes detailed debugging for confusion matrix issues.
    """
    try:
        # Create output directory
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Load model
        try:
            model = load_model(model_path)
            print("Model loaded successfully")
        except Exception as e:
            print(f"Error loading model: {str(e)}")
            return

        # Create test data generator with consistent target size
        test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
        test_generator = test_datagen.flow_from_directory(
            test_dir,
            target_size=target_size,
            batch_size=16,
            class_mode='categorical',
            shuffle=False
        )
        print(f"Found {test_generator.samples} images belonging to {len(test_generator.class_indices)} classes.")

        # Create index-to-class mapping
        class_indices = test_generator.class_indices
        index_to_class = {v: k for k, v in class_indices.items()}
        class_labels = list(class_indices.keys())
        print(f"Class indices: {class_indices}")

        # Evaluate model to get a baseline
        test_loss, test_acc = model.evaluate(test_generator, verbose=2)
        print(f"\nTest accuracy: {test_acc:.4f}")
        print(f"Test loss: {test_loss:.4f}")

        # Make predictions with explicit steps
        test_generator.reset()
        num_samples = test_generator.samples
        predictions = model.predict(test_generator, steps=int(num_samples / test_generator.batch_size), verbose=1)
        predicted_classes = np.argmax(predictions[:num_samples], axis=1)  # Ensure length matches
        true_classes = test_generator.classes[:num_samples]

        # Debugging prints
        print(f"Number of true classes: {len(true_classes)}")
        print(f"Number of predicted classes: {len(predicted_classes)}")
        print(f"Sample predictions: {predicted_classes[:10]}")
        print(f"Sample true classes: {true_classes[:10]}")
        print(f"Max prediction value: {np.max(predictions)}")
        print(f"Min prediction value: {np.min(predictions)}")

        # Verify and adjust lengths
        if len(true_classes) != len(predicted_classes):
            print(f"Warning: Mismatch in lengths - True: {len(true_classes)}, Predicted: {len(predicted_classes)}")
            min_length = min(len(true_classes), len(predicted_classes))
            true_classes = true_classes[:min_length]
            predicted_classes = predicted_classes[:min_length]
        else:
            print("Lengths match successfully")

        # Calculate confusion matrix
        cm = confusion_matrix(true_classes, predicted_classes)
        print(f"Confusion matrix shape: {cm.shape}")
        print(f"Confusion matrix content:\n{cm}")

        # Plot full confusion matrix for first 10 classes
        plt.figure(figsize=(12, 10))
        sns.heatmap(cm[:10, :10], annot=True, fmt='d', cmap='Blues',  # Limit to 10x10 for readability
                    xticklabels=class_labels[:10],
                    yticklabels=class_labels[:10])
        plt.title('Confusion Matrix (First 10 Classes)')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
        plt.close()

        macro_precision = precision_score(true_classes, predicted_classes, average="macro", zero_division=0)  
        macro_recall    = recall_score   (true_classes, predicted_classes, average="macro", zero_division=0)  
        macro_f1        = f1_score       (true_classes, predicted_classes, average="macro", zero_division=0) 

        # Calculate and save classification report
        print("\nClassification Report:")
        print(classification_report(true_classes, predicted_classes, target_names=class_labels, zero_division=0))
        with open(os.path.join(output_dir, 'classification_report.txt'), 'w') as f:
            f.write(classification_report(true_classes, predicted_classes, target_names=class_labels, zero_division=0))

        # Save overall metrics
        accuracy = np.mean(predicted_classes == true_classes)
        metrics = {
            'test_accuracy': test_acc,
            'test_loss': test_loss,
            'overall_accuracy': accuracy,
            'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        with open(os.path.join(output_dir, 'metrics.txt'), 'w') as f:
            for key, value in metrics.items():
                f.write(f"{key}: {value}\n")

        print(f"\nOverall Accuracy: {accuracy:.4f}")
        print(f"Evaluation results saved in {output_dir}")

    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")

In [4]:
import os

for root, dirs, files in os.walk("C:\\Users\\priya"):
    for file in files:
        if file.endswith(".h5"):
            print(os.path.join(root, file))

C:\Users\priya\.conda\envs\Iris_Reco\Lib\site-packages\h5py\tests\data_files\vlen_string_dset.h5
C:\Users\priya\.conda\envs\Iris_Reco\Lib\site-packages\h5py\tests\data_files\vlen_string_dset_utc.h5
C:\Users\priya\.conda\envs\Iris_Reco\Lib\site-packages\h5py\tests\data_files\vlen_string_s390x.h5
C:\Users\priya\.conda\pkgs\h5py-3.12.1-py39h535c9fb_1\Lib\site-packages\h5py\tests\data_files\vlen_string_dset.h5
C:\Users\priya\.conda\pkgs\h5py-3.12.1-py39h535c9fb_1\Lib\site-packages\h5py\tests\data_files\vlen_string_dset_utc.h5
C:\Users\priya\.conda\pkgs\h5py-3.12.1-py39h535c9fb_1\Lib\site-packages\h5py\tests\data_files\vlen_string_s390x.h5
C:\Users\priya\.keras\models\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
C:\Users\priya\AppData\Roaming\Python\Python38\site-packages\h5py\tests\data_files\vlen_string_dset.h5
C:\Users\priya\AppData\Roaming\Python\Python38\site-packages\h5py\tests\data_files\vlen_string_dset_utc.h5
C:\Users\priya\AppData\Roaming\Python\Python38\site-packages\h5py

In [6]:
  model_path = r"C:\Users\priya\Desktop\Iris_Recognition\Gpt\results\global_model_final.h5"

test_dirs = {
    "Client1_Test": r"C:\Users\priya\Desktop\Iris_Recognition\Final_Dataset\Clients\client1\test",
    "Client2_Test": r"C:\Users\priya\Desktop\Iris_Recognition\Final_Dataset\Clients\client2\test",
    "Client3_Test": r"C:\Users\priya\Desktop\Iris_Recognition\Final_Dataset\Clients\client3\test"
}

for client_name, path in test_dirs.items():
    print(f"\n🔍 Evaluating model on {client_name}...")
    evaluate_model(model_path, path, target_size=(224, 224))



🔍 Evaluating model on Client1_Test...




Model loaded successfully
Found 148 images belonging to 74 classes.


  self._warn_if_super_not_called()


Found 148 images belonging to 74 classes.
Class indices: {'001': 0, '002': 1, '003': 2, '004': 3, '005': 4, '006': 5, '007': 6, '008': 7, '009': 8, '010': 9, '011': 10, '012': 11, '013': 12, '014': 13, '015': 14, '016': 15, '017': 16, '018': 17, '019': 18, '020': 19, '021': 20, '022': 21, '023': 22, '024': 23, '025': 24, '026': 25, '027': 26, '028': 27, '029': 28, '030': 29, '031': 30, '032': 31, '033': 32, '034': 33, '035': 34, '036': 35, '037': 36, '038': 37, '039': 38, '040': 39, '041': 40, '042': 41, '043': 42, '044': 43, '045': 44, '046': 45, '047': 46, '048': 47, '049': 48, '050': 49, '051': 50, '052': 51, '053': 52, '054': 53, '055': 54, '056': 55, '057': 56, '058': 57, '059': 58, '060': 59, '061': 60, '062': 61, '063': 62, '064': 63, '065': 64, '066': 65, '067': 66, '068': 67, '069': 68, '070': 69, '071': 70, '072': 71, '073': 72, '074': 73}
10/10 - 7s - 656ms/step - accuracy: 0.9122 - loss: 0.8292

Test accuracy: 0.9122
Test loss: 0.8292
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[



Model loaded successfully
Found 148 images belonging to 74 classes.


  self._warn_if_super_not_called()


Found 148 images belonging to 74 classes.
Class indices: {'075': 0, '076': 1, '077': 2, '078': 3, '079': 4, '080': 5, '081': 6, '082': 7, '083': 8, '084': 9, '085': 10, '086': 11, '087': 12, '088': 13, '089': 14, '090': 15, '091': 16, '092': 17, '093': 18, '094': 19, '095': 20, '096': 21, '097': 22, '098': 23, '099': 24, '100': 25, '101': 26, '102': 27, '103': 28, '104': 29, '105': 30, '106': 31, '107': 32, '108': 33, '109': 34, '110': 35, '111': 36, '112': 37, '113': 38, '114': 39, '115': 40, '116': 41, '117': 42, '118': 43, '119': 44, '120': 45, '121': 46, '122': 47, '123': 48, '124': 49, '125': 50, '126': 51, '127': 52, '128': 53, '129': 54, '130': 55, '131': 56, '132': 57, '133': 58, '134': 59, '135': 60, '136': 61, '137': 62, '138': 63, '139': 64, '140': 65, '141': 66, '142': 67, '143': 68, '144': 69, '145': 70, '146': 71, '147': 72, '148': 73}
10/10 - 6s - 641ms/step - accuracy: 0.8446 - loss: 1.1350

Test accuracy: 0.8446
Test loss: 1.1350
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[



Model loaded successfully
Found 146 images belonging to 74 classes.


  self._warn_if_super_not_called()


Found 146 images belonging to 74 classes.
Class indices: {'.ipynb_checkpoints': 0, '149': 1, '150': 2, '151': 3, '152': 4, '153': 5, '154': 6, '155': 7, '156': 8, '157': 9, '158': 10, '159': 11, '160': 12, '161': 13, '162': 14, '163': 15, '164': 16, '165': 17, '166': 18, '167': 19, '168': 20, '169': 21, '170': 22, '171': 23, '172': 24, '173': 25, '174': 26, '175': 27, '176': 28, '177': 29, '178': 30, '179': 31, '180': 32, '181': 33, '182': 34, '183': 35, '184': 36, '185': 37, '186': 38, '187': 39, '188': 40, '189': 41, '190': 42, '191': 43, '192': 44, '193': 45, '194': 46, '195': 47, '196': 48, '197': 49, '198': 50, '199': 51, '200': 52, '201': 53, '202': 54, '203': 55, '204': 56, '205': 57, '206': 58, '207': 59, '208': 60, '209': 61, '210': 62, '211': 63, '212': 64, '213': 65, '214': 66, '215': 67, '216': 68, '217': 69, '218': 70, '219': 71, '220': 72, '221': 73}
10/10 - 6s - 627ms/step - accuracy: 0.8630 - loss: 1.0845

Test accuracy: 0.8630
Test loss: 1.0845
[1m9/9[0m [32m━━━━━━━