In [7]:
'''
    Opening notebook remotely on MacOS --> jupyter-notebook --no-browser --port=9090
'''

'\n    Opening notebook remotely on MacOS --> jupyter-notebook --no-browser --port=9090\n'

In [8]:
from sklearn.metrics import classification_report
from keras.models import load_model
import numpy as np
from sklearn.metrics import roc_curve, auc, confusion_matrix
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot as plt
import seaborn as sns

In [9]:
# Base directory of the shared storage and the dataset directory beneath it.
# Both strings end in "/", so they can be joined by plain concatenation.
shared_dir = "/s/bach/a/class/cs435/cs435a/"
image_dir = "CS435_Plant_Data_v2_augment-v2/2023-11-15-231056/"

# CSV files describing the test splits, relative to the dataset directory.
orig_test = "splits/original/orig_test.csv"
aug_test = "splits/augmented/aug_test.csv"

# Full paths to the two test-split CSV files.
dataset_root = f"{shared_dir}{image_dir}"
orig_test_path = f"{dataset_root}{orig_test}"
aug_test_path = f"{dataset_root}{aug_test}"

In [10]:
# Load the two saved models from the shared "models/" directory: one file is
# named for the original training data, the other for the augmented data.
models_dir = shared_dir + "models/"
orig_model_name = models_dir + "trained_original.h5"
aug_model_name = models_dir + "trained_augmented.h5"
orig_model, aug_model = [
    load_model(model_path) for model_path in (orig_model_name, aug_model_name)
]

In [11]:
# Read the two test-split CSV files.
orig_test_df, aug_test_df = pd.read_csv(orig_test_path), pd.read_csv(aug_test_path)

# Normalize pixel values from [0,255] to [0,1].
datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both test generators. shuffle=False matters here: later
# cells compare predictions against each generator's `classes` attribute, so
# the sample order must not change between passes.
test_flow_kwargs = dict(
    x_col='path_to_shared',
    y_col='class_name',
    target_size=(224, 224),
    class_mode='categorical',
    shuffle=False,
)

# The two generators differ only in their dataframe and batch size.
orig_test_generator = datagen.flow_from_dataframe(
    dataframe=orig_test_df, batch_size=4, **test_flow_kwargs
)
aug_test_generator = datagen.flow_from_dataframe(
    dataframe=aug_test_df, batch_size=1, **test_flow_kwargs
)

Found 7166 validated image filenames belonging to 31 classes.
Found 21580 validated image filenames belonging to 31 classes.


In [12]:
# Split the augmented test set by augmentation type. The match is a
# case-insensitive substring test; rows with a missing 'augmentation' value
# are excluded (na=False).
augmentation_kind = aug_test_df['augmentation'].str
rain_test_df = aug_test_df[augmentation_kind.contains('rain', case=False, na=False)]
mud_test_df = aug_test_df[augmentation_kind.contains('mud', case=False, na=False)]

# Build one unshuffled, single-image-batch generator per subset (rain first,
# then mud), using the same settings for both.
rain_test_generator, mud_test_generator = [
    datagen.flow_from_dataframe(
        dataframe=subset_df,
        x_col='path_to_shared',
        y_col='class_name',
        target_size=(224, 224),
        batch_size=1,
        class_mode='categorical',
        shuffle=False,
    )
    for subset_df in (rain_test_df, mud_test_df)
]

Found 7235 validated image filenames belonging to 31 classes.
Found 7179 validated image filenames belonging to 31 classes.


In [13]:
# Basic metrics: accuracy, precision, recall, f1-score
def evaluate_model_metrics(model, test_generator, predictions):
    """Build a per-class classification report from a model's predictions.

    Args:
        model: Not used by this function; kept so existing callers keep working.
        test_generator: Object exposing ``classes`` (used as the true class
            index of every sample) and ``class_indices`` (mapping of class
            name -> class index).
        predictions: 2-D array-like of per-class scores, one row per sample,
            in the same sample order as ``test_generator.classes``.

    Returns:
        The dictionary produced by ``classification_report(..., output_dict=True)``.
    """
    # A single vectorized argmax over the class axis replaces a Python-level
    # loop that called np.argmax once per sample.
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = test_generator.classes

    # Order the display names by their class index rather than trusting the
    # dict's insertion order, so name i always describes class index i.
    class_indices = test_generator.class_indices
    class_labels = sorted(class_indices, key=class_indices.get)

    # zero_division=0 reports 0 for metrics that are undefined (e.g. precision
    # of a class that was never predicted) -- the same value the default
    # "warn" setting uses -- without emitting a warning on every call.
    return classification_report(
        true_classes,
        predicted_classes,
        target_names=class_labels,
        output_dict=True,
        zero_division=0,
    )

In [14]:
def print_report(report):
    """Print the headline metrics of a report, then display the full table.

    Args:
        report: Dictionary convertible to a DataFrame whose transposed form
            has 'accuracy' and 'macro avg' rows and 'precision', 'recall'
            and 'f1-score' columns (the shape evaluate_model_metrics returns).
    """
    # After transposing, each report entry is a row and each metric a column.
    table = pd.DataFrame(report).T

    # (caption, row, column) for every headline number, in print order.
    # The accuracy value is read from the 'precision' column of its row.
    headline = (
        ("Accuracy", 'accuracy', 'precision'),
        ("Macro Average Precision", 'macro avg', 'precision'),
        ("Macro Average Recall", 'macro avg', 'recall'),
        ("Macro Average F1-Score", 'macro avg', 'f1-score'),
    )
    for caption, row, column in headline:
        print(f"{caption}: {table.loc[row, column]}")

    display(table)

In [13]:
print("*** EVALUATING MODEL TRAINED ON ORIGINAL DATA ***")

# Run orig_model over each of the four test generators, in a fixed order:
# original, augmented, rain-only, mud-only.
(orig_orig_predictions,
 orig_aug_predictions,
 orig_rain_predictions,
 orig_mud_predictions) = [
    orig_model.predict(generator)
    for generator in (
        orig_test_generator,
        aug_test_generator,
        rain_test_generator,
        mud_test_generator,
    )
]

# Turn each set of predictions into a classification report, pairing every
# generator with the predictions that were computed from it.
(orig_orig_test_report,
 orig_aug_test_report,
 orig_rain_test_report,
 orig_mud_test_report) = [
    evaluate_model_metrics(orig_model, generator, scores)
    for generator, scores in (
        (orig_test_generator, orig_orig_predictions),
        (aug_test_generator, orig_aug_predictions),
        (rain_test_generator, orig_rain_predictions),
        (mud_test_generator, orig_mud_predictions),
    )
]

*** EVALUATING MODEL TRAINED ON ORIGINAL DATA ***


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
# Print a heading followed by the report for each test set scored with the
# model trained on original data.
for section_heading, section_report in (
    ("*** EVALUATIONS ON ORIGINAL TEST DATA WITH MODEL TRAINED ON ORIGINAL DATA ***",
     orig_orig_test_report),
    ("*** EVALUATIONS ON AUGMENTED TEST DATA WITH MODEL TRAINED ON ORIGINAL DATA ***",
     orig_aug_test_report),
    ("*** EVALUATIONS ON ONLY RAIN SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON ORIGINAL DATA ***",
     orig_rain_test_report),
    ("*** EVALUATIONS ON ONLY MUD SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON ORIGINAL DATA ***",
     orig_mud_test_report),
):
    print(section_heading)
    print_report(section_report)

*** EVALUATIONS ON ORIGINAL TEST DATA WITH MODEL TRAINED ON ORIGINAL DATA ***
Accuracy: 0.9112475579123639
Macro Average Precision: 0.9273727177182105
Macro Average Recall: 0.9123216110051471
Macro Average F1-Score: 0.9111204005454999


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.985915,0.886076,0.933333,237.0
Apple___Black_rot,0.995868,0.902622,0.946955,267.0
Apple___Cedar_apple_rust,0.962185,0.987069,0.974468,232.0
Apple___healthy,0.820261,0.984314,0.894831,255.0
Cherry_(including_sour)___Powdery_mildew,0.994681,0.973958,0.984211,192.0
Cherry_(including_sour)___healthy,0.995215,0.967442,0.981132,215.0
Corn_(maize)___Common_rust_,1.0,0.990826,0.995392,218.0
Corn_(maize)___Northern_Leaf_Blight,0.985366,0.926606,0.955083,218.0
Corn_(maize)___healthy,0.93633,1.0,0.967118,250.0
Grape___Black_rot,0.943089,0.935484,0.939271,248.0


*** EVALUATIONS ON AUGMENTED TEST DATA WITH MODEL TRAINED ON ORIGINAL DATA ***
Accuracy: 0.6505560704355885
Macro Average Precision: 0.8098064551064673
Macro Average Recall: 0.6504238669469813
Macro Average F1-Score: 0.670082182436296


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.962871,0.51455,0.67069,756.0
Apple___Black_rot,0.793556,0.892617,0.840177,745.0
Apple___Cedar_apple_rust,0.556008,0.827273,0.665043,660.0
Apple___healthy,0.805687,0.677291,0.735931,753.0
Cherry_(including_sour)___Powdery_mildew,0.963899,0.423138,0.588106,631.0
Cherry_(including_sour)___healthy,0.991561,0.686131,0.811044,685.0
Corn_(maize)___Common_rust_,0.75486,0.977622,0.85192,715.0
Corn_(maize)___Northern_Leaf_Blight,0.767878,0.916084,0.835459,715.0
Corn_(maize)___healthy,0.888889,0.677188,0.76873,697.0
Grape___Black_rot,0.708565,0.899718,0.792782,708.0


*** EVALUATIONS ON ONLY RAIN SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON ORIGINAL DATA ***
Accuracy: 0.4968901174844506
Macro Average Precision: 0.7687727553630246
Macro Average Recall: 0.4991595364970808
Macro Average F1-Score: 0.49961013346994393


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.892857,0.279851,0.426136,268.0
Apple___Black_rot,0.78733,0.809302,0.798165,215.0
Apple___Cedar_apple_rust,0.320423,0.83105,0.462516,219.0
Apple___healthy,0.68599,0.586777,0.632517,242.0
Cherry_(including_sour)___Powdery_mildew,0.833333,0.202703,0.326087,222.0
Cherry_(including_sour)___healthy,0.990566,0.433884,0.603448,242.0
Corn_(maize)___Common_rust_,0.942529,0.946154,0.944338,260.0
Corn_(maize)___Northern_Leaf_Blight,0.659509,0.881148,0.754386,244.0
Corn_(maize)___healthy,0.818966,0.833333,0.826087,228.0
Grape___Black_rot,0.664032,0.774194,0.714894,217.0


*** EVALUATIONS ON ONLY MUD SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON ORIGINAL DATA ***
Accuracy: 0.5452012815155314
Macro Average Precision: 0.7950719405323996
Macro Average Recall: 0.5402350605597156
Macro Average F1-Score: 0.5271623361692305


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.971963,0.414343,0.581006,251.0
Apple___Black_rot,0.666667,0.95057,0.783699,263.0
Apple___Cedar_apple_rust,0.767045,0.645933,0.701299,209.0
Apple___healthy,0.975,0.457031,0.62234,256.0
Cherry_(including_sour)___Powdery_mildew,1.0,0.16129,0.277778,217.0
Cherry_(including_sour)___healthy,0.987421,0.688596,0.81137,228.0
Corn_(maize)___Common_rust_,0.52784,1.0,0.690962,237.0
Corn_(maize)___Northern_Leaf_Blight,0.73913,0.940711,0.827826,253.0
Corn_(maize)___healthy,1.0,0.146119,0.25498,219.0
Grape___Black_rot,0.5925,0.975309,0.73717,243.0


In [17]:
# The cells that follow run aug_model (loaded from "trained_augmented.h5"),
# so the banner names the augmented-data model rather than the original one.
print("*** EVALUATING MODEL TRAINED ON AUGMENTED DATA ***")

*** EVALUATING MODEL TRAINED ON ORIGINAL DATA ***


In [18]:
# Predictions of the augmented-data model on the original test set.
aug_orig_predictions = aug_model.predict(orig_test_generator)



In [19]:
# Predictions of the augmented-data model on the full augmented test set.
aug_aug_predictions = aug_model.predict(aug_test_generator)



In [20]:
# Predictions of the augmented-data model on the rain-only subset of the
# augmented test set.
aug_rain_predictions = aug_model.predict(rain_test_generator)



In [21]:
# Predictions of the augmented-data model on the mud-only subset of the
# augmented test set.
aug_mud_predictions = aug_model.predict(mud_test_generator)



In [22]:
# Build one classification report per test set for the augmented-data model,
# pairing every generator with the predictions that were computed from it.
(aug_orig_test_report,
 aug_aug_test_report,
 aug_rain_test_report,
 aug_mud_test_report) = [
    evaluate_model_metrics(aug_model, generator, scores)
    for generator, scores in (
        (orig_test_generator, aug_orig_predictions),
        (aug_test_generator, aug_aug_predictions),
        (rain_test_generator, aug_rain_predictions),
        (mud_test_generator, aug_mud_predictions),
    )
]

In [23]:
# Print a heading followed by the report for each test set scored with the
# model trained on augmented data.
for section_heading, section_report in (
    ("*** EVALUATIONS ON ORIGINAL TEST DATA WITH MODEL TRAINED ON AUGMENTED DATA ***",
     aug_orig_test_report),
    ("*** EVALUATIONS ON AUGMENTED TEST DATA WITH MODEL TRAINED ON AUGMENTED DATA ***",
     aug_aug_test_report),
    ("*** EVALUATIONS ON ONLY RAIN SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON AUGMENTED DATA ***",
     aug_rain_test_report),
    ("*** EVALUATIONS ON ONLY MUD SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON AUGMENTED DATA ***",
     aug_mud_test_report),
):
    print(section_heading)
    print_report(section_report)

*** EVALUATIONS ON ORIGINAL TEST DATA WITH MODEL TRAINED ON AUGMENTED DATA ***
Accuracy: 0.9296678760814959
Macro Average Precision: 0.9392576863536143
Macro Average Recall: 0.9319632752592761
Macro Average F1-Score: 0.9305437187308214


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.876866,0.991561,0.930693,237.0
Apple___Black_rot,1.0,0.925094,0.961089,267.0
Apple___Cedar_apple_rust,1.0,0.883621,0.938215,232.0
Apple___healthy,0.958175,0.988235,0.972973,255.0
Cherry_(including_sour)___Powdery_mildew,0.994792,0.994792,0.994792,192.0
Cherry_(including_sour)___healthy,0.990741,0.995349,0.993039,215.0
Corn_(maize)___Common_rust_,1.0,0.963303,0.981308,218.0
Corn_(maize)___Northern_Leaf_Blight,0.947826,1.0,0.973214,218.0
Corn_(maize)___healthy,0.995984,0.992,0.993988,250.0
Grape___Black_rot,0.995238,0.842742,0.912664,248.0


*** EVALUATIONS ON AUGMENTED TEST DATA WITH MODEL TRAINED ON AUGMENTED DATA ***
Accuracy: 0.9257182576459685
Macro Average Precision: 0.9361833739752538
Macro Average Recall: 0.926610081205684
Macro Average F1-Score: 0.9268874239321236


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.906716,0.964286,0.934615,756.0
Apple___Black_rot,1.0,0.912752,0.954386,745.0
Apple___Cedar_apple_rust,1.0,0.85,0.918919,660.0
Apple___healthy,0.944872,0.978752,0.961513,753.0
Cherry_(including_sour)___Powdery_mildew,0.976599,0.992076,0.984277,631.0
Cherry_(including_sour)___healthy,0.98827,0.983942,0.986101,685.0
Corn_(maize)___Common_rust_,1.0,0.953846,0.976378,715.0
Corn_(maize)___Northern_Leaf_Blight,0.935864,1.0,0.96687,715.0
Corn_(maize)___healthy,0.997114,0.991392,0.994245,697.0
Grape___Black_rot,0.989147,0.90113,0.943089,708.0


*** EVALUATIONS ON ONLY RAIN SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON AUGMENTED DATA ***
Accuracy: 0.9212163096060816
Macro Average Precision: 0.9339895621808466
Macro Average Recall: 0.9228538519062458
Macro Average F1-Score: 0.9235743726401404


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.918819,0.929104,0.923933,268.0
Apple___Black_rot,1.0,0.902326,0.948655,215.0
Apple___Cedar_apple_rust,1.0,0.835616,0.910448,219.0
Apple___healthy,0.962963,0.966942,0.964948,242.0
Cherry_(including_sour)___Powdery_mildew,0.956332,0.986486,0.971175,222.0
Cherry_(including_sour)___healthy,0.995781,0.975207,0.985386,242.0
Corn_(maize)___Common_rust_,1.0,0.961538,0.980392,260.0
Corn_(maize)___Northern_Leaf_Blight,0.931298,1.0,0.964427,244.0
Corn_(maize)___healthy,1.0,0.995614,0.997802,228.0
Grape___Black_rot,1.0,0.921659,0.959233,217.0


*** EVALUATIONS ON ONLY MUD SPATTER AUGMENTED IMAGES WITH MODEL TRAINED ON AUGMENTED DATA ***
Accuracy: 0.926312856943864
Macro Average Precision: 0.9375968134503275
Macro Average Recall: 0.9256596681930618
Macro Average F1-Score: 0.9269790635326847


Unnamed: 0,precision,recall,f1-score,support
Apple___Apple_scab,0.924528,0.976096,0.949612,251.0
Apple___Black_rot,1.0,0.908745,0.952191,263.0
Apple___Cedar_apple_rust,1.0,0.827751,0.905759,209.0
Apple___healthy,0.916058,0.980469,0.94717,256.0
Cherry_(including_sour)___Powdery_mildew,0.981818,0.995392,0.988558,217.0
Cherry_(including_sour)___healthy,0.978166,0.982456,0.980306,228.0
Corn_(maize)___Common_rust_,1.0,0.936709,0.96732,237.0
Corn_(maize)___Northern_Leaf_Blight,0.930147,1.0,0.96381,253.0
Corn_(maize)___healthy,0.995392,0.986301,0.990826,219.0
Grape___Black_rot,0.974468,0.942387,0.958159,243.0


In [24]:
# More advanced metrics: Confusion Matrix
def create_model_confusion_matrix(model, test_generator, model_name, test_df, predictions):
    """Plot a confusion-matrix heatmap for one model / test-set combination.

    Args:
        model: Not used by this function; kept so existing callers keep working.
        test_generator: Object exposing ``class_indices`` (mapping of class
            name -> class index).
        model_name: Text identifying the run; shown in the figure title.
        test_df: DataFrame with a 'class_name' column holding the true class
            name of every sample, in the same order as ``predictions``.
        predictions: 2-D array-like of per-class scores, one row per sample.

    Raises:
        ValueError: If ``test_df`` contains a class name that is missing from
            ``class_indices``, or its row count differs from the number of
            predictions.
    """
    predicted_classes = np.argmax(predictions, axis=1)

    # Use the generator's own name -> index mapping instead of re-deriving it
    # from dict iteration order, and list the names by ascending index.
    class_indices = test_generator.class_indices
    class_names = sorted(class_indices, key=class_indices.get)

    mapped = test_df['class_name'].map(class_indices)
    missing = mapped.isna()
    if missing.any():
        unknown = sorted(set(test_df.loc[missing, 'class_name'].astype(str)))
        raise ValueError(f"class names not present in class_indices: {unknown}")
    true_classes = mapped.astype(int).values

    if len(true_classes) != len(predicted_classes):
        raise ValueError(
            f"test_df has {len(true_classes)} rows but there are "
            f"{len(predicted_classes)} predictions"
        )

    # Fixing the label set keeps one row/column per known class, even for a
    # class that is absent from both the true and the predicted labels, so
    # the tick labels below always line up with the matrix.
    conf_matrix = confusion_matrix(
        true_classes,
        predicted_classes,
        labels=[class_indices[name] for name in class_names],
    )

    plt.figure(figsize=(20, 20))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='g',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    # model_name was previously ignored, leaving every figure with the same
    # title; include it so plots from different runs can be told apart.
    plt.title(f'Confusion Matrix: {model_name}')

    # plt.savefig(model_name+'_aug_confusion_matrix.png')
    plt.show()

In [27]:
# Confusion matrix for the original-data model on the original test set.
# Needs orig_orig_predictions, so the cell that computes the original-model
# predictions must have been run in the current kernel first.
create_model_confusion_matrix(
    model=orig_model,
    test_generator=orig_test_generator,
    model_name="orig model on orig test",
    test_df=orig_test_df,
    predictions=orig_orig_predictions,
)

NameError: name 'orig_orig_predictions' is not defined