In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle #for saving history object
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from datetime import datetime
 

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
train_dir = 'D:/testproject/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train'
val_dir = 'D:/testproject/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid'
test_dir = 'D:/testproject/New Plant Diseases Dataset(Augmented)/test/test'


In [3]:

# Load image file paths and corresponding labels into a DataFrame
def load_data(directory):
    file_paths = []
    labels = []
    for class_label in os.listdir(directory):
        class_dir = os.path.join(directory, class_label)
        for img_file in os.listdir(class_dir):
            file_paths.append(os.path.join(class_dir, img_file))
            labels.append(class_label)
    return file_paths, labels

train_file_paths, train_labels = load_data(train_dir)
val_file_paths, val_labels = load_data(val_dir)

train_df = pd.DataFrame({'File_Path': train_file_paths, 'Label': train_labels})
val_df = pd.DataFrame({'File_Path': val_file_paths, 'Label': val_labels})


In [4]:

# Save the DataFrame to CSV files
train_df.to_csv('train_data.csv', index=False)
val_df.to_csv('val_data.csv', index=False)


In [5]:

# Load and preprocess images function
def preprocess_image(file_path):
    img = load_img(file_path, target_size=(299, 299))
    img_array = img_to_array(img)
    img_array = preprocess_input(img_array)
    return img_array

# Load and preprocess images for train and validation sets
X_train = np.array([preprocess_image(file_path) for file_path in train_df['File_Path']])
X_val = np.array([preprocess_image(file_path) for file_path in val_df['File_Path']])
y_train = np.array(train_df['Label'])
y_val = np.array(val_df['Label'])


MemoryError: Unable to allocate 1.02 MiB for an array with shape (299, 299, 3) and data type float32

In [8]:
# Check available GPUs
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Print information about each GPU
    for gpu in gpus:
        print("Name:", gpu.name)
        print("Type:", gpu.device_type)
else:
    print("No GPU available, using CPU instead")


Name: /physical_device:GPU:0
Type: GPU


In [None]:

# Define the model
model = Sequential([
    Flatten(input_shape=(299, 299, 3)),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(38, activation='softmax')  # assuming you have 38 classes
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
checkpoint = ModelCheckpoint('models/best_model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)


In [None]:

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=16, callbacks=[checkpoint, early_stopping])


In [None]:

# Save history object
with open('history.pkl', 'wb') as f:
    pickle.dump(history.history, f)

# Evaluation and prediction steps remain the same as in your original code
# ...


In [None]:
# Load history object
with open('history[xception02042024].pkl', 'rb') as f:
    loaded_history = pickle.load(f)


<h3><b>Evaluation of Model</b></h3>

In [None]:
# Plot training history
plt.figure(figsize=(10, 5))

# Plot training and validation accuracy
plt.subplot(1, 2, 1)
plt.plot(loaded_history['accuracy'], label='Training Accuracy')
#plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(loaded_history['val_accuracy'], label='Validation Accuracy')
#plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend()
plt.title('Training and Validation Accuracy')

# Plot training and validation accuracy
plt.subplot(1, 2, 2)
plt.plot(loaded_history['loss'], label='Training Loss')
#plt.plot(history.history['loss'], label='Training Loss')
plt.plot(loaded_history['val_loss'], label='Validation Loss')
#plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Validation Loss')

plt.show()

In [None]:
def predictor(test_gen):    
    y_pred = []
    error_list = []
    error_pred_list = []
    y_true = test_gen.labels
    classes = list(test_gen.class_indices.keys())
    class_count = len(classes)
    errors = 0
    preds = model.predict(test_gen, verbose=1)
    tests = len(preds)    
    for i, p in enumerate(preds):        
        pred_index = np.argmax(p)         
        true_index = test_gen.labels[i]  # labels are integer values        
        if pred_index != true_index: # a misclassification has occurred                                           
            errors = errors + 1
            file = test_gen.filenames[i]
            error_list.append(file)
            error_class = classes[pred_index]
            error_pred_list.append(error_class)
        y_pred.append(pred_index)
            
    acc = (1 - errors/tests) * 100
    msg = f'There were {errors} errors in {tests} tests for an accuracy of {acc:.2f}'
    print(msg)
    
    ypred = np.array(y_pred)
    ytrue = np.array(y_true)
    f1score = f1_score(ytrue, ypred, average='weighted') * 100
    if class_count <= 30:
        cm = confusion_matrix(ytrue, ypred)
        # plot the confusion matrix
        plt.figure(figsize=(12, 8))
        sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)       
        plt.xticks(np.arange(class_count) + .5, classes, rotation=90)
        plt.yticks(np.arange(class_count) + .5, classes, rotation=0)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        plt.show()
    
    clr = classification_report(y_true, y_pred, target_names=classes, digits=4) # create classification report
    print("Classification Report:\n----------------------\n", clr)
    
    return errors, tests, error_list, error_pred_list, f1score, y_pred, y_true

errors, tests, error_list, error_pred_list, f1score, y_pred, y_true = predictor(test_generator)

In [None]:
# Evaluate the model on test generator
test_loss, test_accuracy = model.evaluate(test_generator, verbose=1)
print('Test Accuracy:', test_accuracy)
print('Test Loss:', test_loss)


In [None]:
# Create a mapping from class indices to class names
class_indices = {v: k for k, v in train_generator.class_indices.items()}

# Save class indices to a JSON file
with open('class_indices_xception.json', 'w') as f:
    json.dump(class_indices, f)

<h3><b>Prediction of Disease</b></h3>

In [None]:
# Function to predict disease from an image
def predict_disease(image_path):

    img = load_img(image_path, target_size=(img_size))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)

    prediction = model.predict(img_array)[0]
    predicted_class_index = np.argmax(prediction)
    disease = class_indices[str(predicted_class_index)]
    confidence = prediction[predicted_class_index]

    return disease, confidence 

In [None]:
# Load class indices
with open('class_indices_xception.json', 'r') as f:
    class_indices = json.load(f)

# Load remedies
with open('remedies.json', 'r') as f:
    remedies= json.load(f)

In [None]:
image_path = r'D:\testproject\New Plant Diseases Dataset(Augmented)\CornCommonRust3.JPG' 

disease, confidence = predict_disease(image_path)

print(f'Predicted Disease: {disease}, Confidence: {confidence}')

if disease.strip() in remedies:
    remedy = remedies[disease]
    print(f'Remedies: {remedy}')
else: 
    print('No remedies found for this disease.')