In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/drive/.
# NOTE(review): none of the visible cells read from or write to /content/drive —
# confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive/')

# Downloading and extracting dataset

In [None]:
! pip install kaggle

In [None]:
import os

# Check if kaggle.json exists
if not os.path.exists('/root/.kaggle/kaggle.json'):
    from google.colab import files
    files.upload()

    # Copy the uploaded file to ~/.kaggle folder
    ! mkdir -p ~/.kaggle
    ! cp kaggle.json ~/.kaggle/
    ! chmod 600 ~/.kaggle/kaggle.json
    print("kaggle.json file uploaded successfully.")
else:
    print("kaggle.json file already exists. Skipping upload.")

In [None]:
import os
import shutil

# Remove any previously extracted copy of the dataset so that the following
# cells start from a clean directory tree.
folder_to_delete = '/content/dataset'

if not os.path.exists(folder_to_delete):
    print("Folder 'dataset' does not exist.")
else:
    # rmtree removes the folder together with everything below it
    shutil.rmtree(folder_to_delete)
    print("Folder 'dataset' and its contents have been deleted.")

In [None]:
# Download the saroz014/plant-disease dataset archive with the Kaggle CLI;
# the next cell expects it as plant-disease.zip in the working directory.
! kaggle datasets download -d saroz014/plant-disease

In [None]:
! unzip plant-disease.zip

# Data Exploration and Visualization

In [None]:
import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

In [None]:
import shutil

import random

# Define the root directory of your dataset
root_dir = '/content/dataset/dataset/train'

# Define the directory for the validation set
valid_dir = '/content/dataset/dataset/valid'

# Share of every class that is moved from train to validation, and the seed
# that makes the choice of files reproducible
VALID_FRACTION = 0.2
SPLIT_SEED = 42

# Marker file written after a successful split. Moving files is destructive, so
# without this guard every re-run of the cell would shift a further 20% of the
# remaining training images into the validation set. The marker lives next to
# the train/valid folders (not inside them) so it is never mistaken for a class.
split_marker = os.path.join(os.path.dirname(valid_dir), '.valid_split_done')

# List all the subdirectories (classes) in the root directory, in a stable order
subdirs = sorted(
    subdir for subdir in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, subdir))
)

if os.path.exists(split_marker):
    print('Validation split already performed. Skipping.')
else:
    split_rng = random.Random(SPLIT_SEED)

    for subdir in subdirs:
        subdir_path = os.path.join(root_dir, subdir)

        # os.listdir order is arbitrary: sort first, then shuffle with the
        # seeded generator so the same files are selected on every fresh run
        image_names = sorted(
            name for name in os.listdir(subdir_path)
            if os.path.isfile(os.path.join(subdir_path, name))
        )
        split_rng.shuffle(image_names)

        # Number of files to move to the validation set
        num_files_valid = int(VALID_FRACTION * len(image_names))

        # Create the corresponding class folder in the validation directory
        valid_subdir = os.path.join(valid_dir, subdir)
        os.makedirs(valid_subdir, exist_ok=True)

        for file_to_move in image_names[:num_files_valid]:
            shutil.move(
                os.path.join(subdir_path, file_to_move),
                os.path.join(valid_subdir, file_to_move),
            )

        # One summary line per class instead of one line per moved file
        print(f'Moved {num_files_valid} files to {valid_subdir}')

    with open(split_marker, 'w') as marker_file:
        marker_file.write('done\n')

    print('Validation data split completed.')

In [None]:
# Build the training dataset from the class sub-folders of train_dir:
# images are resized to 256x256 and served in shuffled batches of 32
train_dir = root_dir
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_dir,
    image_size=(256, 256),
    batch_size=32,
    shuffle=True,
)

In [None]:
# Build the validation dataset from the class sub-folders of valid_dir:
# images are resized to 256x256 and served in shuffled batches of 32
valid_data = tf.keras.preprocessing.image_dataset_from_directory(
    directory=valid_dir,
    image_size=(256, 256),
    batch_size=32,
    shuffle=True,
)

In [None]:
# Class names as inferred by Keras from the sub-folder names of the training directory
class_labels = train_data.class_names
print(f'The total number of classes is {len(class_labels)}')
class_labels

# Load images and labels

In [None]:
import os
import numpy as np
from keras.preprocessing import image
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Function to load subset of resized images and labels
def load_subset_resized_images_and_labels(directory, subset_size=1000, target_size=(64, 64), seed=None):
    """Load at most ``subset_size`` images per class as flattened pixel vectors.

    Every sub-directory of ``directory`` is treated as one class. Classes are
    numbered by their position in the alphabetically sorted list of
    sub-directory names, so the same class name gets the same integer label in
    every split that contains the same set of class folders. Entries of
    ``directory`` that are not directories are ignored.

    Args:
        directory: Folder that contains one sub-folder per class.
        subset_size: Maximum number of images read from each class. Classes
            with more files are sampled without replacement.
        target_size: Size every image is resized to while loading.
        seed: Optional seed for the per-class sampling. ``None`` (default)
            keeps using NumPy's global random state.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: one flattened image per
        row in ``images`` and the matching integer class index in ``labels``.
    """
    # Sorted + directories only: os.listdir order is arbitrary and may differ
    # between folders, and stray files must not be counted as classes
    class_names = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    # Both np.random and a Generator expose the same choice() call
    sampler = np.random if seed is None else np.random.default_rng(seed)

    images, labels = [], []
    print("Loading subset of resized images...")
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(directory, class_name)
        # Sorted so that a given seed always selects the same files
        image_files = sorted(os.path.join(class_dir, img) for img in os.listdir(class_dir))
        if len(image_files) <= subset_size:
            selected_files = image_files
        else:
            selected_files = sampler.choice(image_files, subset_size, replace=False)
        for img_path in selected_files:
            img = image.load_img(img_path, target_size=target_size)
            img_array = image.img_to_array(img)
            images.append(img_array.flatten())  # Flatten image into a vector
            labels.append(label)
    print("Subset of resized images loaded successfully.")
    return np.array(images), np.array(labels)


# Load subset of resized training and validation data
# (loader defaults apply: at most 1000 images per class, resized to 64x64 and flattened)
train_images, train_labels = load_subset_resized_images_and_labels(train_dir)
valid_images, valid_labels = load_subset_resized_images_and_labels(valid_dir)

In [None]:
# Number of loaded training / validation samples
len(train_images),len(valid_images) # (26059, 8675) — presumably the counts from the author's run; verify

# Hyperparameter tuning

In [None]:
# perform hyper parameter tuning on smaller subset
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import numpy as np

# Size of the stratified sample used for the (expensive) grid search
subset_size = 2000

# stratify keeps the class proportions of the full training sample;
# random_state pins the draw so the tuning run can be repeated on the same rows
subset_train_images, _, subset_train_labels, _ = train_test_split(
    train_images,
    train_labels,
    train_size=subset_size,
    stratify=train_labels,
    random_state=42,
)

len(subset_train_images),len(subset_train_labels)

In [None]:
# perform hyper parameter tuning on smaller subset
# Define a smaller parameter grid for testing.
# SVC ignores `gamma` for the linear kernel, so that kernel gets its own
# sub-grid; a single flat grid would refit the identical linear model once per
# gamma value.
c_values = [0.1, 1, 10, 100]
gamma_values = [0.01, 0.1, 1, 10]
param_grid_small = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf', 'poly', 'sigmoid'], 'C': c_values, 'gamma': gamma_values},
]

# Initialize the SVM classifier
svm_classifier = SVC()

# Initialize GridSearchCV with smaller parameter grid;
# n_jobs=-1 runs the independent fits on all available cores
grid_search_small = GridSearchCV(
    estimator=svm_classifier,
    param_grid=param_grid_small,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
)

# Perform grid search on the subset of data
print("Performing grid search on a subset of data...")
grid_search_small.fit(subset_train_images, subset_train_labels)
print("Grid search on the subset of data completed.")

# Get the best parameters (contains no 'gamma' entry when the linear kernel wins)
best_params_small = grid_search_small.best_params_
print("Best parameters on the subset of data:", best_params_small)

In [None]:
# Get the CV results
cv_results = grid_search_small.cv_results_

# Mean cross-validated score and parameter dict of every candidate
mean_test_scores = cv_results['mean_test_score']
params = cv_results['params']

# Candidate indices ordered from best to worst mean score
sorted_indices = np.argsort(mean_test_scores)[::-1]

# Print the summary of a few best parameter combinations
num_combinations = 5  # Number of best combinations to display
# Never ask for more rows than there are candidates (avoids an IndexError on small grids)
num_shown = min(num_combinations, len(sorted_indices))
print(f"Summary of {num_shown} best parameter combinations:")
for rank, idx in enumerate(sorted_indices[:num_shown], start=1):
    print(f"Combination {rank}: Mean Test Score: {mean_test_scores[idx]}, Parameters: {params[idx]}")


# SVM Model Training and Evaluation



In [None]:
# Refit an SVM on the whole loaded training sample, using the hyper-parameters
# that the grid search selected on the small subset
print("Training SVM model with best parameters...")
best_svm_classifier_small = SVC(**best_params_small).fit(train_images, train_labels)  # fit() returns the estimator itself
print("Training SVM model with best parameters completed...")

In [None]:
# Score the tuned SVM on the validation split
val_predictions_svm_tuned = best_svm_classifier_small.predict(valid_images)

validation_accuracy = accuracy_score(y_true=valid_labels, y_pred=val_predictions_svm_tuned)
print('Validation set accuracy :', validation_accuracy)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix of the validation predictions (true labels first, predictions second)
conf_matrix = confusion_matrix(y_true=valid_labels, y_pred=val_predictions_svm_tuned)
print("Confusion Matrix:", conf_matrix, sep="\n")

print("\n")

# Per-class precision / recall / F1 for the same predictions
class_report = classification_report(y_true=valid_labels, y_pred=val_predictions_svm_tuned)
print("Classification Report:", class_report, sep="\n")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the validation confusion matrix.
# The tick labels are only correct if integer label i in the label arrays
# corresponds to class_labels[i].
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
plt.show()

# Evaluating with test data

In [None]:
# Build the test dataset from the class sub-folders of test_dir:
# images are resized to 256x256 and served in shuffled batches of 32
test_dir = '/content/dataset/dataset/test'

test_data = tf.keras.preprocessing.image_dataset_from_directory(
    directory=test_dir,
    image_size=(256, 256),
    batch_size=32,
    shuffle=True,
)

In [None]:
# Load the test images with the same loader (and the same defaults) used for
# the training and validation splits
test_images, test_labels = load_subset_resized_images_and_labels(test_dir)

# Number of loaded samples and labels; the loader appends one label per image
len(test_images),len(test_labels)

In [None]:
# Score the tuned SVM on the held-out test split
test_predictions_svm_tuned = best_svm_classifier_small.predict(test_images)

test_accuracy = accuracy_score(y_true=test_labels, y_pred=test_predictions_svm_tuned)
print('Test set accuracy :', test_accuracy)

In [None]:
# Generate confusion matrix for the test set.
# Must be built from the test labels/predictions; using the validation arrays
# here would simply reproduce the validation report under a "test" heading.
test_conf_matrix = confusion_matrix(test_labels, test_predictions_svm_tuned)
print("Confusion Matrix:")
print(test_conf_matrix)

print("\n")

# Generate classification report for the test set
test_class_report = classification_report(test_labels, test_predictions_svm_tuned)
print("Classification Report:")
print(test_class_report)

print("\n")

# Heatmap of the test confusion matrix; tick labels are only correct if integer
# label i in the label arrays corresponds to class_labels[i]
plt.figure(figsize=(12, 10))
sns.heatmap(test_conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()