In [None]:
import itertools
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.svm import SVC

In [None]:
# Locate the image dataset
# The dataset folder is expected in the notebook's current working directory
dataset_dir_name = "MangoLeafBD Dataset"

# Get the current working directory
current_directory = os.getcwd()
# Construct the full path to the dataset folder
DATADIR = os.path.join(current_directory, dataset_dir_name)

# Each sub-directory of DATADIR is one class. os.listdir() returns entries in
# arbitrary order, so sort them: the position of a name in CATEGORIES is used
# as its numeric class label and must be stable across runs and machines.
CATEGORIES = sorted(
    d for d in os.listdir(DATADIR) if os.path.isdir(os.path.join(DATADIR, d))
)
print(CATEGORIES)
# Side length (in pixels) that images are resized to before flattening
IMG_SIZE = 100

In [None]:
# Preview one sample: the first file of the first category.
for category in CATEGORIES[:1]:
    path = os.path.join(DATADIR, category)
    for img in os.listdir(path)[:1]:
        img_array = cv2.imread(os.path.join(path, img))
        if img_array is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None rather than raising, which would make plt.imshow fail.
            print(f"Could not read {os.path.join(path, img)} as an image")
            continue
        # OpenCV loads pixels in BGR order while matplotlib expects RGB;
        # convert so the preview shows the true colours.
        plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
        plt.title(category)
        plt.show()

In [None]:
training_data = []


def create_training_data():
    """Load every image under DATADIR into the global ``training_data`` list.

    For each category in CATEGORIES, every file in that category's folder is
    read with OpenCV, resized to IMG_SIZE x IMG_SIZE and appended as
    ``[image_array, class_index]``, where ``class_index`` is the category's
    position in CATEGORIES.

    Files that cannot be decoded or resized are skipped (best effort), but
    they are counted and reported instead of being silently ignored.

    Returns:
        None. Results are appended to the module-level ``training_data``.
    """
    skipped = []
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            # cv2.imread does not raise on unreadable files; it returns None.
            img_array = cv2.imread(img_path)
            if img_array is None:
                skipped.append(img_path)
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error as e:
                skipped.append(f"{img_path} ({e})")
                continue
            training_data.append([new_array, class_num])

    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")


create_training_data()

In [None]:
# Number of images that were loaded and resized into training_data
print(len(training_data))

In [None]:
# Sample count; used as the row dimension when the images are flattened
lenofimage = len(training_data)

In [None]:
# Split the [image, label] pairs into parallel feature / target lists
X = [sample[0] for sample in training_data]
y = [sample[1] for sample in training_data]

# Stack the images and flatten each one into a single row of pixel values
X = np.array(X).reshape(lenofimage, -1)

In [None]:
# (number of images, flattened values per image) after reshape(lenofimage, -1)
X.shape

**Flattened array: each image is now a single row of pixel values**

In [None]:
# Row at index 1 of the flattened matrix (values not yet divided by 255)
X[1]

Max-scaling: divide every pixel value by the maximum intensity (255) so that all features lie in [0, 1].


In [None]:
# Scale pixel intensities from [0, 255] to [0, 1].
# Guard on the dtype so that re-running this cell does not divide an already
# scaled (floating-point) array by 255 a second time.
# NOTE(review): assumes the freshly loaded pixel array is integer-typed (the
# cv2.imread default) — confirm if the loading step changes.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0

Example of a flattened array after scaling:

In [None]:
# Same row as before, now after the division by 255.0
X[1]

Note: `y` must be converted to a NumPy array before modelling.


In [None]:
# Turn the Python list of class indices into a NumPy array
y = np.asarray(y)

In [None]:
# One label per entry of training_data
y.shape

Now that the dependent and independent features are ready, it's time for data modelling.

applying train_test_split on our data

In [None]:
# Hold out 20% of the samples for testing.
# random_state makes the split (and every metric computed from it) reproducible;
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)

**fitting our data in SVM model**

In [None]:
# Linear-kernel SVM on the flattened, scaled pixel values.
# `gamma` is omitted on purpose: it only parameterises the rbf / poly / sigmoid
# kernels and has no effect when kernel="linear".
svc = SVC(kernel="linear")
svc.fit(X_train, y_train)

**predicting the X_test**

In [None]:
# Predicted class for every row of the held-out test split
y_pred = svc.predict(X_test)

Evaluation 

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label
print("Accuracy on unknown data is", accuracy_score(y_test, y_pred))

In [None]:
# classification_report returns a multi-line table (not an accuracy value);
# print it under its own heading so the column header stays aligned.
print("Classification report on unknown data:")
print(classification_report(y_test, y_pred))

In [None]:
classes = CATEGORIES

# Confusion matrix.
# The labels of this model are indices into CATEGORIES; passing them explicitly
# keeps one row/column per category even if a class is absent from this test
# split, so the tick labels below always line up with the matrix.
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(classes)))

fig, ax = plt.subplots(figsize=(10, 10))
im = ax.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
ax.set_title("Confusion Matrix")
fig.colorbar(im, ax=ax)

tick_marks = np.arange(len(classes))
ax.set_xticks(tick_marks)
ax.set_xticklabels(classes, rotation=45)
ax.set_yticks(tick_marks)
ax.set_yticklabels(classes)

# Annotate every cell with its count: white text on dark cells, black on light
thresh = cm.max() / 2.0
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    ax.text(
        j,
        i,
        cm[i, j],
        horizontalalignment="center",
        color="white" if cm[i, j] > thresh else "black",
    )

ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
# Lay out only after the axis labels exist; otherwise they can be clipped
fig.tight_layout()

plt.show()

Using extracted **features** instead of the raw image arrays for the model

In [None]:
# Location of the pre-extracted feature table.
# It is expected to sit in the notebook's current working directory.
csv_file_name = "features.csv"

current_directory = os.getcwd()
csv_file_path = os.path.join(current_directory, csv_file_name)

# The first CSV column is used as the DataFrame index
data = pd.read_csv(csv_file_path, index_col=0)

# 'classlabel' holds the target; every other column is treated as a feature
y = data["classlabel"]
X = data.drop(columns="classlabel")
print(X.head())

# Hand plain NumPy arrays to the modelling cells
X, y = X.to_numpy(), y.to_numpy()

Now that the dependent and independent features are ready, it's time for data modelling.

applying train_test_split on our data

In [None]:
# Hold out 20% of the samples for testing.
# random_state makes the split (and every metric computed from it) reproducible;
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)

**fitting our data in SVM model**

In [None]:
# Linear-kernel SVM on the extracted features, with solver progress output.
# `gamma` is omitted on purpose: it only parameterises the rbf / poly / sigmoid
# kernels and has no effect when kernel="linear".
svc = SVC(kernel="linear", verbose=True)
svc.fit(X_train, y_train)

**predicting the X_test**

In [None]:
# Predicted class for every row of the held-out test split
y_pred = svc.predict(X_test)

Evaluation 

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label
print("Accuracy on unknown data is", accuracy_score(y_test, y_pred))

In [None]:
# classification_report returns a multi-line table (not an accuracy value);
# print it under its own heading so the column header stays aligned.
print("Classification report on unknown data:")
print(classification_report(y_test, y_pred))

In [None]:
# Confusion matrix.
# Fix the row/column order explicitly: the sorted union of true and predicted
# labels (the same order confusion_matrix would pick by default).
labels = np.unique(np.concatenate((y_test, y_pred)))
cm = confusion_matrix(y_test, y_pred, labels=labels)

if labels.dtype.kind in "OUS" or len(CATEGORIES) != len(labels):
    # Textual labels (or a count mismatch): name the ticks after the labels
    # themselves so they are guaranteed to match the matrix rows.
    classes = [str(label) for label in labels]
else:
    # NOTE(review): assumes the numeric labels in the CSV index into
    # CATEGORIES in the same order — confirm against how features.csv was built.
    classes = CATEGORIES

fig, ax = plt.subplots(figsize=(10, 10))
im = ax.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
ax.set_title("Confusion Matrix")
fig.colorbar(im, ax=ax)

tick_marks = np.arange(len(classes))
ax.set_xticks(tick_marks)
ax.set_xticklabels(classes, rotation=45)
ax.set_yticks(tick_marks)
ax.set_yticklabels(classes)

# Annotate every cell with its count: white text on dark cells, black on light
thresh = cm.max() / 2.0
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    ax.text(
        j,
        i,
        cm[i, j],
        horizontalalignment="center",
        color="white" if cm[i, j] > thresh else "black",
    )

ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
# Lay out only after the axis labels exist; otherwise they can be clipped
fig.tight_layout()

plt.show()

Using grid search to tune the SVM hyperparameters


In [None]:
# Hyper-parameter grid explored by GridSearchCV:
#   C      - regularisation parameter (larger C = weaker regularisation)
#   gamma  - kernel coefficient of the rbf / poly kernels
#   kernel - kernel families to compare
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": [0.0001, 0.001, 0.1, 1],
    "kernel": ["rbf", "poly"],
}

# Base support vector classifier. probability=True enables predict_proba at
# the cost of an extra internal cross-validation per fit; random_state pins
# that step so repeated runs agree.
base_svc = SVC(probability=True, random_state=42)

# Exhaustive search over param_grid; n_jobs=-1 evaluates the candidates on all
# available CPU cores. The search object keeps the name `svc` because the
# following cells call svc.fit / svc.predict on it.
svc = GridSearchCV(base_svc, param_grid, n_jobs=-1)

In [None]:
# Run the grid search on the training split
svc.fit(X_train, y_train)

**predicting the X_test**

In [None]:
# Predictions of the fitted grid-search object for the held-out test split
y_pred = svc.predict(X_test)

Evaluation 

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label
print("Accuracy on unknown data is", accuracy_score(y_test, y_pred))

In [None]:
# classification_report returns a multi-line table (not an accuracy value);
# print it under its own heading so the column header stays aligned.
print("Classification report on unknown data:")
print(classification_report(y_test, y_pred))

In [None]:
# Confusion matrix.
# Fix the row/column order explicitly: the sorted union of true and predicted
# labels (the same order confusion_matrix would pick by default).
labels = np.unique(np.concatenate((y_test, y_pred)))
cm = confusion_matrix(y_test, y_pred, labels=labels)

if labels.dtype.kind in "OUS" or len(CATEGORIES) != len(labels):
    # Textual labels (or a count mismatch): name the ticks after the labels
    # themselves so they are guaranteed to match the matrix rows.
    classes = [str(label) for label in labels]
else:
    # NOTE(review): assumes the numeric labels in the CSV index into
    # CATEGORIES in the same order — confirm against how features.csv was built.
    classes = CATEGORIES

fig, ax = plt.subplots(figsize=(10, 10))
im = ax.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
ax.set_title("Confusion Matrix")
fig.colorbar(im, ax=ax)

tick_marks = np.arange(len(classes))
ax.set_xticks(tick_marks)
ax.set_xticklabels(classes, rotation=45)
ax.set_yticks(tick_marks)
ax.set_yticklabels(classes)

# Annotate every cell with its count: white text on dark cells, black on light
thresh = cm.max() / 2.0
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    ax.text(
        j,
        i,
        cm[i, j],
        horizontalalignment="center",
        color="white" if cm[i, j] > thresh else "black",
    )

ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
# Lay out only after the axis labels exist; otherwise they can be clipped
fig.tight_layout()

plt.show()