In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
# Appearance: apply the 'seaborn-v0_8' Matplotlib style sheet to every plot
plt.style.use("seaborn-v0_8")

# Reproducibility: fix the seed of NumPy's global random number generator
np.random.seed(seed=42)

# Dataset

In [3]:
# Load the leaf dataset. Column labels are supplied here via `names=` rather
# than taken from the file. The 'Specimen Number' column is dropped right
# away, so `df` holds only the 'Class' label and the remaining columns.
df = pd.read_csv(
    'Dataset/leaf.csv',
    names=[
        'Class',
        'Specimen Number',
        'Eccentricity',
        'Aspect Ratio',
        'Elongation',
        'Solidity',
        'Stochastic Convexity',
        'Isoperimetric Factor',
        'Maximal Indentation Depth',
        'Lobedness',
        'Average Intensity',
        'Average Contrast',
        'Smoothness',
        'Third moment',
        'Uniformity',
        'Entropy',
    ],
).drop(columns='Specimen Number')

# Preview the first rows
df.head()

Unnamed: 0,Class,Eccentricity,Aspect Ratio,Elongation,Solidity,Stochastic Convexity,Isoperimetric Factor,Maximal Indentation Depth,Lobedness,Average Intensity,Average Contrast,Smoothness,Third moment,Uniformity,Entropy
0,1,0.72694,1.4742,0.32396,0.98535,1.0,0.83592,0.004657,0.003947,0.04779,0.12795,0.016108,0.005232,0.000275,1.1756
1,1,0.74173,1.5257,0.36116,0.98152,0.99825,0.79867,0.005242,0.005002,0.02416,0.090476,0.008119,0.002708,7.5e-05,0.69659
2,1,0.76722,1.5725,0.38998,0.97755,1.0,0.80812,0.007457,0.010121,0.011897,0.057445,0.003289,0.000921,3.8e-05,0.44348
3,1,0.73797,1.4597,0.35376,0.97566,1.0,0.81697,0.006877,0.008607,0.01595,0.065491,0.004271,0.001154,6.6e-05,0.58785
4,1,0.82301,1.7707,0.44462,0.97698,1.0,0.75493,0.007428,0.010042,0.007938,0.045339,0.002051,0.00056,2.4e-05,0.34214


In [4]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Class'),  # predictors: every column except the label
    df['Class'],               # target: the class label
    test_size=0.2,
    random_state=42,
)

# Report the (rows, columns) of each feature partition
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

Training set shape: (272, 14)
Test set shape: (68, 14)


# SVM

In [5]:
# Baseline model: an SVC left at its default hyper-parameters
svm_clf = SVC()

# 5-fold cross-validation over the whole dataset (features = every column
# except 'Class'); the reported figure is the mean of the per-fold scores.
scores = cross_val_score(svm_clf, df.drop(columns='Class'), df['Class'], cv=5)
accuracy = np.mean(scores)

print("Cross-Validation Accuracy:", accuracy)

Cross-Validation Accuracy: 0.2911764705882353


### Grid search

In [6]:
# Hyper-parameter grid explored by the search (every combination is tried).
# NOTE(review): per the scikit-learn SVC documentation, `gamma` is only used by
# the 'rbf'/'poly'/'sigmoid' kernels and `decision_function_shape` only changes
# the shape of `decision_function` output, not the fitted model. Several
# candidates in this grid are therefore equivalent. The grid is left as is so
# that `best_params` keeps the same keys.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.1, 1, 10],
    'decision_function_shape' : ['ovo','ovr']
}

# Base estimator whose hyper-parameters are tuned
svm_clf = SVC()

# Exhaustive search scored with 5-fold cross-validation on the training split only
grid_search = GridSearchCV(svm_clf, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Winning parameter combination and its mean cross-validated score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# GridSearchCV refits the best candidate on the full training split by default
# (refit=True), so reuse that fitted model instead of training an identical
# SVC a second time.
best_svm_clf = grid_search.best_estimator_

# Predict the class labels of the held-out test set with the tuned classifier
best_svm_pred_labels = best_svm_clf.predict(X_test)

# Accuracy of the tuned classifier on the held-out test set
best_svm_accuracy = accuracy_score(y_test, best_svm_pred_labels)

print("Best Parameters:", best_params)
print("Best Score:", best_score)
print("Best SVM Accuracy:", best_svm_accuracy)

Best Parameters: {'C': 10, 'decision_function_shape': 'ovo', 'gamma': 0.1, 'kernel': 'linear'}
Best Score: 0.6397979797979797
Best SVM Accuracy: 0.6617647058823529


### Best Results

In [7]:
# Rebuild an SVC from the hyper-parameters selected by the grid search
svm_clf = SVC(**best_params)

# 5-fold cross-validation over the whole dataset (features = every column
# except 'Class'); the reported figure is the mean of the per-fold scores.
scores = cross_val_score(svm_clf, df.drop(columns='Class'), df['Class'], cv=5)
accuracy = np.mean(scores)

print("Cross-Validation Accuracy:", accuracy)

Cross-Validation Accuracy: 0.6852941176470588
