In [32]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler

Prediction before scaling

In [33]:
#setting up labels for dataset
labels = ['class', 'spec_num', 'eccentr', 'asp_ratio', 'elong', 'solidity', 'stoch_conv', 'iso_factor', 'max_ind_depth', 'lobedness', 'av_intensity', 'av_contr', 'smooth', 'third_mom', 'unif', 'entropy']

#importing data
df = pd.read_csv(r'./leaf/leaf.csv', header = None, names = labels)

# shuffling the dataframe
df = df.sample(frac=1).reset_index()
df = df.iloc[:, 1:17]   # needed to eliminate the old indexes column

#separating y from x
X = df.iloc[:, 2:16]
y = df.iloc[:, 0]

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100)

In [35]:
# Finding the best hyperparameters
params = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

clf = GridSearchCV(
    estimator=SVC(),
    param_grid=params,
    cv=5,
    n_jobs=5,
    verbose=1
)

clf.fit(X_train, y_train)
print(clf.best_params_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits




{'C': 1000, 'gamma': 1, 'kernel': 'linear'}


In [36]:
# Building and fit the classifier
clf = SVC(kernel='rbf', gamma=0.01, C=1000)
clf.fit(X_train, y_train)

In [37]:
# Make predictions and check the accuracy
predictions = clf.predict(X_test)
print(accuracy_score(y_test, predictions))

0.5764705882352941


Prediction after scaling

In [38]:
#setting up labels for dataset
labels = ['class', 'spec_num', 'eccentr', 'asp_ratio', 'elong', 'solidity', 'stoch_conv', 'iso_factor', 'max_ind_depth', 'lobedness', 'av_intensity', 'av_contr', 'smooth', 'third_mom', 'unif', 'entropy']
#importing data
df = pd.read_csv(r'./leaf/leaf.csv', header = None, names = labels)
# shuffling the dataframe
df = df.sample(frac=1).reset_index()
df = df.iloc[:, 1:17]   # needed to eliminate the old indexes column
#separating y from x
X = df.iloc[:, 2:16]
y = df.iloc[:, 0]

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 100)

In [40]:
column_transformer = make_column_transformer(
    (StandardScaler(), ['eccentr', 'asp_ratio', 'elong', 'solidity', 'stoch_conv', 'iso_factor', 'max_ind_depth', 'lobedness', 'av_intensity', 'av_contr', 'smooth', 'third_mom', 'unif', 'entropy']), remainder='passthrough')

In [41]:
# Transform the training features
X_train = column_transformer.fit_transform(X_train)
X_train = pd.DataFrame(data=X_train, columns=column_transformer.get_feature_names_out())
print(X_train)

<bound method NDFrame._add_numeric_operations.<locals>.mean of      standardscaler__eccentr  standardscaler__asp_ratio  \
0                  -1.146493                  -0.550420   
1                   0.919406                   0.025640   
2                  -1.365607                  -0.490977   
3                   0.448150                  -0.339955   
4                   0.578756                  -0.270814   
..                       ...                        ...   
250                 0.514205                  -0.172085   
251                -0.436076                  -0.417007   
252                 1.281857                   1.358866   
253                 0.419232                  -0.272640   
254                 0.754770                  -0.273020   

     standardscaler__elong  standardscaler__solidity  \
0                -1.122921                  0.061594   
1                 0.520681                  0.572840   
2                -1.408433                  0.305346   
3   

In [42]:
# Finding the best hyperparameters
params = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

clf = GridSearchCV(
    estimator=SVC(),
    param_grid=params,
    cv=5,
    n_jobs=5,
    verbose=1
)

clf.fit(X_train, y_train)
print(clf.best_params_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits




{'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'}


In [43]:
# Building and fit the classifier
clf = SVC(kernel='rbf', gamma=0.01, C=1000)
clf.fit(X_train, y_train)

In [44]:
# Transform the training data
X_test = column_transformer.transform(X_test)
X_test = pd.DataFrame(data=X_test, columns=column_transformer.get_feature_names_out())

In [45]:
# Make predictions and check the accuracy
predictions = clf.predict(X_test)
print(accuracy_score(y_test, predictions))

0.788235294117647
