In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn import metrics
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

In [2]:
df = pd.read_csv('features.csv')
# display(df)
svm_train, svm_test = train_test_split(df, test_size=0.2)
X_svm_train, y_svm_train = svm_train.drop(columns=['Breed']), svm_train['Breed']
X_svm_test, y_svm_test = svm_test.drop(columns=['Breed']), svm_test['Breed']

clf = svm.SVC(kernel='rbf') # Linear Kernel
scaler = preprocessing.StandardScaler()
scaler.fit(X_svm_train)

Z_svm_train = scaler.transform(X_svm_train)
Z_svm_test = scaler.transform(X_svm_test)


clf.fit(Z_svm_train, y_svm_train)
y_pred = clf.predict(Z_svm_test)
print(classification_report(y_svm_test, y_pred))

                                precision    recall  f1-score   support

                 affenpinscher       0.77      0.84      0.81        32
                  afghan_hound       1.00      0.94      0.97        52
           african_hunting_dog       0.88      0.96      0.92        24
                      airedale       0.79      0.87      0.83        39
american_staffordshire_terrier       0.62      0.54      0.58        37
                   appenzeller       0.60      0.65      0.62        23
            australian_terrier       0.65      0.80      0.72        30
                       basenji       0.88      0.86      0.87        44
                        basset       0.79      0.81      0.80        37
                        beagle       0.73      0.77      0.75        35
            bedlington_terrier       0.88      1.00      0.93        35
          bernese_mountain_dog       0.88      0.94      0.91        54
              blenheim_spaniel       0.86      1.00      0.93  

In [3]:
# Define parameter grid for grid search
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'gamma': [0.1, 0.01, 0.001],  # Kernel coefficient
    'kernel': ['rbf', 'linear']  # Kernel type
}

In [4]:
# Load data
df = pd.read_csv('features.csv')

# Split data into train and test sets
svm_train, svm_test = train_test_split(df, test_size=0.2)
X_svm_train, y_svm_train = svm_train.drop(columns=['Breed']), svm_train['Breed']
X_svm_test, y_svm_test = svm_test.drop(columns=['Breed']), svm_test['Breed']

# Scale features
scaler = StandardScaler()
scaler.fit(X_svm_train)
Z_svm_train = scaler.transform(X_svm_train)
Z_svm_test = scaler.transform(X_svm_test)

# Define SVM model
svm_model = SVC()

# Perform grid search
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(Z_svm_train, y_svm_train)

# Get best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Evaluate on test set with best parameters
best_model = grid_search.best_estimator_
y_pred = best_model.predict(Z_svm_test)
print("Classification Report:")
print(classification_report(y_svm_test, y_pred))


Best Parameters: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}
Best Score: 0.8058992284256281
Classification Report:
                                precision    recall  f1-score   support

                 affenpinscher       0.84      0.91      0.88        23
                  afghan_hound       0.93      0.98      0.95        42
           african_hunting_dog       0.92      1.00      0.96        24
                      airedale       0.77      0.90      0.83        41
american_staffordshire_terrier       0.41      0.55      0.47        22
                   appenzeller       0.71      0.52      0.60        29
            australian_terrier       0.85      0.85      0.85        46
                       basenji       0.85      0.85      0.85        40
                        basset       0.88      0.82      0.85        28
                        beagle       0.65      0.89      0.76        38
            bedlington_terrier       0.90      0.93      0.91        40
          bernese_mount

In [None]:
# Get classification report
multiclass_report = classification_report(y_svm_test, y_pred)
print("Multiclass Classification Report:")
print(multiclass_report)

# Get confusion matrix
conf_matrix = confusion_matrix(y_svm_test, y_pred)

# Calculate percentage of correct predictions for each breed
breed_counts = df['Breed'].value_counts()
breed_accuracy = {}
for i in range(len(breed_counts)):
    breed = breed_counts.index[i]
    total_count = breed_counts[i]
    correct_count = conf_matrix[i][i]
    accuracy = correct_count / total_count * 100
    breed_accuracy[breed] = accuracy

# Sort breeds by accuracy
sorted_breeds = sorted(breed_accuracy.items(), key=lambda x: x[1], reverse=True)

print("\nSummary of Predicted Breeds Accuracy:")
for breed, accuracy in sorted_breeds:
    print(f"{breed}: {accuracy:.2f}%")

# Get the worst predicted breed
worst_breed = sorted_breeds[-1][0]
worst_accuracy = sorted_breeds[-1][1]
print("\nThe worst predicted breed is:", worst_breed, "with accuracy:", worst_accuracy)
