In [16]:
# Data and model imports
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from scipy.stats import uniform
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings('ignore')


In [17]:
# Load the Wine dataset bundled with scikit-learn and pull out features / labels
wine = datasets.load_wine()
X, y = wine.data, wine.target

# Quick look at what was loaded: feature names, class names and array shapes
for label, value in (
    ("Feature names:", wine.feature_names),
    ("Target classes:", wine.target_names),
    ("Shape of X:", X.shape),
    ("Shape of y:", y.shape),
):
    print(label, value)


Feature names: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Target classes: ['class_0' 'class_1' 'class_2']
Shape of X: (178, 13)
Shape of y: (178,)


In [18]:
# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
# No `stratify` argument is passed, so class proportions are not forced to match across splits.
split_settings = {'test_size': 0.2, 'random_state': 24}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_settings)


In [19]:
# Candidate hyperparameter values; GridSearchCV evaluates every combination of them
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': [0.1, 1, 'scale', 'auto']
}

# Exhaustive search, each candidate scored by accuracy under 5-fold cross-validation
grid_search = GridSearchCV(SVC(), param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the held-out test set
best_grid_model = grid_search.best_estimator_
y_pred_grid = best_grid_model.predict(X_test)

print("GridSearchCV Best Hyperparameters:", grid_search.best_params_)
print("GridSearchCV Accuracy:", accuracy_score(y_test, y_pred_grid))
# Precision, recall and F1 are class-weighted averages over the three classes
for metric_label, metric_fn in (
    ("GridSearchCV Precision:", precision_score),
    ("GridSearchCV Recall:", recall_score),
    ("GridSearchCV F1-score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_grid, average='weighted'))
print("\nClassification Report:\n", classification_report(y_test, y_pred_grid))


GridSearchCV Best Hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
GridSearchCV Accuracy: 0.9444444444444444
GridSearchCV Precision: 0.9493827160493827
GridSearchCV Recall: 0.9444444444444444
GridSearchCV F1-score: 0.9436036129748098

Classification Report:
               precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       1.00      0.86      0.92        14
           2       0.89      1.00      0.94         8

    accuracy                           0.94        36
   macro avg       0.94      0.95      0.94        36
weighted avg       0.95      0.94      0.94        36



In [20]:
# Define hyperparameter distributions.
# scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn from [0.1, 10.1].
param_dist = {
    'C': uniform(0.1, 10),
    'kernel': ['linear', 'rbf', 'poly'],
    # The 'scale' / 'auto' heuristics plus 50 log-spaced values between 1e-3 and 1e3
    'gamma': ['scale', 'auto'] + list(np.logspace(-3, 3, 50))
}

# Randomized search: 20 sampled candidates, each scored with 5-fold cross-validation.
# random_state fixes which candidates are sampled so the search is reproducible.
randomized_search = RandomizedSearchCV(
    estimator=SVC(),
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    random_state=42,
    scoring='accuracy'
)
randomized_search.fit(X_train, y_train)

# Best model from randomized search, evaluated on the held-out test set
best_rand_model = randomized_search.best_estimator_
y_pred_rand = best_rand_model.predict(X_test)

# Baseline for comparison: an SVC with default hyperparameters, trained on the same split.
# Its predictions are kept in `y_pred`, which the results-table cell reads.
baseline_svm = SVC()
baseline_svm.fit(X_train, y_train)
y_pred = baseline_svm.predict(X_test)  # predicted once; a second identical call was redundant

print("RandomizedSearchCV Best Hyperparameters:", randomized_search.best_params_)
print("RandomizedSearchCV Accuracy:", accuracy_score(y_test, y_pred_rand))
print("RandomizedSearchCV Precision:", precision_score(y_test, y_pred_rand, average='weighted'))
print("RandomizedSearchCV Recall:", recall_score(y_test, y_pred_rand, average='weighted'))
print("RandomizedSearchCV F1-score:", f1_score(y_test, y_pred_rand, average='weighted'))
print("\nClassification Report:\n", classification_report(y_test, y_pred_rand))


RandomizedSearchCV Best Hyperparameters: {'C': np.float64(3.845401188473625), 'gamma': np.float64(1.5264179671752334), 'kernel': 'poly'}
RandomizedSearchCV Accuracy: 0.9444444444444444
RandomizedSearchCV Precision: 0.9493827160493827
RandomizedSearchCV Recall: 0.9444444444444444
RandomizedSearchCV F1-score: 0.9436036129748098

Classification Report:
               precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       1.00      0.86      0.92        14
           2       0.89      1.00      0.94         8

    accuracy                           0.94        36
   macro avg       0.94      0.95      0.94        36
weighted avg       0.95      0.94      0.94        36



In [21]:
# Compile test-set metrics for all three models into one comparison table.
# Row order follows this mapping: baseline first, then the two tuned models.
model_predictions = {
    'Baseline SVM': y_pred,
    'GridSearchCV': y_pred_grid,
    'RandomizedSearchCV': y_pred_rand,
}

# Metrics reported as class-weighted averages (accuracy takes no averaging argument)
weighted_metrics = {
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-score': f1_score,
}

table_columns = {
    'Model': list(model_predictions),
    'Accuracy': [accuracy_score(y_test, preds) for preds in model_predictions.values()],
}
for metric_name, metric_fn in weighted_metrics.items():
    table_columns[metric_name] = [
        metric_fn(y_test, preds, average='weighted')
        for preds in model_predictions.values()
    ]

results = pd.DataFrame(table_columns)

print(results)


                Model  Accuracy  Precision    Recall  F1-score
0        Baseline SVM  0.777778   0.800505  0.777778  0.781462
1        GridSearchCV  0.944444   0.949383  0.944444  0.943604
2  RandomizedSearchCV  0.944444   0.949383  0.944444  0.943604
