## importing libraries


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
## Load dataset

from sklearn.datasets import load_wine
wine = load_wine()
wine.keys()


dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [3]:
wine['data']


array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

In [4]:
wine['feature_names']


['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [5]:
print(wine['DESCR'])


.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

:Number of Instances: 178
:Number of Attributes: 13 numeric, predictive attributes and the class
:Attribute Information:
    - Alcohol
    - Malic acid
    - Ash
    - Alcalinity of ash
    - Magnesium
    - Total phenols
    - Flavanoids
    - Nonflavanoid phenols
    - Proanthocyanins
    - Color intensity
    - Hue
    - OD280/OD315 of diluted wines
    - Proline
    - class:
        - class_0
        - class_1
        - class_2

:Summary Statistics:

                                Min   Max   Mean     SD
Alcohol:                      11.0  14.8    13.0   0.8
Malic Acid:                   0.74  5.80    2.34  1.12
Ash:                          1.36  3.23    2.36  0.27
Alcalinity of Ash:            10.6  30.0    19.5   3.3
Magnesium:                    70.0 162.0    99.7  14.3
Total Phenols:                0.98  3.88    2.29  0.63
Flavanoids:                   0.34  5.08    2.03  1.00

In [6]:
df_feature = pd.DataFrame(wine['data'], columns=wine['feature_names'])


In [7]:
df_feature


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [8]:
wine['target_names']


array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [9]:
df_target = pd.DataFrame(wine['target'], columns=['wine_class'])
df_target


Unnamed: 0,wine_class
0,0
1,0
2,0
3,0
4,0
...,...
173,2
174,2
175,2
176,2


In [10]:
## Exploring the dataset
df_feature.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
dtypes: fl

In [11]:
from sklearn.model_selection import train_test_split

X = df_feature
y = df_target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
from sklearn.svm import SVC


In [13]:
model = SVC()


In [14]:
model.fit(X_train, y_train)


  y = column_or_1d(y, warn=True)


0,1,2
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",1.0
,"kernel  kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf' Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``. For an intuitive visualization of different kernel types see :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.",'rbf'
,"degree  degree: int, default=3 Degree of the polynomial kernel function ('poly'). Must be non-negative. Ignored by all other kernels.",3
,"gamma  gamma: {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses  1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features - if float, must be non-negative. .. versionchanged:: 0.22  The default value of ``gamma`` changed from 'auto' to 'scale'.",'scale'
,"coef0  coef0: float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.",0.0
,"shrinking  shrinking: bool, default=True Whether to use the shrinking heuristic. See the :ref:`User Guide `.",True
,"probability  probability: bool, default=False Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `.",False
,"tol  tol: float, default=1e-3 Tolerance for stopping criterion.",0.001
,"cache_size  cache_size: float, default=200 Specify the size of the kernel cache (in MB).",200
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",


In [15]:
y_pred = model.predict(X_test)


In [16]:
## Evaluation

from sklearn.metrics import classification_report, accuracy_score

print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.73      0.79      0.76        14
           2       0.57      0.50      0.53         8

    accuracy                           0.81        36
   macro avg       0.77      0.76      0.76        36
weighted avg       0.80      0.81      0.80        36

Accuracy: 0.806


## optimizing hyperparameters

1. kernel
2. C


## Run SVM with different kernels and C values


In [17]:
from sklearn.metrics import accuracy_score

# Linear kernel with C=10
linear_svc = SVC(kernel='linear', C=10.0)
linear_svc.fit(X_train, y_train)
y_pred_linear = linear_svc.predict(X_test)

print("Linear kernel (C=10) accuracy:", accuracy_score(y_test, y_pred_linear))


  y = column_or_1d(y, warn=True)


Linear kernel (C=10) accuracy: 1.0


In [18]:
# RBF kernel with C=11
rbf_svc = SVC(kernel='rbf', C=11.0)
rbf_svc.fit(X_train, y_train)
y_pred_rbf = rbf_svc.predict(X_test)

print("RBF kernel (C=11) accuracy:", accuracy_score(y_test, y_pred_rbf))


RBF kernel (C=11) accuracy: 0.7777777777777778


  y = column_or_1d(y, warn=True)


In [19]:
# Polynomial kernel with C=13
poly_svc = SVC(kernel='poly', C=13.0)
poly_svc.fit(X_train, y_train)
y_pred_poly = poly_svc.predict(X_test)

print("Polynomial kernel (C=13) accuracy:", accuracy_score(y_test, y_pred_poly))


Polynomial kernel (C=13) accuracy: 0.8333333333333334


  y = column_or_1d(y, warn=True)


In [20]:
# Sigmoid kernel with C=13
sigmoid_svc = SVC(kernel='sigmoid', C=13.0)
sigmoid_svc.fit(X_train, y_train)
y_pred_sigmoid = sigmoid_svc.predict(X_test)

print("Sigmoid kernel (C=13) accuracy:", accuracy_score(y_test, y_pred_sigmoid))


Sigmoid kernel (C=13) accuracy: 0.16666666666666666


  y = column_or_1d(y, warn=True)


In [21]:
## GridSearchCV for hyperparameter optimization

from sklearn.model_selection import GridSearchCV

para_grid = {
    'C': [1, 2, 4, 6, 8, 10],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
}

opt_model = GridSearchCV(SVC(), para_grid, cv=5)

opt_model.fit(X_train, y_train)

y_pred_opt = opt_model.predict(X_test)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [22]:
opt_model.best_params_


{'C': 2, 'kernel': 'linear'}

In [23]:
opt_model.score(X_train, y_train)


1.0

In [24]:
print("Optimized model test accuracy:", accuracy_score(y_test, y_pred_opt))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_opt))


Optimized model test accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [25]:
## Make prediction on a sample

query = df_feature.loc[0:0]
query


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0


In [26]:
result = opt_model.predict(query)

wine_class = wine['target_names'][result[0]]
print(f"Predicted Wine Class: {wine_class}")
print(f"Actual Wine Class: {wine['target_names'][wine['target'][0]]}")


Predicted Wine Class: class_0
Actual Wine Class: class_0


In [27]:
## Find samples from each class

# Get indices for each class
class_0_indices = df_target[df_target['wine_class'] == 0].index.tolist()
class_1_indices = df_target[df_target['wine_class'] == 1].index.tolist()
class_2_indices = df_target[df_target['wine_class'] == 2].index.tolist()

print(f"Class 0 ({wine['target_names'][0]}) samples: {len(class_0_indices)}")
print(f"Class 1 ({wine['target_names'][1]}) samples: {len(class_1_indices)}")
print(f"Class 2 ({wine['target_names'][2]}) samples: {len(class_2_indices)}")


Class 0 (class_0) samples: 59
Class 1 (class_1) samples: 71
Class 2 (class_2) samples: 48


In [28]:
## Test prediction for Class 0

query_class0 = df_feature.loc[class_0_indices[0]:class_0_indices[0]]
result_class0 = opt_model.predict(query_class0)

print(f"Sample Index: {class_0_indices[0]}")
print(f"Predicted Wine Class: {wine['target_names'][result_class0[0]]}")
print(f"Actual Wine Class: {wine['target_names'][wine['target'][class_0_indices[0]]]}")
print(f"Match: {'✓ Correct' if result_class0[0] == wine['target'][class_0_indices[0]] else '✗ Incorrect'}")


Sample Index: 0
Predicted Wine Class: class_0
Actual Wine Class: class_0
Match: ✓ Correct


In [29]:
## Test prediction for Class 1

query_class1 = df_feature.loc[class_1_indices[0]:class_1_indices[0]]
result_class1 = opt_model.predict(query_class1)

print(f"Sample Index: {class_1_indices[0]}")
print(f"Predicted Wine Class: {wine['target_names'][result_class1[0]]}")
print(f"Actual Wine Class: {wine['target_names'][wine['target'][class_1_indices[0]]]}")
print(f"Match: {'✓ Correct' if result_class1[0] == wine['target'][class_1_indices[0]] else '✗ Incorrect'}")


Sample Index: 59
Predicted Wine Class: class_1
Actual Wine Class: class_1
Match: ✓ Correct


In [30]:
## Test prediction for Class 2

query_class2 = df_feature.loc[class_2_indices[0]:class_2_indices[0]]
result_class2 = opt_model.predict(query_class2)

print(f"Sample Index: {class_2_indices[0]}")
print(f"Predicted Wine Class: {wine['target_names'][result_class2[0]]}")
print(f"Actual Wine Class: {wine['target_names'][wine['target'][class_2_indices[0]]]}")
print(f"Match: {'✓ Correct' if result_class2[0] == wine['target'][class_2_indices[0]] else '✗ Incorrect'}")


Sample Index: 130
Predicted Wine Class: class_2
Actual Wine Class: class_2
Match: ✓ Correct


In [38]:
query_class2 = df_feature.loc[class_2_indices[0]:class_2_indices[0]]
result_class2 = opt_model.predict(query_class2)

print(class_2_indices[10])
print(class_1_indices[0])
print(class_0_indices[0])

140
59
0


In [40]:
print(f"Predicted Wine Class: {wine['target_names'][result_class2[0]]}")
print(f"Actual Wine Class: {wine['target_names'][wine['target'][class_2_indices[0]]]}")

Predicted Wine Class: class_2
Actual Wine Class: class_2
