In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the Wine dataset
wine = load_wine()
wine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],
                        columns=np.append(wine['feature_names'], 'target'))

In [3]:
# Feature Selection
features = wine['feature_names']

In [4]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(wine_df, test_size=0.2, random_state=42)

In [5]:
# Preprocessing
def preprocess_data(data, features):
    X = data[features]
    y = data['target']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y

In [6]:
# Train an SVM model
def train_svm(X_train, y_train):
    svm_model = SVC(kernel='linear', C=1)
    svm_model.fit(X_train, y_train)
    return svm_model

In [7]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    matrix = confusion_matrix(y_test, y_pred)
    return accuracy, report, matrix

In [8]:
# Cross-validate the model
def cross_validate_svm(X, y, folds=5):
    svm_model = SVC(kernel='linear', C=1)
    scores = cross_val_score(svm_model, X, y, cv=folds, scoring='accuracy')
    return scores

In [9]:
# Preprocess the training data
X_train_scaled, y_train = preprocess_data(train_data, features)

In [10]:
# Train an SVM model
svm_model = train_svm(X_train_scaled, y_train)

In [11]:
# Preprocess the test data
X_test_scaled, y_test = preprocess_data(test_data, features)

In [12]:
# Evaluate the model
accuracy, report, confusion_matrix = evaluate_model(svm_model, X_test_scaled, y_test)
print(f'Accuracy of the model on the test set: {accuracy:.2f}')
print('\nClassification Report:')
print(report)
print('\nConfusion Matrix:')
print(confusion_matrix)

Accuracy of the model on the test set: 0.97

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        14
         1.0       1.00      0.93      0.96        14
         2.0       0.89      1.00      0.94         8

    accuracy                           0.97        36
   macro avg       0.96      0.98      0.97        36
weighted avg       0.98      0.97      0.97        36


Confusion Matrix:
[[14  0  0]
 [ 0 13  1]
 [ 0  0  8]]


In [13]:
# Cross-validate the model
X_all_scaled, y_all = preprocess_data(wine_df, features)
cross_val_scores = cross_validate_svm(X_all_scaled, y_all)
print(f'\nCross-Validation Scores: {cross_val_scores}')


Cross-Validation Scores: [0.94444444 0.97222222 0.94444444 0.97142857 0.94285714]
