In [9]:
# Load necessary libraries
library(e1071)       # For SVM and Naive Bayes
library(caret)       # For model evaluation
library(class)       # For kNN
library(dplyr)       # For data manipulation

# Read the raw wine data. The file carries no header row, so the column
# names are assigned explicitly right after loading.
wine_data <- read.csv("wine.data", header = FALSE)
names(wine_data) <- c(
  "class",
  "Alcohol",
  "Malic.acid",
  "Ash",
  "Alcalinity.of.ash",
  "Magnesium",
  "Total.phenols",
  "Flavanoids",
  "Nonflavanoid.phenols",
  "Proanthocyanins",
  "Color.intensity",
  "Hue",
  "OD280.OD315.of.diluted.wines",
  "Proline"
)

# The label column must be a factor so the models below treat the task as
# classification.
wine_data[["class"]] <- as.factor(wine_data[["class"]])

# The 7 predictors retained for modelling (chosen from the PCA loading plot)
selected_features <- c(
  "Total.phenols",
  "Flavanoids",
  "OD280.OD315.of.diluted.wines",
  "Proanthocyanins",
  "Nonflavanoid.phenols",
  "Hue",
  "Proline"
)

# Keep the label as the first column, followed by the retained predictors
wine_selected <- wine_data[c("class", selected_features)]

# 80/20 train/test split; the seed is fixed so the partition is reproducible
set.seed(123)
train_index <- createDataPartition(
  y = wine_selected$class,
  p = 0.8,
  list = FALSE
)
train_data <- wine_selected[train_index, ]
test_data <- wine_selected[-train_index, ]

# --- SVM Models (Selected Features) ---
# Each kernel is tuned with e1071's grid search over the listed
# hyper-parameter values; the model refit with the best combination is kept.

# 1. Linear kernel: search over cost only (0.01, 0.1, 1, 10, 100)
tune_linear <- tune.svm(
  class ~ .,
  data = train_data,
  kernel = "linear",
  cost = 10^(-2:2)
)
best_linear <- tune_linear[["best.model"]]

# 2. Radial kernel: search over every cost (0.1 .. 100) x gamma combination
tune_radial <- tune.svm(
  class ~ .,
  data = train_data,
  kernel = "radial",
  cost = 10^(-1:2),
  gamma = c(0.1, 1, 10)
)
best_radial <- tune_radial[["best.model"]]

# --- kNN ---
# kNN is distance-based, so the features are standardised first.
# The centring and scaling parameters are estimated on the TRAINING set only
# and then re-applied to the test set. Scaling the test set with its own
# mean/sd would place the two sets in different coordinate systems and let
# test-set statistics influence preprocessing.
train_features_scaled <- scale(train_data[, -1])
test_features_scaled <- scale(
  test_data[, -1],
  center = attr(train_features_scaled, "scaled:center"),
  scale = attr(train_features_scaled, "scaled:scale")
)

# Classify each test row by majority vote among its 5 nearest training rows
knn_pred <- knn(
  train = train_features_scaled,
  test = test_features_scaled,
  cl = train_data$class,
  k = 5
)

# --- Evaluate Models ---
# Per-class metrics shown for every model
metric_cols <- c("Precision", "Recall", "F1")

# Test-set predictions from the two tuned SVMs (kNN predictions already exist)
svm_linear_pred <- predict(best_linear, newdata = test_data)
svm_radial_pred <- predict(best_radial, newdata = test_data)

# Confusion matrices of predictions against the true test labels
svm_linear_metrics <- confusionMatrix(
  data = svm_linear_pred,
  reference = test_data$class
)
svm_radial_metrics <- confusionMatrix(
  data = svm_radial_pred,
  reference = test_data$class
)
knn_metrics <- confusionMatrix(
  data = knn_pred,
  reference = test_data$class
)

# --- Print Results ---
# One header line per model (with its tuned hyper-parameters where relevant),
# followed by the per-class precision / recall / F1 table.
cat("SVM Linear (Best C =", best_linear$cost, "):\n")
print(svm_linear_metrics$byClass[, metric_cols])

cat(
  "\nSVM Radial (Best C =", best_radial$cost,
  "Best gamma =", best_radial$gamma, "):\n"
)
print(svm_radial_metrics$byClass[, metric_cols])

cat("\nkNN (k=5):\n")
print(knn_metrics$byClass[, metric_cols])


SVM Linear (Best C = 10 ):
         Precision    Recall        F1
Class: 1 0.8461538 1.0000000 0.9166667
Class: 2 1.0000000 0.7857143 0.8800000
Class: 3 0.9000000 1.0000000 0.9473684

SVM Radial (Best C = 10 Best gamma = 0.1 ):
         Precision    Recall        F1
Class: 1 0.9166667 1.0000000 0.9565217
Class: 2 1.0000000 0.8571429 0.9230769
Class: 3 0.9000000 1.0000000 0.9473684

kNN (k=5):
         Precision    Recall        F1
Class: 1 0.8461538 1.0000000 0.9166667
Class: 2 1.0000000 0.7857143 0.8800000
Class: 3 0.9000000 1.0000000 0.9473684


Among the three models trained on the top 7 PCA-selected features, the SVM with a radial kernel exhibited the 
best overall performance on the held-out test set, matching or exceeding the other two models on precision, 
recall, and F1-score for Class 1 and Class 2, and matching their Class 3 performance. Specifically, it improved 
the F1-score for Class 2 (0.9231) compared to both the linear SVM (0.8800) and kNN (0.8800), and for Class 1 
(0.9565 versus 0.9167). The linear SVM and kNN produced identical results on this split, with lower recall for 
Class 2 (0.7857 versus 0.8571), indicating that while both models recovered every Class 1 and Class 3 sample 
(recall of 1.0), they struggled more with Class 2. Thus, the radial SVM generalized best for this 
classification task on this particular 80/20 split, although the margin over the other two models is small.