Download apples_and_oranges.csv and train an SVM classifier on it to predict the class of a fruit (apple or orange) from its weight and size. Print the confusion matrix and report the model's accuracy on the test set.

In [1]:
#Import Required Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# Read the fruit measurements from the CSV file, then preview the first ten rows
data = pd.read_csv(filepath_or_buffer="apples_and_oranges.csv")
data.head(n=10)

Unnamed: 0,Weight,Size,Class
0,69,4.39,orange
1,69,4.21,orange
2,65,4.09,orange
3,72,5.85,apple
4,67,4.7,orange
5,73,5.68,apple
6,70,5.56,apple
7,75,5.11,apple
8,74,5.36,apple
9,65,4.27,orange


In [3]:
# Show the DataFrame's dimensions as a (rows, columns) tuple
data.shape

(40, 3)

In [5]:
# Target: the fruit class label; predictors: the 'Weight' and 'Size' columns
y = data['Class']
X = data.loc[:, ['Weight', 'Size']]

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Baseline model: a support vector classifier with a linear kernel
svm_classifier = SVC(kernel="linear")

In [8]:
# Fit the linear SVM on the training split only
svm_classifier.fit(X=X_train, y=y_train)

In [9]:
# Predict the class of every held-out test sample
y_pred = svm_classifier.predict(X=X_test)

In [10]:
# Score the baseline predictions against the true test labels:
# the confusion matrix (rows = true class, columns = predicted class)
# and the overall fraction of correct predictions
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [11]:
# Report the baseline model's test-set metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:", conf_matrix, sep="\n")

Accuracy: 1.0
Confusion Matrix:
[[3 0]
 [0 5]]


### Hyperparameter Tuning
#### GridSearchCV

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
# Define the parameter grid for tuning.
# `gamma` only affects the 'rbf' and 'poly' kernels, so it is searched in its
# own sub-grid. Crossing it with 'linear' in a single dict would fit the same
# linear model once per gamma value (18 candidates instead of 15 distinct ones).
# GridSearchCV accepts a list of dicts and explores each sub-grid in turn.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
    },
    {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10],
        'gamma': ['scale', 'auto'],
    },
]

In [14]:
# Create an SVM classifier with default settings to serve as the base
# estimator for the grid search.
# NOTE: this rebinds `svm_classifier`, so the linear model fitted earlier in
# the notebook is no longer reachable under that name.
svm_classifier = SVC()

In [15]:
# Exhaustive search over `param_grid`, scored with 5-fold cross-validation;
# n_jobs=-1 runs the fits in parallel on all available cores
grid_search = GridSearchCV(
    estimator=svm_classifier,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)

In [16]:
# Run the cross-validated search on the training split only
grid_search.fit(X=X_train, y=y_train)

In [17]:
# Pull the winning estimator and its hyperparameter setting out of the search
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

In [18]:
# Predict the held-out test samples with the tuned model
# (this overwrites the baseline model's `y_pred`)
y_pred = best_model.predict(X=X_test)

In [19]:
# Score the tuned model's predictions against the true test labels
# (this overwrites the baseline `accuracy` and `conf_matrix`)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [20]:
# Report the selected hyperparameters and the tuned model's test-set metrics
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("Confusion Matrix:", conf_matrix, sep="\n")

Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 1.0
Confusion Matrix:
[[3 0]
 [0 5]]
