In [4]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# Load the Iris dataset
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])


In [6]:
# Feature Selection
features = iris['feature_names']

In [7]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(iris_df, test_size=0.2, random_state=42)

In [8]:
# Preprocessing
def preprocess_data(data, features):
    X = data[features]
    y = data['target']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y

In [9]:
# Train an SVM model
def train_svm(X_train, y_train):
    svm_model = SVC(kernel='linear', C=1)
    svm_model.fit(X_train, y_train)
    return svm_model

In [10]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    matrix = confusion_matrix(y_test, y_pred)
    return accuracy, report, matrix

In [11]:
# Cross-validate the model
def cross_validate_svm(X, y, folds=5):
    svm_model = SVC(kernel='linear', C=1)
    scores = cross_val_score(svm_model, X, y, cv=folds, scoring='accuracy')
    return scores

In [13]:
# Preprocess the training data
X_train_scaled, y_train = preprocess_data(train_data, features)

In [14]:
# Train an SVM model
svm_model = train_svm(X_train_scaled, y_train)

In [15]:
# Preprocess the test data
X_test_scaled, y_test = preprocess_data(test_data, features)

In [16]:
# Evaluate the model
accuracy, report, confusion_matrix = evaluate_model(svm_model, X_test_scaled, y_test)
print(f'Accuracy of the model on the test set: {accuracy:.2f}')
print('\nClassification Report:')
print(report)
print('\nConfusion Matrix:')
print(confusion_matrix)

Accuracy of the model on the test set: 0.90

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        10
         1.0       0.80      0.89      0.84         9
         2.0       0.90      0.82      0.86        11

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30


Confusion Matrix:
[[10  0  0]
 [ 0  8  1]
 [ 0  2  9]]


In [17]:
# Cross-validate the model
X_all_scaled, y_all = preprocess_data(iris_df, features)
cross_val_scores = cross_validate_svm(X_all_scaled, y_all)
print(f'\nCross-Validation Scores: {cross_val_scores}')


Cross-Validation Scores: [0.96666667 1.         0.93333333 0.93333333 1.        ]
