In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold

In [2]:
def evaluate_svm_model(X, y, description):
    svm_model = SVC(kernel='poly', random_state=42)
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_scores = cross_val_score(svm_model, X, y, cv=kf, scoring='accuracy')
    print(f'{description} - Mean Accuracy: {np.mean(cv_scores):.4f}, Std Dev: {np.std(cv_scores):.4f}')
    print()

def pca_model(X,y,des1,n=None):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    p1 = PCA(n_components=n)
    x_pca = p1.fit_transform(X_scaled)
    evaluate_svm_model(x_pca,y,des1)

In [3]:
print("For abalone dataset\n")
abalone_data = pd.read_csv('abalone1.csv')

le = LabelEncoder()
abalone_data['Sex'] = le.fit_transform(abalone_data['Sex'])

X1 = abalone_data.drop('Rings', axis=1)
y1 = abalone_data['Rings']

evaluate_svm_model(X1, y1, "Original Data")
pca_model(X1, y1, "PCA Transformed Data (All Components)")
pca_model(X1, y1, "PCA Transformed Data (4 Components)",4)

For abalone dataset

Original Data - Mean Accuracy: 0.2502, Std Dev: 0.0240

PCA Transformed Data (All Components) - Mean Accuracy: 0.2451, Std Dev: 0.0182

PCA Transformed Data (4 Components) - Mean Accuracy: 0.2418, Std Dev: 0.0197



In [4]:
print("For Breast Cancer dataset\n")
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

X2 = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)
y2 = pd.Series(cancer_data.target)

evaluate_svm_model(X2, y2, "Original Data")
pca_model(X2, y2, "PCA Transformed All Components")
pca_model(X2, y2, "PCA Transformed 20 Components",20)
pca_model(X2, y2, "PCA Transformed 10 Components",10)

For Breast Cancer dataset

Original Data - Mean Accuracy: 0.9068, Std Dev: 0.0505

PCA Transformed All Components - Mean Accuracy: 0.9068, Std Dev: 0.0263

PCA Transformed 20 Components - Mean Accuracy: 0.9068, Std Dev: 0.0263

PCA Transformed 10 Components - Mean Accuracy: 0.9086, Std Dev: 0.0249

