In [194]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Data Preprocessing

In [195]:
# Read the raw table from data.csv (path is relative to the working directory).
dataset = pd.read_csv(filepath_or_buffer='data.csv')

In [196]:
# Positional split: every column but the last is a feature, the last column
# is the target that gets label-encoded further down.
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

## Data Encoding

In [197]:
# One-hot encode the 'Gender' column; all other feature columns pass through
# unchanged.
X_encoded = pd.get_dummies(
    data=X,
    columns=['Gender'],
)

In [198]:
# Map the target labels to integer codes. The fitted encoder is kept so the
# predictions can be translated back to the original labels in the Results
# section.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y=y)

## Standard Feature Scaling

In [199]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

## ANOVA F-Value Feature Selection

In [200]:
k = 5
selector = SelectKBest(score_func=f_classif, k=k)
X_selected = selector.fit_transform(X_scaled, y_encoded)

## Training and Test Data Allocation

In [201]:
X_train, X_test, y_train, y_test = train_test_split(X_selected, y_encoded, test_size=0.2)

# Model Training and Prediction

## Hyperparameter Optimization

In [202]:
# param_grid = [
#     {   'C': [0.001, 0.01, 0.1, 1, 10, 100],
#         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
# ]

# optimal_params = GridSearchCV(
#     SVC(),
#     param_grid,
#     cv = 5,
#     scoring = 'accuracy',
#     verbose = 0
# )

In [203]:
# optimal_params.fit(X_train, y_train)
# print(optimal_params.best_params_)

## Training

In [204]:
# RBF-kernel support vector classifier with fixed hyperparameters.
# NOTE(review): C and gamma presumably come from the grid search that is
# commented out in the previous section - confirm before changing them.
svm = SVC(kernel='rbf', C=100, gamma=0.01)
svm.fit(X=X_train, y=y_train)

## Prediction

In [205]:
# Predict encoded class labels for the held-out test features.
y_pred = svm.predict(X=X_test)

# Results

## Decoding

In [206]:
# Translate the integer codes (predictions first, then ground truth) back to
# the original class labels so the reports below show readable names.
y_pred_decoded, y_test_decoded = map(
    label_encoder.inverse_transform,
    (y_pred, y_test),
)

## Confusion Matrix

In [207]:
# cm = confusion_matrix(y_test, y_pred)
# cm_fig = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ['ND', 'PD', 'D'])
# cm_fig.plot()
# plt.show()

In [208]:
# Print the heading and the matrix on separate lines; rows are the true
# labels, columns the predicted labels.
print(
    "Confusion Matrix:",
    confusion_matrix(y_true=y_test_decoded, y_pred=y_pred_decoded),
    sep="\n",
)

Confusion Matrix:
[[ 23   0   3]
 [  2   2   4]
 [  2   2 162]]


## Classification Report

In [209]:
# Per-class precision / recall / F1 plus the aggregate averages.
report_text = classification_report(y_true=y_test_decoded, y_pred=y_pred_decoded)
print(report_text)

              precision    recall  f1-score   support

           N       0.85      0.88      0.87        26
           P       0.50      0.25      0.33         8
           Y       0.96      0.98      0.97       166

    accuracy                           0.94       200
   macro avg       0.77      0.70      0.72       200
weighted avg       0.93      0.94      0.93       200

