# Data Processing

## Libraries

In [459]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

## Dataset

In [460]:
dataset = pd.read_csv('data.csv')

In [461]:
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

## Data Encoding

In [462]:
X_encoded = pd.get_dummies(X, columns=['Gender'])

In [463]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

## Standard Feature Scaling

In [464]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

## ANOVA Feature Selection

In [465]:
k = 5  # Number of features to select
selector = SelectKBest(score_func=f_classif, k=k)
X_selected = selector.fit_transform(X_scaled, y_encoded)

## Training and Test Data Allocation

In [466]:
X_train, X_test, y_train, y_test = train_test_split(X_selected, y_encoded, test_size=0.2)

# Model Training and Prediction

## Cross Validation

In [467]:
# param_grid = [
#     {   'C': [0.001, 0.01, 0.1, 1, 10, 100],
#         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
# ]

# optimal_params = GridSearchCV(
#     SVC(),
#     param_grid,
#     cv = 5,
#     scoring = 'accuracy',
#     verbose = 0
# )

In [468]:
# optimal_params.fit(X_train, y_train)

In [469]:
# print(optimal_params.best_params_)

## SVM Training

In [470]:
svm = SVC(C = 100, gamma = 0.01, kernel = 'rbf')
svm.fit(X_train, y_train)

In [471]:
y_pred = svm.predict(X_test)

# Results

## Decoding

In [472]:
y_pred_decoded = label_encoder.inverse_transform(y_pred)
y_test_decoded = label_encoder.inverse_transform(y_test)

## Confusion Matrix

In [473]:
# cm = confusion_matrix(y_test, prediction)
# cm_fig = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ['ND', 'PD', 'D'])
# cm_fig.plot()
# plt.show()

In [474]:
print("Confusion Matrix:")
print(confusion_matrix(y_test_decoded, y_pred_decoded))

Confusion Matrix:
[[ 24   1   4]
 [  2   4   5]
 [  2   0 158]]


## Classification Report

In [475]:
print(classification_report(y_test_decoded, y_pred_decoded))

              precision    recall  f1-score   support

           N       0.86      0.83      0.84        29
           P       0.80      0.36      0.50        11
           Y       0.95      0.99      0.97       160

    accuracy                           0.93       200
   macro avg       0.87      0.73      0.77       200
weighted avg       0.93      0.93      0.92       200

