# Data Processing

## Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

## Dataset

In [None]:
# Load the raw data; the relative path is resolved against the current working directory.
dataset = pd.read_csv('data(1).csv')

In [None]:
# Preview the first rows of the loaded frame.
dataset.head()

In [None]:
# Summary statistics per column; include='all' also covers non-numeric columns.
dataset.describe(include='all')

In [None]:
# Features are every column except the last; the last column is the target.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [None]:
# Print the feature matrix; the trailing expression shows its shape as the cell output.
print(X)
X.shape

In [None]:
# Print the target vector; the trailing expression shows its shape as the cell output.
print(y)
y.shape

## Encode Categorical Data

In [None]:
# One-hot encode column 0 of the independent variables; every other column is
# passed through unchanged, after the encoded columns.
# sparse_threshold=0 forces a dense result: with the default threshold the
# transformer may return a SciPy sparse matrix, which np.array() would wrap in
# a 0-d object array and break the column slicing done in later cells.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))

In [None]:
# Print the first four columns of the transformed matrix as a sanity check
# (the encoder output is placed ahead of the passthrough columns).
print(X[:, 0:4])

In [None]:
# Encode the dependent variable as integers: learn the class set first,
# then map every label to its class index.
le = LabelEncoder().fit(y)
y = le.transform(y)

In [None]:
# Print the integer-encoded target.
print(y)

## Training and Test Set Allocation

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1_000_000,
)

In [None]:
# print(X_train)

In [None]:
# print(X_test)

In [None]:
# print(y_train)

In [None]:
# print(y_test)

## Feature Scaling

In [None]:
# Standardise the columns from index 3 onward; columns 0-2 are left untouched.
# The scaler is fitted on the training split only and the same mean/std are
# then applied to the test split. Refitting on the test data would scale the
# two splits differently and leak test-set statistics into the evaluation.
sc = StandardScaler()
X_train[:, 3:] = sc.fit_transform(X_train[:, 3:])
X_test[:, 3:] = sc.transform(X_test[:, 3:])

In [None]:
# Print the training features after scaling.
print(X_train)

In [None]:
# Print the test features after scaling.
print(X_test)

# SVM Training and Prediction

## Cross Validation

In [None]:
# param_grid = [
#     {   'C': [0.001, 0.01, 0.1, 1, 10, 100],
#         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
# ]

# optimal_params = GridSearchCV(
#     SVC(),
#     param_grid,
#     cv = 10,
#     scoring = 'accuracy',
#     verbose = 0
# )

In [None]:
# optimal_params.fit(X_train, y_train)

In [None]:
# print(optimal_params.best_params_)

## SVM Training

In [None]:
# RBF-kernel support vector classifier. C=100 and gamma=0.01 both appear in the
# grid defined in the cross-validation section -- presumably its best_params_;
# TODO confirm.
svm = SVC(kernel='rbf', gamma=0.01, C=100)
svm.fit(X_train, y_train)

In [None]:
# Predict encoded class labels for the held-out test set; the trailing
# expression displays them as the cell output.
prediction = svm.predict(X_test)
prediction

# Result Evaluation

## Confusion Matrix

In [None]:
# Confusion matrix of true (rows) vs. predicted (columns) encoded classes.
# NOTE(review): the tick labels are hard-coded and assume encoded classes
# 0, 1, 2 correspond to ND, PD, D in that order -- verify against le.classes_.
cm = confusion_matrix(y_true=y_test, y_pred=prediction)
cm_fig = ConfusionMatrixDisplay(cm, display_labels=['ND', 'PD', 'D'])
cm_fig.plot()
plt.show()

## Classification Report

In [None]:
# Per-class precision, recall, F1 and support on the test set; classes are
# listed by their encoded integer label because no target names are passed.
print(classification_report(y_test, prediction))