# Klasifikasi Penerimaan Murid Prasekolah menggunakan Support Vector Machine (SVM) dan Neural Network (NN)

KELOMPOK 3
- 2210511046 Hanifah Az-Zahra
- 2210511054 Dinda Cantika Putri
- 2210511070 Choirunnisa Zalfaa Nabilah
- 2210511072 Edwina Martha Putri

## Preprocessing Data

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold, cross_val_score

In [3]:
# Menambahkan header
headers = ["parents", "has_nurs", "form", "children", "housing", "finance", "social", "health", "class"]
df = pd.read_csv('nursery/nursery.data', names=headers)
df.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority


In [4]:
df.describe()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class
count,12960,12960,12960,12960,12960,12960,12960,12960,12960
unique,3,5,4,4,3,2,3,3,5
top,usual,proper,complete,1,convenient,convenient,nonprob,recommended,not_recom
freq,4320,2592,3240,3240,4320,6480,4320,4320,4320


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12960 entries, 0 to 12959
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   parents   12960 non-null  object
 1   has_nurs  12960 non-null  object
 2   form      12960 non-null  object
 3   children  12960 non-null  object
 4   housing   12960 non-null  object
 5   finance   12960 non-null  object
 6   social    12960 non-null  object
 7   health    12960 non-null  object
 8   class     12960 non-null  object
dtypes: object(9)
memory usage: 911.4+ KB


*missing value*

In [6]:
df.isnull().sum()

parents     0
has_nurs    0
form        0
children    0
housing     0
finance     0
social      0
health      0
class       0
dtype: int64

*duplicate*

In [7]:
df.duplicated().sum()

0

*outlier check*

In [8]:
for col in df.columns:
    print(col)
    print(df[col].value_counts())
    print("\n")

parents
parents
usual          4320
pretentious    4320
great_pret     4320
Name: count, dtype: int64


has_nurs
has_nurs
proper         2592
less_proper    2592
improper       2592
critical       2592
very_crit      2592
Name: count, dtype: int64


form
form
complete      3240
completed     3240
incomplete    3240
foster        3240
Name: count, dtype: int64


children
children
1       3240
2       3240
3       3240
more    3240
Name: count, dtype: int64


housing
housing
convenient    4320
less_conv     4320
critical      4320
Name: count, dtype: int64


finance
finance
convenient    6480
inconv        6480
Name: count, dtype: int64


social
social
nonprob          4320
slightly_prob    4320
problematic      4320
Name: count, dtype: int64


health
health
recommended    4320
priority       4320
not_recom      4320
Name: count, dtype: int64


class
class
not_recom     4320
priority      4266
spec_prior    4044
very_recom     328
recommend        2
Name: count, dtype: int64




*pemilihan kelas dan kolom*

In [9]:
# Filter hanya baris dengan class 'not_recom', 'priority' dan 'spec prior'
df_pilihan = df[df['class'].isin(['not_recom', 'priority', 'spec_prior'])]

# Tampilkan jumlah kelas 'not_recom' dan 'priority'
print(df_pilihan['class'].value_counts())

class
not_recom     4320
priority      4266
spec_prior    4044
Name: count, dtype: int64


In [10]:
# Pilih kolom yang akan digunakan
kolom_pilihan = ['finance', 'social', 'health', 'class']
df_pilihan = df_pilihan[kolom_pilihan].copy()

df_pilihan.head()

Unnamed: 0,finance,social,health,class
1,convenient,nonprob,priority,priority
2,convenient,nonprob,not_recom,not_recom
4,convenient,slightly_prob,priority,priority
5,convenient,slightly_prob,not_recom,not_recom
6,convenient,problematic,recommended,priority


*encoding*

In [11]:
le = LabelEncoder()

df_pilihan['social'] = le.fit_transform(df_pilihan['social']).astype(int)
df_pilihan['finance'] = le.fit_transform(df_pilihan['finance']).astype(int)
df_pilihan['health'] = le.fit_transform(df_pilihan['health']).astype(int)
df_pilihan['class'] = le.fit_transform(df_pilihan['class']).astype(int)

df_pilihan.head()

Unnamed: 0,finance,social,health,class
1,0,0,1,1
2,0,0,0,0
4,0,2,1,1
5,0,2,0,0
6,0,1,2,1


*splitting*

In [13]:
X = df_pilihan[['social', 'finance', 'health']]
y = df_pilihan['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data Training: ", X_train.shape)
print("Data Testing: ", X_test.shape)

Data Training:  (10104, 3)
Data Testing:  (2526, 3)


*Modeling Evaluasi*

In [14]:
from sklearn.svm import SVC

svm_model = SVC(kernel='linear', decision_function_shape='ovo')
svm_model.fit(X_train, y_train)
svm_predict = svm_model.predict(X_test)

In [15]:
df_hasil = X_test.copy()
df_hasil['Label asli'] = y_test.values
df_hasil['Label prediksi'] = svm_predict

In [16]:
df_hasil['Prediksi benar'] = df_hasil['Label asli'] == df_hasil['Label prediksi']
prediksi_benar = df_hasil['Prediksi benar'].sum()

print(df_hasil.head(20))
print(f'Jumlah hasil prediksi yang benar adalah {prediksi_benar}')

       social  finance  health  Label asli  Label prediksi  Prediksi benar
9234        0        0       2           1               1            True
11965       2        1       1           2               2            True
4530        2        1       2           1               1            True
11732       2        1       0           0               0            True
8441        1        1       0           0               0            True
11107       0        0       1           2               2            True
7272        0        0       2           2               1           False
12532       2        0       1           2               2            True
1057        2        1       1           1               2           False
1707        1        1       2           1               1            True
1817        1        1       0           0               0            True
8152        1        1       1           2               2            True
10081       0        0   

In [17]:
# df_hasil.to_csv('Hasil_prediksiSVM_Linear.csv', index='False')

In [18]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print(f"Accuracy Score: {accuracy_score(y_test, svm_predict)}")
print(f"Classification Report:\n {classification_report(y_test, svm_predict)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, svm_predict)}")

Accuracy Score: 0.7268408551068883
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       896
           1       0.61      0.55      0.58       861
           2       0.55      0.60      0.57       769

    accuracy                           0.73      2526
   macro avg       0.72      0.72      0.72      2526
weighted avg       0.73      0.73      0.73      2526

Confusion Matrix:
 [[896   0   0]
 [  0 476 385]
 [  0 305 464]]


In [19]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

sk_fold = StratifiedKFold(n_splits=5)
scores = cross_val_score(svm_model, X, y, cv=sk_fold)

print(f"Cross Validation Score: ", scores)
print(f"Rata Rata Accuracy: {scores.mean():.2f}")
print(f"Standar Deviasi: {scores.std():.2f}")

Cross Validation Score:  [0.68923199 0.70190024 0.72961203 0.7442597  0.77632621]
Rata Rata Accuracy: 0.73
Standar Deviasi: 0.03


In [20]:
from sklearn.svm import SVC

svm_modelrbf = SVC(kernel='rbf', decision_function_shape='ovo')
svm_modelrbf.fit(X_train, y_train)
svm_predrbf = svm_modelrbf.predict(X_test)

In [21]:
df_hasilrbf = X_test.copy()
df_hasilrbf['Label asli'] = y_test.values
df_hasilrbf['Label prediksi'] = svm_predrbf

In [22]:
df_hasilrbf['Prediksi benar'] = df_hasilrbf['Label asli'] == df_hasilrbf['Label prediksi']
prediksi_benarrbf = df_hasilrbf['Prediksi benar'].sum()

print(df_hasilrbf.head(20))
print(f'Jumlah hasil prediksi yang benar adalah {prediksi_benarrbf}')

       social  finance  health  Label asli  Label prediksi  Prediksi benar
9234        0        0       2           1               1            True
11965       2        1       1           2               2            True
4530        2        1       2           1               1            True
11732       2        1       0           0               0            True
8441        1        1       0           0               0            True
11107       0        0       1           2               2            True
7272        0        0       2           2               1           False
12532       2        0       1           2               2            True
1057        2        1       1           1               2           False
1707        1        1       2           1               2           False
1817        1        1       0           0               0            True
8152        1        1       1           2               2            True
10081       0        0   

In [23]:
# df_hasilrbf.to_csv('Hasil_prediksiSVM_rbf.csv', index='False')

In [24]:
print(f"Accuracy Score: {accuracy_score(y_test, svm_predrbf)}")
print(f"Classification Report:\n {classification_report(y_test, svm_predrbf)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, svm_predrbf)}")

Accuracy Score: 0.7387173396674585
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       896
           1       0.70      0.41      0.51       861
           2       0.55      0.81      0.65       769

    accuracy                           0.74      2526
   macro avg       0.75      0.74      0.72      2526
weighted avg       0.76      0.74      0.73      2526

Confusion Matrix:
 [[896   0   0]
 [  0 349 512]
 [  0 148 621]]


In [25]:
sk_fold = StratifiedKFold(n_splits=5)
scores = cross_val_score(svm_modelrbf, X, y, cv=sk_fold)

print(f"Cross Validation Score: ", scores)
print(f"Rata Rata Accuracy: {scores.mean():.2f}")
print(f"Standar Deviasi: {scores.std():.2f}")

Cross Validation Score:  [0.69200317 0.70942201 0.746635   0.76524149 0.8087886 ]
Rata Rata Accuracy: 0.74
Standar Deviasi: 0.04


*Menyimpan model ke pickle*

In [26]:
# import pickle

# pickle.dump(svm_model, open('linear_model', 'wb'))

In [27]:
# loaded_model_linear = pickle.load(open('linear_model', 'rb'))
# result_linear = loaded_model_linear.score(X_test, y_test)
# print(result_linear)

In [28]:
# pickle.dump(svm_modelrbf, open('rbf_model', 'wb'))

In [29]:
# loaded_model_rbf = pickle.load(open('rbf_model', 'rb'))
# result_rbf = loaded_model_rbf.score(X_test, y_test)
# print(result_rbf)

*Visualisasi*

In [30]:
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.decomposition import PCA

# # Kurangi dimensi untuk visualisasi (2D) dengan PCA
# pca = PCA(n_components=2)
# X_train_pca = pca.fit_transform(X_train)
# X_test_pca = pca.transform(X_test)

# # Visualisasi SVM pada data training
# def plot_pca_decision_boundary(X, y, model):
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))
    
#     # Prediksi model pada tiap titik meshgrid
#     Z = model.predict(pca.inverse_transform(np.c_[xx.ravel(), yy.ravel()]))
#     Z = Z.reshape(xx.shape)

#     # Hasil plot area keputusan
#     plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)

#     # Plot data training
#     plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.coolwarm)
#     plt.xlabel('Komponen PCA 1') # namanya ganti
#     plt.ylabel('Komponen PCA 2')
#     plt.title('Visualisasi Batas SVM Dataset Nursery')
#     plt.show()

# # Panggil fungsi untuk memvisualisasikan dengan model SVM terlatih
# plot_pca_decision_boundary(X_train_pca, y_train,svm_model)

In [31]:
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.decomposition import PCA
# from sklearn.svm import SVC

# # Kurangi dimensi untuk visualisasi (2D) dengan PCA
# pca = PCA(n_components=2)
# X_train_pca = pca.fit_transform(X_train)
# X_test_pca = pca.transform(X_test)

# # Melatih model SVM dengan kernel RBF
# svm_model_rbf = SVC(kernel='rbf', gamma='auto')
# svm_model_rbf.fit(X_train, y_train)

# # Visualisasi SVM dengan boundary keputusan
# def plot_pca_decision_boundary(X, y, model):
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))
    
#     # Prediksi model pada tiap titik meshgrid
#     Z = model.predict(pca.inverse_transform(np.c_[xx.ravel(), yy.ravel()]))
#     Z = Z.reshape(xx.shape)

#     # Plot area keputusan
#     plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)

#     # Plot data latih
#     plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.coolwarm)
#     plt.xlabel('Komponen PCA 1')
#     plt.ylabel('Komponen PCA 2')
#     plt.title('Visualisasi Batas Keputusan SVM dengan Kernel RBF')
#     plt.show()

# # Memanggil fungsi untuk memvisualisasikan dengan model SVM terlatih
# plot_pca_decision_boundary(X_train_pca, y_train,svm_modelrbf)

## Modelling NN

In [32]:
from tensorflow.keras.utils import to_categorical
y_train_ann = y_train.copy()
y_train_ann = to_categorical(y_train_ann, num_classes=3)
y_test_ann = y_test.copy()
y_test_ann = to_categorical(y_test_ann, num_classes=3)

In [33]:
print("Dimensi data :\n")
print("X train \t X test \t Y train \t Y test")  
print("%s \t %s \t %s \t %s" % (X_train.shape, X_test.shape, y_train_ann.shape, y_test_ann.shape))

Dimensi data :

X train 	 X test 	 Y train 	 Y test
(10104, 3) 	 (2526, 3) 	 (10104, 3) 	 (2526, 3)


In [34]:
modelANN = Sequential()
modelANN.add(Dense(6, activation='relu', input_dim=X_train.shape[1]))
modelANN.add(Dense(3, activation='relu'))
modelANN.add(Dense(3, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [35]:
modelANN.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [36]:
history = modelANN.fit(X_train, y_train_ann, 
                    epochs=50, 
                    batch_size=32, 
                    validation_split=0.2)

Epoch 1/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5214 - loss: 1.0541 - val_accuracy: 0.7006 - val_loss: 0.9040
Epoch 2/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7105 - loss: 0.8501 - val_accuracy: 0.6823 - val_loss: 0.7251
Epoch 3/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6863 - loss: 0.7013 - val_accuracy: 0.6962 - val_loss: 0.6411
Epoch 4/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7035 - loss: 0.6246 - val_accuracy: 0.7264 - val_loss: 0.5913
Epoch 5/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7268 - loss: 0.5786 - val_accuracy: 0.7081 - val_loss: 0.5569
Epoch 6/50
[1m253/253[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7173 - loss: 0.5529 - val_accuracy: 0.7264 - val_loss: 0.5345
Epoch 7/50
[1m253/253[0m 

In [37]:
loss, accuracy = modelANN.evaluate(X_test, y_test_ann)
print(f"Test Accuracy: {accuracy:.2f}")

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7241 - loss: 0.4358
Test Accuracy: 0.73


In [38]:
ann_predict = modelANN.predict(X_test)

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [39]:
import numpy as np

y_predict_classes = np.argmax(ann_predict, axis=1)

In [40]:
print("Predictions: ", y_predict_classes[:20])  # Tampilkan 10 prediksi pertama

Predictions:  [1 2 1 0 0 2 1 2 2 1 0 2 2 1 0 0 0 2 1 1]


In [41]:
y_test_labels = np.argmax(y_test_ann, axis=1)
y_pred_labels = np.argmax(ann_predict, axis=1)

In [42]:
df_hasilnn = X_test.copy()
df_hasilnn['Label asli'] = y_test_labels
df_hasilnn['Label prediksi'] = y_pred_labels

In [43]:
df_hasilnn['Prediksi benar'] = df_hasilnn['Label asli'] == df_hasilnn['Label prediksi']
prediksi_benarnn = df_hasilnn['Prediksi benar'].sum()

print(df_hasilnn.head(20))
print(f'Jumlah hasil prediksi yang benar adalah {prediksi_benarnn}')

       social  finance  health  Label asli  Label prediksi  Prediksi benar
9234        0        0       2           1               1            True
11965       2        1       1           2               2            True
4530        2        1       2           1               1            True
11732       2        1       0           0               0            True
8441        1        1       0           0               0            True
11107       0        0       1           2               2            True
7272        0        0       2           2               1           False
12532       2        0       1           2               2            True
1057        2        1       1           1               2           False
1707        1        1       2           1               1            True
1817        1        1       0           0               0            True
8152        1        1       1           2               2            True
10081       0        0   

In [44]:
print(f"Accuracy Score: {accuracy_score(y_test_labels, y_pred_labels)}")
print(f"Classification Report:\n {classification_report(y_test_labels, y_pred_labels)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test_labels, y_pred_labels)}")

Accuracy Score: 0.7268408551068883
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       896
           1       0.61      0.55      0.58       861
           2       0.55      0.60      0.57       769

    accuracy                           0.73      2526
   macro avg       0.72      0.72      0.72      2526
weighted avg       0.73      0.73      0.73      2526

Confusion Matrix:
 [[896   0   0]
 [  0 476 385]
 [  0 305 464]]


## Preprocessing PCA

*encoding*

In [45]:
# Encode semua kolom
le = LabelEncoder()
for col in df.columns:
    df[col] = le.fit_transform(df[col])

df.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class
0,2,3,0,0,0,0,0,2,2
1,2,3,0,0,0,0,0,1,1
2,2,3,0,0,0,0,0,0,0
3,2,3,0,0,0,0,2,2,2
4,2,3,0,0,0,0,2,1,1


*data splitting*

In [46]:
X2 = df.drop(columns=['class'])  # Semua fitur kecuali 'class'
y2 = df['class']                 # Target

X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2, random_state=42)

print("Data Training: ", X_train2.shape)
print("Data Testing: ", X_test2.shape)

Data Training:  (10368, 8)
Data Testing:  (2592, 8)


*menentukan atribut pca*

In [47]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.85)
Xtrain_pca = pca.fit_transform(X_train2)
Xtrain_pca

array([[-1.98497442,  0.71286742, -0.1479329 ,  0.23596993, -0.40607873,
         0.74420362],
       [ 2.02994841,  0.49045403,  2.05978838, -0.35596421, -0.80343002,
        -1.44208082],
       [-1.97577936,  1.04894903,  1.21363214,  0.91990725,  0.76928533,
        -0.1276976 ],
       ...,
       [-0.00725245, -2.05869464,  0.51942085, -0.53214721, -0.38625242,
        -0.62119099],
       [ 1.01648895, -0.35139351,  1.55037133, -0.28039433, -0.76323714,
         0.12321463],
       [-2.0035363 , -0.66447437,  0.19081325, -0.41223272,  0.82697987,
         0.09231828]])

In [48]:
Xtest_pca = pca.transform(X_test2)

## Modelling SVM dengan PCA

*svm linear*

In [49]:
from sklearn.svm import SVC

pcalin_model = SVC(kernel='linear', decision_function_shape='ovo')
pcalin_model.fit(Xtrain_pca, y_train2)
pcalin_predict = pcalin_model.predict(Xtest_pca)

In [50]:
df_hasil_pcalin = pd.DataFrame(Xtest_pca, columns=[f'PC{i+1}' for i in range(Xtest_pca.shape[1])])
df_hasil_pcalin['Label asli'] = y_test2.values
df_hasil_pcalin['Label prediksi'] = pcalin_predict

In [51]:
df_hasil_pcalin['Prediksi benar'] = df_hasil_pcalin['Label asli'] == df_hasil_pcalin['Label prediksi']
prediksi_benar_pcalin = df_hasil_pcalin['Prediksi benar'].sum()

print(df_hasil_pcalin.head(20))
print(f'Jumlah hasil prediksi yang benar adalah {prediksi_benar_pcalin}')

         PC1       PC2       PC3       PC4       PC5       PC6  Label asli  \
0  -1.003883 -0.676951  0.195643 -0.957584  0.455575 -0.539710           0   
1  -1.007163  0.365910 -1.528894  0.358484  0.381674 -0.784352           3   
2   0.991934 -0.170556 -0.690842  0.200728  0.835989  0.043029           1   
3   1.998768 -1.216518  1.010093 -0.221034  0.400535 -0.736360           3   
4  -0.988287  1.219637 -0.997608 -0.318605 -0.779541  0.103346           0   
5  -1.003010  0.374555 -1.541931  0.164982 -0.033496  1.388505           1   
6   2.017391  2.044126 -0.503789 -0.187016  0.029173 -1.415581           0   
7  -2.019174 -0.981818 -1.195880  1.963624 -0.084669 -0.262834           1   
8  -1.002749 -2.042783  0.499872  0.185530 -0.036687  1.411964           1   
9  -0.014395 -0.503804 -2.058831 -0.809716  0.423663  0.837643           1   
10  0.027121  1.552960  0.354087  0.414529 -0.835348 -0.083653           1   
11  1.020956  1.533428  0.344968  0.190041  0.844762  0.050449  

In [52]:
# df_hasil_pcalin.to_csv('Hasil_prediksiSVM_Linear_pca.csv', index='False')

In [53]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print(f"Accuracy Score: {accuracy_score(y_test2, pcalin_predict)}")
print(f"Classification Report:\n {classification_report(y_test2, pcalin_predict)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test2, pcalin_predict)}")

Accuracy Score: 0.6979166666666666
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.89      0.88       870
           1       0.64      0.70      0.67       873
           2       0.00      0.00      0.00         2
           3       0.56      0.55      0.55       785
           4       0.00      0.00      0.00        62

    accuracy                           0.70      2592
   macro avg       0.42      0.43      0.42      2592
weighted avg       0.68      0.70      0.69      2592

Confusion Matrix:
 [[772   0   0  98   0]
 [ 22 607   0 244   0]
 [  0   2   0   0   0]
 [ 81 274   0 430   0]
 [  0  62   0   0   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [54]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

sk_fold = StratifiedKFold(n_splits=5)
scores = cross_val_score(pcalin_model, X2, y2, cv=sk_fold)

print(f"Cross Validation Score: ", scores)
print(f"Rata Rata Accuracy: {scores.mean():.2f}")
print(f"Standar Deviasi: {scores.std():.2f}")



Cross Validation Score:  [0.67052469 0.76080247 0.77121914 0.84104938 0.64544753]
Rata Rata Accuracy: 0.74
Standar Deviasi: 0.07


*svm rbf*

In [55]:
from sklearn.svm import SVC

pcarbf_model = SVC(kernel='rbf', decision_function_shape='ovo')
pcarbf_model.fit(Xtrain_pca, y_train2)
pcarbf_predict = pcarbf_model.predict(Xtest_pca)

In [56]:
df_hasil_pcarbf = pd.DataFrame(Xtest_pca, columns=[f'PC{i+1}' for i in range(Xtest_pca.shape[1])])
df_hasil_pcarbf['Label asli'] = y_test2.values
df_hasil_pcarbf['Label prediksi'] = pcarbf_predict

In [57]:
df_hasil_pcarbf['Prediksi benar'] = df_hasil_pcarbf['Label asli'] == df_hasil_pcarbf['Label prediksi']
prediksi_benar_pcarbf = df_hasil_pcarbf['Prediksi benar'].sum()

print(df_hasil_pcarbf.head(20))
print(f'Jumlah hasil prediksi yang benar adalah {prediksi_benar_pcarbf}')

         PC1       PC2       PC3       PC4       PC5       PC6  Label asli  \
0  -1.003883 -0.676951  0.195643 -0.957584  0.455575 -0.539710           0   
1  -1.007163  0.365910 -1.528894  0.358484  0.381674 -0.784352           3   
2   0.991934 -0.170556 -0.690842  0.200728  0.835989  0.043029           1   
3   1.998768 -1.216518  1.010093 -0.221034  0.400535 -0.736360           3   
4  -0.988287  1.219637 -0.997608 -0.318605 -0.779541  0.103346           0   
5  -1.003010  0.374555 -1.541931  0.164982 -0.033496  1.388505           1   
6   2.017391  2.044126 -0.503789 -0.187016  0.029173 -1.415581           0   
7  -2.019174 -0.981818 -1.195880  1.963624 -0.084669 -0.262834           1   
8  -1.002749 -2.042783  0.499872  0.185530 -0.036687  1.411964           1   
9  -0.014395 -0.503804 -2.058831 -0.809716  0.423663  0.837643           1   
10  0.027121  1.552960  0.354087  0.414529 -0.835348 -0.083653           1   
11  1.020956  1.533428  0.344968  0.190041  0.844762  0.050449  

In [58]:
# df_hasil_pcarbf.to_csv('Hasil_prediksiSVM_rbf_pca.csv', index='False')

In [59]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print(f"Accuracy Score: {accuracy_score(y_test2, pcarbf_predict)}")
print(f"Classification Report:\n {classification_report(y_test2, pcarbf_predict)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test2, pcarbf_predict)}")

Accuracy Score: 0.8379629629629629
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.93      0.90       870
           1       0.85      0.84      0.85       873
           2       0.00      0.00      0.00         2
           3       0.79      0.79      0.79       785
           4       0.80      0.06      0.12        62

    accuracy                           0.84      2592
   macro avg       0.66      0.53      0.53      2592
weighted avg       0.84      0.84      0.83      2592

Confusion Matrix:
 [[813   0   0  57   0]
 [ 25 736   0 112   0]
 [  0   1   0   0   1]
 [ 98  68   0 619   0]
 [  0  58   0   0   4]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [60]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

sk_fold = StratifiedKFold(n_splits=5)
scores = cross_val_score(pcarbf_model, X2, y2, cv=sk_fold)

print(f"Cross Validation Score: ", scores)
print(f"Rata Rata Accuracy: {scores.mean():.2f}")
print(f"Standar Deviasi: {scores.std():.2f}")



Cross Validation Score:  [0.75270062 0.88503086 0.83912037 0.88001543 0.68325617]
Rata Rata Accuracy: 0.81
Standar Deviasi: 0.08
