# Tugas

In [None]:
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Load the voice dataset.
# NOTE(review): the path is an absolute Google Drive location; it only resolves
# when Drive is mounted at /content/drive.
try:
    df = pd.read_csv('/content/drive/MyDrive/ML/voice.csv')
except FileNotFoundError:
    print("File 'voice.csv' tidak ditemukan.")
    # Re-raise instead of falling through: every statement below needs `df`,
    # so continuing would either fail with a confusing NameError or silently
    # reuse a stale `df` left in the kernel by an earlier run.
    raise

# --- Preprocessing ---
# a. Label encoding: LabelEncoder assigns integer codes to the sorted class
#    names, so 'female'/'male' string labels become 0/1.
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# b. Separate the features (X) from the target (y).
X = df.drop('label', axis=1)
y = df['label']

print("Data berhasil di-load dan di-preprocess.")
print(f"Jumlah sampel: {X.shape[0]}, Jumlah fitur: {X.shape[1]}")
df.head()

Data berhasil di-load dan di-preprocess.
Jumlah sampel: 3168, Jumlah fitur: 20


Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,12.863462,274.402906,0.893369,0.491918,...,0.059781,0.084279,0.015702,0.275862,0.007812,0.007812,0.007812,0.0,0.0,1
1,0.066009,0.06731,0.040229,0.019414,0.092666,0.073252,22.423285,634.613855,0.892193,0.513724,...,0.066009,0.107937,0.015826,0.25,0.009014,0.007812,0.054688,0.046875,0.052632,1
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,30.757155,1024.927705,0.846389,0.478905,...,0.077316,0.098706,0.015656,0.271186,0.00799,0.007812,0.015625,0.007812,0.046512,1
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,1.232831,4.177296,0.963322,0.727232,...,0.151228,0.088965,0.017798,0.25,0.201497,0.007812,0.5625,0.554688,0.247119,1
4,0.13512,0.079146,0.124656,0.07872,0.206045,0.127325,1.101174,4.333713,0.971955,0.783568,...,0.13512,0.106398,0.016931,0.266667,0.712812,0.007812,5.484375,5.476562,0.208274,1


In [None]:
print("--- Memulai Eksperimen 1: Split 70:30 ---")

# Step 1: hold out 30% of the rows as the test set. The fixed random_state
# makes the split repeatable between runs.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 2: standardise the features. The scaler is fitted on the training rows
# only and then applied unchanged to both partitions.
scaler1 = StandardScaler()
scaler1.fit(X_train1)
X_train1_scaled = scaler1.transform(X_train1)
X_test1_scaled = scaler1.transform(X_test1)

# Step 3: train one SVC per kernel and score it on the held-out rows.
# Each entry pairs the progress banner with the kernel to train.
runs1 = [
    ("Melatih model kernel Linear...", 'linear'),
    ("\nMelatih model kernel Polynomial...", 'poly'),
    ("\nMelatih model kernel RBF...", 'rbf'),
]
fitted1, scores1 = {}, {}
for banner, kernel_name in runs1:
    print(banner)
    clf = SVC(kernel=kernel_name)
    clf.fit(X_train1_scaled, y_train1)
    scores1[kernel_name] = accuracy_score(y_test1, clf.predict(X_test1_scaled))
    fitted1[kernel_name] = clf
    print(f"Akurasi: {scores1[kernel_name] * 100:.2f}%")

# Expose the per-kernel models and accuracies under the names later cells use.
svc_lin1, svc_poly1, svc_rbf1 = fitted1['linear'], fitted1['poly'], fitted1['rbf']
acc_lin1, acc_poly1, acc_rbf1 = scores1['linear'], scores1['poly'], scores1['rbf']

print("\nEksperimen 1 Selesai.")

--- Memulai Eksperimen 1: Split 70:30 ---
Melatih model kernel Linear...
Akurasi: 97.06%

Melatih model kernel Polynomial...
Akurasi: 95.79%

Melatih model kernel RBF...
Akurasi: 98.11%

Eksperimen 1 Selesai.


In [None]:
print("--- Memulai Eksperimen 2: Split 80:20 ---")

# Step 1: hold out 20% of the rows as the test set. The fixed random_state
# makes the split repeatable between runs.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: standardise the features. The scaler is fitted on the training rows
# only and then applied unchanged to both partitions.
scaler2 = StandardScaler()
scaler2.fit(X_train2)
X_train2_scaled = scaler2.transform(X_train2)
X_test2_scaled = scaler2.transform(X_test2)

# Step 3: train one SVC per kernel and score it on the held-out rows.
# Each entry pairs the progress banner with the kernel to train.
runs2 = [
    ("Melatih model kernel Linear...", 'linear'),
    ("\nMelatih model kernel Polynomial...", 'poly'),
    ("\nMelatih model kernel RBF...", 'rbf'),
]
fitted2, scores2 = {}, {}
for banner, kernel_name in runs2:
    print(banner)
    clf = SVC(kernel=kernel_name)
    clf.fit(X_train2_scaled, y_train2)
    scores2[kernel_name] = accuracy_score(y_test2, clf.predict(X_test2_scaled))
    fitted2[kernel_name] = clf
    print(f"Akurasi: {scores2[kernel_name] * 100:.2f}%")

# Expose the per-kernel models and accuracies under the names later cells use.
svc_lin2, svc_poly2, svc_rbf2 = fitted2['linear'], fitted2['poly'], fitted2['rbf']
acc_lin2, acc_poly2, acc_rbf2 = scores2['linear'], scores2['poly'], scores2['rbf']

print("\nEksperimen 2 Selesai.")

--- Memulai Eksperimen 2: Split 80:20 ---
Melatih model kernel Linear...
Akurasi: 97.63%

Melatih model kernel Polynomial...
Akurasi: 97.16%

Melatih model kernel RBF...
Akurasi: 98.26%

Eksperimen 2 Selesai.


In [None]:
# Render an accuracy fraction as a percentage string with two decimals.
def _as_percent(acc):
    return f"{acc * 100:.2f}%"

# 1. Assemble the table: one row per kernel, one column per split ratio.
data = {
    'Kernel': ['Linear', 'Polynomial', 'RBF'],
    'Akurasi (70:30)': [_as_percent(a) for a in (acc_lin1, acc_poly1, acc_rbf1)],
    'Akurasi (80:20)': [_as_percent(a) for a in (acc_lin2, acc_poly2, acc_rbf2)],
}

# 2. Wrap it in a DataFrame.
results_df = pd.DataFrame(data)

# 3. Print the table as left-aligned markdown, without the index column.
print("--- Tabulasi Performa Model SVM pada 'voice.csv' ---")
print(results_df.to_markdown(index=False, numalign="left", stralign="left"))

# 4. Conclusion.
# NOTE(review): apart from the two RBF accuracies, this text is fixed; the
# kernel ranking it states is not derived from the scores computed above.
conclusion_lines = [
    "\n--- Kesimpulan ---",
    "Berdasarkan tabel di atas, performa terbaik secara konsisten dicapai oleh:",
    f"- Kernel RBF pada split 70:30 (Akurasi: {acc_rbf1*100:.2f}%)",
    f"- Kernel RBF pada split 80:20 (Akurasi: {acc_rbf2*100:.2f}%)",
    "\nKernel RBF (Radial Basis Function) paling cocok untuk data ini,",
    "diikuti oleh kernel Linear. Kernel Polynomial menunjukkan performa terendah.",
    "Perbedaan rasio split (70:30 vs 80:20) tidak menunjukkan perbedaan akurasi yang signifikan,",
    "menandakan bahwa model sudah cukup stabil.",
]
for line in conclusion_lines:
    print(line)

--- Tabulasi Performa Model SVM pada 'voice.csv' ---
| Kernel     | Akurasi (70:30)   | Akurasi (80:20)   |
|:-----------|:------------------|:------------------|
| Linear     | 97.06%            | 97.63%            |
| Polynomial | 95.79%            | 97.16%            |
| RBF        | 98.11%            | 98.26%            |

--- Kesimpulan ---
Berdasarkan tabel di atas, performa terbaik secara konsisten dicapai oleh:
- Kernel RBF pada split 70:30 (Akurasi: 98.11%)
- Kernel RBF pada split 80:20 (Akurasi: 98.26%)

Kernel RBF (Radial Basis Function) paling cocok untuk data ini,
diikuti oleh kernel Linear. Kernel Polynomial menunjukkan performa terendah.
Perbedaan rasio split (70:30 vs 80:20) tidak menunjukkan perbedaan akurasi yang signifikan,
menandakan bahwa model sudah cukup stabil.


**Tugas 2**

In [None]:
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import warnings

In [None]:
def extract_histogram(image, bins=128):
    """Return a normalised, flattened intensity histogram of an image.

    Args:
        image: Image array. A 2-D array is used as-is. An (H, W, 1) array is
            reshaped to 2-D. Any other array with more than two dimensions is
            converted to grayscale with cv2.COLOR_BGR2GRAY.
            NOTE(review): the conversion assumes BGR channel order; confirm
            that the upstream loader does not produce RGB images.
        bins: Number of histogram bins over the intensity range [0, 256).
            Defaults to 128, the value this notebook used originally.

    Returns:
        A 1-D array of length ``bins`` holding the histogram after
        cv2.normalize has been applied with its default arguments.
    """
    if image.ndim == 3 and image.shape[2] == 1:
        # Already single-channel, just stored with a trailing axis. cvtColor
        # rejects 1-channel input for BGR2GRAY, so drop the axis instead.
        image = image.reshape(image.shape[:2])
    elif image.ndim > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Histogram of channel 0, no mask, `bins` bins over [0, 256).
    hist = cv2.calcHist([image], [0], None, [bins], [0, 256])

    # Normalise in place (source and destination are the same array).
    cv2.normalize(hist, hist)

    # calcHist returns a column-shaped array; flatten it to 1-D.
    return hist.flatten()

print("Fungsi extract_histogram() siap digunakan.")

Fungsi extract_histogram() siap digunakan.


In [None]:
# Fetch the image lists produced by the Praktikum 5 cells. Only this lookup is
# guarded, so a NameError raised anywhere else (e.g. a missing import) is not
# misreported as "Praktikum 5 was not run".
try:
    all_images_list = train_std_img_list + test_std_img_list
except NameError:
    print("ERROR: Variabel 'train_std_img_list' dan 'test_std_img_list' tidak ditemukan.")
    print("Pastikan Anda sudah menjalankan sel kode dari Praktikum 5 terlebih dahulu.")
    # Re-raise: the following cells need X_hist / y_hist, so continuing would
    # only fail later with a less helpful error or reuse stale arrays.
    raise

print(f"Total gambar dari Praktikum 5: {len(all_images_list)}")

# Each entry is unpacked as an (image, label) pair: one histogram feature
# vector per image, and the matching label in the same order.
X_hist_features = [extract_histogram(img) for img, _label in all_images_list]
y_hist_labels = [label for _img, label in all_images_list]

# Stack into arrays for scikit-learn.
X_hist = np.array(X_hist_features)
y_hist = np.array(y_hist_labels)

print(f"Dataset fitur histogram berhasil dibuat. Shape: {X_hist.shape}")
print(f"Dataset label berhasil dibuat. Shape: {y_hist.shape}")

Total gambar dari Praktikum 5: 400
Dataset fitur histogram berhasil dibuat. Shape: (400, 128)
Dataset label berhasil dibuat. Shape: (400,)


In [None]:
# 80/20 split of the histogram features, stratified on the labels so both
# partitions keep the same class proportions; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X_hist,
    y_hist,
    test_size=0.2,
    random_state=42,
    stratify=y_hist,
)

# Standardise using statistics learned from the training partition only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data histogram berhasil di-split dan di-scaling.")

# Baseline: RBF-kernel SVC with all other hyperparameters left at defaults.
model_rbf = SVC(kernel='rbf')
model_rbf.fit(X_train_scaled, y_train)

# Score the baseline on the held-out partition.
y_pred = model_rbf.predict(X_test_scaled)
acc_rbf_default = accuracy_score(y_test, y_pred)

print("\n--- Model SVM RBF (Default) ---")
print(f"Akurasi: {acc_rbf_default * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Data histogram berhasil di-split dan di-scaling.

--- Model SVM RBF (Default) ---
Akurasi: 97.50%

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        40
           1       0.97      0.97      0.97        40

    accuracy                           0.97        80
   macro avg       0.97      0.97      0.97        80
weighted avg       0.97      0.97      0.97        80



In [None]:
print("--- Eksperimen Hyperparameter Tuning ---")
print("Mencari C dan gamma terbaik...")

# Candidate values: C is the regularisation parameter, gamma the RBF kernel
# coefficient. 4 x 4 values give 16 combinations.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
)

# Exhaustive search with 3-fold cross-validation. refit=True retrains the best
# combination on the full training set, so `grid` can predict directly.
grid = GridSearchCV(
    estimator=SVC(kernel='rbf'),
    param_grid=param_grid,
    cv=3,
    refit=True,
    verbose=2,
)

# Search on the training partition only; the test partition stays untouched.
grid.fit(X_train_scaled, y_train)

# --- Evaluate the best model ---
print("\n--- Hasil Tuning ---")
print(f"Parameter terbaik ditemukan: {grid.best_params_}")

# Score the refitted best estimator on the held-out partition.
acc_rbf_tuned = grid.score(X_test_scaled, y_test)

print(f"\nAkurasi RBF (Default): {acc_rbf_default * 100:.2f}%")
print(f"Akurasi RBF (Tuned):   {acc_rbf_tuned * 100:.2f}%")

print("\nLaporan Klasifikasi (Model Tuned):")
y_pred_tuned = grid.predict(X_test_scaled)
print(classification_report(y_test, y_pred_tuned))

--- Eksperimen Hyperparameter Tuning ---
Mencari C dan gamma terbaik... (Ini mungkin perlu waktu)
Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] END .....................................C=0.1, gamma=1; total time=   0.0s
[CV] END .....................................C=0.1, gamma=1; total time=   0.0s
[CV] END .....................................C=0.1, gamma=1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ..................................C=0.1, gamma=0.01; total time=   0.0s
[CV] END ..................................C=0.1, gamma=0.01; total time=   0.0s
[CV] END ..................................C=0.1, gamma=0.01; total time=   0.0s
[CV] END .................................C=0.1, gamma=0.001; total time=   0.0s
[CV] END ......................

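Proses *hyperparameter tuning* mengindikasikan bahwa parameter default sudah mendekati optimal untuk data ini. GridSearchCV memilih {'C': 1, 'gamma': 0.01} sebagai kombinasi terbaik dari 16 kandidat (3-fold cross-validation), dan akurasi model hasil tuning pada data uji (97.50%) sama dengan model default. Hal ini wajar karena nilai default SVC adalah C=1 dan gamma='scale', yang untuk 128 fitur yang sudah distandarisasi bernilai sekitar 1/128 ≈ 0.008, sangat dekat dengan gamma=0.01 yang terpilih. Dengan demikian, pada rentang parameter yang diuji tidak terlihat peningkatan akurasi yang berarti; namun karena data uji hanya berisi 80 gambar, hasil ini belum membuktikan bahwa tuning di luar grid tersebut tidak akan memberikan peningkatan.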