# No 1

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import cv2

In [3]:
# 1. Memuat Data
# Pastikan file 'voice.csv' berada di direktori yang sama
df = pd.read_csv('voice.csv')

# Menampilkan 5 baris pertama data
print("5 Baris Pertama Data:")
print(df.head())
print("\nJumlah baris dan kolom:", df.shape)

5 Baris Pertama Data:
   meanfreq        sd    median       Q25       Q75       IQR       skew  \
0  0.059781  0.064241  0.032027  0.015071  0.090193  0.075122  12.863462   
1  0.066009  0.067310  0.040229  0.019414  0.092666  0.073252  22.423285   
2  0.077316  0.083829  0.036718  0.008701  0.131908  0.123207  30.757155   
3  0.151228  0.072111  0.158011  0.096582  0.207955  0.111374   1.232831   
4  0.135120  0.079146  0.124656  0.078720  0.206045  0.127325   1.101174   

          kurt    sp.ent       sfm  ...  centroid   meanfun    minfun  \
0   274.402906  0.893369  0.491918  ...  0.059781  0.084279  0.015702   
1   634.613855  0.892193  0.513724  ...  0.066009  0.107937  0.015826   
2  1024.927705  0.846389  0.478905  ...  0.077316  0.098706  0.015656   
3     4.177296  0.963322  0.727232  ...  0.151228  0.088965  0.017798   
4     4.333713  0.971955  0.783568  ...  0.135120  0.106398  0.016931   

     maxfun   meandom    mindom    maxdom   dfrange   modindx  label  
0  0.275862

In [4]:
# 2. Pra-pemrosesan Data

# Mengubah kolom 'label' menjadi biner (male=1, female=0)
df['label'] = df['label'].map({'male': 1, 'female': 0})

# Memisahkan Fitur (X) dan Target (y)
X = df.drop('label', axis=1)
y = df['label']

# Standardisasi Data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

print("\nData setelah di-scaling:")
print(X_scaled.head())


Data setelah di-scaling:
   meanfreq        sd    median       Q25       Q75       IQR      skew  \
0 -4.049248  0.427355 -4.224901 -2.576102 -5.693607 -0.214778  2.293306   
1 -3.841053  0.611669 -3.999293 -2.486885 -5.588987 -0.258485  4.548056   
2 -3.463066  1.603848 -4.095851 -2.706986 -3.928699  0.909326  6.513656   
3 -0.992157  0.899998 -0.759454 -0.901418 -0.711205  0.632690 -0.449858   
4 -1.530640  1.322561 -1.676948 -1.268395 -0.792029  1.005588 -0.480911   

       kurt    sp.ent       sfm      mode  centroid   meanfun    minfun  \
0  1.762946 -0.039083  0.471575 -2.141210 -4.049248 -1.812038 -1.097998   
1  4.433008 -0.065236  0.594431 -2.141210 -3.841053 -1.079594 -1.091533   
2  7.326207 -1.083730  0.398261 -2.141210 -3.463066 -1.365368 -1.100397   
3 -0.240099  1.516383  1.797340 -1.054576 -0.992157 -1.666966 -0.988934   
4 -0.238940  1.708336  2.114740 -0.790514 -1.530640 -1.127233 -1.034015   

     maxfun   meandom    mindom    maxdom   dfrange   modindx  
0  0.565

In [5]:
# 3. Fungsi Pembantu untuk Melatih dan Evaluasi Model

def train_and_evaluate_svm(X, y, test_size, kernel_type, results_df):
    """
    Melatih model SVM, mengevaluasi akurasi, dan mencatat hasilnya.
    """

    # Split Data
    ratio_str = f"{int((1-test_size)*100)}:{int(test_size*100)}"
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=y
    )

    # Inisialisasi dan Latih Model SVM
    # Untuk kernel linear, kita gunakan C=1 (nilai default)
    # Untuk kernel polynomial dan RBF, kita gunakan C=1 dan gamma='scale' (nilai default)

    if kernel_type == 'linear':
        model = SVC(kernel=kernel_type, random_state=42)
    elif kernel_type == 'poly':
        model = SVC(kernel=kernel_type, degree=3, random_state=42) # degree=3 default
    elif kernel_type == 'rbf':
        model = SVC(kernel=kernel_type, random_state=42) # gamma='scale' default

    model.fit(X_train, y_train)

    # Prediksi dan Evaluasi
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Catat Hasil
    new_result = pd.DataFrame([{
        'Rasio Split': ratio_str,
        'Kernel': kernel_type.upper(),
        'Akurasi': accuracy
    }])

    return pd.concat([results_df, new_result], ignore_index=True)

# Dataframe untuk menampung hasil tabulasi
results_df = pd.DataFrame(columns=['Rasio Split', 'Kernel', 'Akurasi'])

print("Fungsi 'train_and_evaluate_svm' siap digunakan.")

Fungsi 'train_and_evaluate_svm' siap digunakan.


In [6]:
# 4. Pelatihan Model dengan Rasio 70:30 (test_size = 0.3)

# Kernel Linier
results_df = train_and_evaluate_svm(X_scaled, y, 0.3, 'linear', results_df)

# Kernel Polynomial
results_df = train_and_evaluate_svm(X_scaled, y, 0.3, 'poly', results_df)

# Kernel RBF
results_df = train_and_evaluate_svm(X_scaled, y, 0.3, 'rbf', results_df)

print("Hasil sementara (Split 70:30):")
print(results_df)

  return pd.concat([results_df, new_result], ignore_index=True)


Hasil sementara (Split 70:30):
  Rasio Split  Kernel   Akurasi
0       70:30  LINEAR  0.978970
1       70:30    POLY  0.958991
2       70:30     RBF  0.983176


In [15]:
# 5. Pelatihan Model dengan Rasio 80:20 (test_size = 0.2)

# Kernel Linier
results_df = train_and_evaluate_svm(X_scaled, y, 0.2, 'linear', results_df)

# Kernel Polynomial
results_df = train_and_evaluate_svm(X_scaled, y, 0.2, 'poly', results_df)

# Kernel RBF
results_df = train_and_evaluate_svm(X_scaled, y, 0.2, 'rbf', results_df)

print("Hasil akhir (Semua model):")
print(results_df)

Hasil akhir (Semua model):
  Rasio Split  Kernel   Akurasi
0       70:30  LINEAR  0.978970
1       70:30    POLY  0.958991
2       70:30     RBF  0.983176
3       80:20  LINEAR  0.974763
4       80:20    POLY  0.957413
5       80:20     RBF  0.982650
6       80:20  LINEAR  0.974763
7       80:20    POLY  0.957413
8       80:20     RBF  0.982650


# No 2 ada di file Praktikum5.ipynb