# TUGAS PRAKTIKUM

## INPUT

In [None]:
import os, numpy as np, pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Load voice.csv

In [None]:
# Locations where voice.csv may live, probed in order; the first hit wins.
VOICE_CANDIDATES = [
    "/mnt/data/voice.csv",
    "/mnt/data/voice (1).csv",
    "/content/voice.csv",
    "/content/voice (1).csv",
    "images/voice.csv",  # path relative to the working directory
]

voice_path = None
for candidate_path in VOICE_CANDIDATES:
    if os.path.exists(candidate_path):
        voice_path = candidate_path
        break
if voice_path is None:
    raise FileNotFoundError("voice.csv tidak ditemukan. Upload ke /mnt/data atau /content terlebih dahulu.")

# Rows containing any missing value are discarded before modelling.
raw_frame = pd.read_csv(voice_path)
df = raw_frame.dropna().copy()

# Detect the label column: try the usual names first, in priority order.
label_col = next(
    (name for name in ("label", "gender", "class", "target") if name in df.columns),
    None,
)
if label_col is None:
    # Fall back to the only object-dtype column, if there is exactly one.
    obj_cols = [col for col in df.columns if df[col].dtype == "object"]
    if len(obj_cols) != 1:
        raise ValueError(f"Tidak bisa menebak kolom label. Kolom: {list(df.columns)}")
    label_col = obj_cols[0]

# Encode the labels as integers; every remaining column becomes a feature.
le = LabelEncoder()
y = le.fit_transform(df[label_col].values)
X = df.drop(columns=[label_col]).values

## StandardScaler + SVC

In [None]:
def evaluate_svm(X, y, test_size=0.30, kernel="linear", degree=3, C=1.0, gamma="scale"):
    """Train a scaled SVC on a stratified split and return its test accuracy.

    Steps: split -> scale -> fit SVC -> accuracy. The split uses a fixed
    seed (``random_state=42``) and is stratified on ``y``. Scaling lives
    inside the pipeline, so the scaler is fitted on the training rows only.

    Args:
        X: Feature matrix accepted by ``train_test_split`` / ``SVC``.
        y: Target labels, also used for stratification.
        test_size: Fraction held out for testing, e.g. 0.30 (70:30) or
            0.20 (80:20).
        kernel: SVC kernel name, e.g. 'linear', 'poly' or 'rbf'.
        degree: Polynomial degree; forwarded only for the 'poly' kernel.
        C: Regularisation strength. Forwarded for every kernel, because
            SVC uses it regardless of the kernel choice.
        gamma: Kernel coefficient; forwarded for the kernels that use it
            ('rbf', 'poly', 'sigmoid').

    Returns:
        Accuracy of the fitted pipeline on the held-out test split.
    """
    Xtr, Xte, ytr, yte = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=y
    )

    # C must always be forwarded: previously it was dropped for non-rbf
    # kernels, so tuning it on 'linear'/'poly' silently had no effect.
    params = {"kernel": kernel, "C": C}
    if kernel == "poly":
        params["degree"] = degree
    if kernel in ("rbf", "poly", "sigmoid"):
        params["gamma"] = gamma

    model = Pipeline([("scaler", StandardScaler()), ("svc", SVC(**params))])
    model.fit(Xtr, ytr)
    return accuracy_score(yte, model.predict(Xte))


## Rekap akurasi semua kombinasi

In [None]:
# Evaluate every (split, kernel) combination and tabulate the accuracies.
splits = {"70:30": 0.30, "80:20": 0.20}  # label -> test_size fraction
kernels = ["linear", "poly", "rbf"]
rows = []
for sname, ts in splits.items():
    for k in kernels:
        acc = evaluate_svm(X, y, test_size=ts, kernel=k, degree=3, C=1.0, gamma="scale")
        rows.append({"split": sname, "kernel": k, "accuracy": acc})
results_df = pd.DataFrame(rows).sort_values(["split", "kernel"]).reset_index(drop=True)
print("=== Rekap Akurasi — voice.csv ===")

# IPython/Jupyter exposes `display` through builtins rather than the module
# globals, so a `"display" in globals()` check misses it. Resolving the name
# directly finds it wherever it lives and falls back to print() in plain
# Python.
try:
    show = display
except NameError:
    show = print
show(results_df)

# Persist the table next to the notebook for later reference.
results_df.to_csv("svm_voice_results_simple.csv", index=False)

=== Rekap Akurasi — voice.csv ===
   split  kernel  accuracy
0  70:30  linear  0.978970
1  70:30    poly  0.960042
2  70:30     rbf  0.983176
3  80:20  linear  0.974763
4  80:20    poly  0.955836
5  80:20     rbf  0.982650
