In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns

url = 'https://raw.githubusercontent.com/farrelrassya/teachingMLDL/main/02.%20Deep%20Learning/Dataset/income.csv'
df = pd.read_csv(url)

print("Kolom tersedia:", df.columns.tolist())

Kolom tersedia: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']


In [2]:
df.replace(' ?', np.nan, inplace=True)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

In [3]:
le = LabelEncoder()
for col in df.select_dtypes(include=['object']).columns:
    df[col] = le.fit_transform(df[col])

In [4]:
X = df.drop('income', axis=1)
y = df['income']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
models = {
    'K-NN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\n{name}")
    print(f"Akurasi : {accuracy_score(y_test, y_pred):.2f}")
    print(f"Presisi : {precision_score(y_test, y_pred, average='macro'):.2f}")
    print(f"Recall  : {recall_score(y_test, y_pred, average='macro'):.2f}")
    print(f"F1 Score: {f1_score(y_test, y_pred, average='macro'):.2f}")


K-NN
Akurasi : 0.51
Presisi : 0.39
Recall  : 0.36
F1 Score: 0.36

Decision Tree
Akurasi : 0.46
Presisi : 0.38
Recall  : 0.38
F1 Score: 0.38
