In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1. Load the dataset
df = pd.read_csv('Social_Network_Ads.csv')

# 2. Preprocessing: encode Gender as a number (Male -> 0, Female -> 1)
gender_codes = {'Male': 0, 'Female': 1}
encoded_gender = df['Gender'].map(gender_codes)

# Series.map turns every label that is missing from the dict into NaN.
# Stop here with a clear message instead of passing NaN on to the later steps.
unmapped = encoded_gender.isna()
if unmapped.any():
    bad_labels = df.loc[unmapped, 'Gender'].unique().tolist()
    raise ValueError(
        f"Unexpected Gender values (expected 'Male' or 'Female'): {bad_labels}"
    )
df['Gender'] = encoded_gender

# 3. Choose X (features) and y (target)
# Gender, Age and EstimatedSalary are the features; Purchased is the target.
X = df[['Gender', 'Age', 'EstimatedSalary']]
y = df['Purchased']

# 4. Split the data (80% training, 20% testing); fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Scaling (very important for KNN because Age and EstimatedSalary have very
# different value ranges). The scaler is fitted on the training split only and
# then reused, unchanged, on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Build and fit the KNN model
k = 5
model = KNeighborsClassifier(n_neighbors=k)
model.fit(X_train_scaled, y_train)

# 7. Predict on the test data
y_pred = model.predict(X_test_scaled)

# 8. Evaluate the model: accuracy, per-class report and confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi Model: {accuracy * 100:.2f}%")
print("\nLaporan Klasifikasi:")
print(classification_report(y_test, y_pred))
print("\nMatriks Konfusi:")
print(confusion_matrix(y_test, y_pred))

# 9. Read a new user's data from the keyboard and predict for that user
print("\n" + "="*30)
print("MASUKKAN DATA PENGGUNA BARU")
print("="*30)


def _prompt_number(prompt, cast, is_valid=None):
    """Ask for a value until the reply parses and passes validation.

    Args:
        prompt: Text shown to the user by ``input``.
        cast: Callable (``int`` or ``float``) that converts the reply;
            a ``ValueError`` from it triggers a re-prompt.
        is_valid: Optional predicate on the converted value; a falsy
            result triggers a re-prompt.

    Returns:
        The first converted value that is accepted.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print("Input tidak valid, silakan coba lagi.")
            continue
        if is_valid is None or is_valid(value):
            return value
        print("Input tidak valid, silakan coba lagi.")


# Gender must be one of the two codes named in the prompt.
gender = _prompt_number(
    "Input Jenis Kelamin (0 = Laki-laki, 1 = Perempuan): ",
    int,
    lambda v: v in (0, 1),
)
age = _prompt_number("Input Umur: ", int, lambda v: v >= 0)
# float() also parses "nan" and "inf"; this range check rejects both
# (every comparison with NaN is False) as well as negative salaries.
salary = _prompt_number(
    "Input Estimasi Gaji: ",
    float,
    lambda v: 0 <= v < float("inf"),
)

new_user = pd.DataFrame({
    'Gender': [gender],
    'Age': [age],
    'EstimatedSalary': [salary]
})

# Make sure the column order matches X
new_user = new_user[X.columns]

# Scale the new row with the already-fitted scaler
new_user_scaled = scaler.transform(new_user)

# Predict
prediction = model.predict(new_user_scaled)

print("\nHasil Prediksi:")
if prediction[0] == 1:
    print(">> Pengguna diprediksi akan MEMBELI (Purchased).")
else:
    print(">> Pengguna diprediksi TIDAK AKAN MEMBELI (Not Purchased).")

Akurasi Model: 92.50%

Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.96      0.92      0.94        52
           1       0.87      0.93      0.90        28

    accuracy                           0.93        80
   macro avg       0.91      0.93      0.92        80
weighted avg       0.93      0.93      0.93        80


Matriks Konfusi:
[[48  4]
 [ 2 26]]

MASUKKAN DATA PENGGUNA BARU


Input Jenis Kelamin (0 = Laki-laki, 1 = Perempuan):  0
Input Umur:  30
