# KNN

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

In [9]:
from zipfile import ZipFile
import os

# Path ke file zip yang sudah diupload
zip_file_path = '/content/Dataset.zip'  # Ganti dengan path file zip kamu

# Folder tujuan untuk mengekstrak file
extract_folder = '/content/'

# Ekstrak file zip
with ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

print(f"Dataset berhasil diekstrak ke {extract_folder}")

Dataset berhasil diekstrak ke /content/


In [None]:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [None]:
def load_images_and_labels(dataset_path):
    images = []
    labels = []
    label_map = {}
    label_count = 0

    for folder_name in os.listdir(dataset_path):
        person_folder = os.path.join(dataset_path, folder_name)

        if os.path.isdir(person_folder):
            # Hanya ambil gambar yang jumlahnya >= 5
            image_files = [f for f in os.listdir(person_folder) if f.endswith(('jpg', 'jpeg', 'png'))]
            if len(image_files) >= 5:
                label_map[label_count] = folder_name  # Menyimpan label ke nama orang
                for image_file in image_files:
                    img_path = os.path.join(person_folder, image_file)
                    img = cv2.imread(img_path)

                    # Mengkonversi gambar ke grayscale
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                    # Mendeteksi wajah menggunakan Haar Cascade
                    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

                    # Hanya mengambil wajah yang terdeteksi
                    for (x, y, w, h) in faces:
                        face = img[y:y+h, x:x+w]  # Potong wajah
                        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # Konversi ke RGB
                        face = cv2.resize(face, (224, 224))  # Resizing gambar ke ukuran yang sesuai untuk MobileNet
                        images.append(face)
                        labels.append(label_count)

                label_count += 1

    return np.array(images), np.array(labels), label_map

In [None]:
def extract_features(images, batch_size=32):
    model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')
    features = []

    # Memproses gambar dalam batch
    for i in range(0, len(images), batch_size):
        batch = images[i:i+batch_size]
        batch_features = model.predict(batch)
        features.append(batch_features)

    return np.vstack(features)  # Menggabungkan semua fitur dalam batch


In [None]:
dataset_path = '/content/Dataset'

In [None]:
images, labels, label_map = load_images_and_labels(dataset_path)

In [None]:
images = preprocess_input(images)

In [None]:
features = extract_features(images)

  model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━

In [None]:
# Membagi data menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [None]:
# Inisialisasi KNN dan melatih model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

In [None]:
# Memprediksi pada data uji
y_pred = knn.predict(X_test)

# Evaluasi akurasi model
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi KNN: {accuracy * 100:.2f}%")

Akurasi KNN: 24.79%


In [None]:
for i in range(len(y_test)):
    print(f"Prediksi: {label_map[y_pred[i]]}, Asli: {label_map[y_test[i]]}")

Prediksi: Mike_Krzyzewski, Asli: Carlos_Moya
Prediksi: Tony_Blair, Asli: Alejandro_Toledo
Prediksi: Rudolph_Giuliani, Asli: Ariel_Sharon
Prediksi: Abdullah_Gul, Asli: Nicanor_Duarte_Frutos
Prediksi: Carlos_Menem, Asli: Rob_Marshall
Prediksi: Angelina_Jolie, Asli: Carrie-Anne_Moss
Prediksi: Ron_Dittemore, Asli: Alvaro_Uribe
Prediksi: George_W_Bush, Asli: George_W_Bush
Prediksi: Colin_Powell, Asli: Kofi_Annan
Prediksi: Michael_Bloomberg, Asli: Frank_Solich
Prediksi: Scott_Peterson, Asli: Gwyneth_Paltrow
Prediksi: Ariel_Sharon, Asli: Colin_Powell
Prediksi: Jean-David_Levitte, Asli: Condoleezza_Rice
Prediksi: George_Clooney, Asli: Thaksin_Shinawatra
Prediksi: Tony_Blair, Asli: Hamid_Karzai
Prediksi: George_W_Bush, Asli: George_W_Bush
Prediksi: Colin_Powell, Asli: Hu_Jintao
Prediksi: Cameron_Diaz, Asli: Renee_Zellweger
Prediksi: George_W_Bush, Asli: George_W_Bush
Prediksi: George_W_Bush, Asli: Hugh_Grant
Prediksi: Tom_Ridge, Asli: Joe_Lieberman
Prediksi: Gerhard_Schroeder, Asli: Gerhard_Sch

# Logistic Regression & Random Forest

In [1]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [2]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
# Load Haar Cascade
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [4]:
def detect_faces_and_preprocess(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Deteksi wajah
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) > 0:
        # Ambil bounding box wajah pertama
        (x, y, w, h) = faces[0]
        face_img = img[y:y+h, x:x+w]  # Crop wajah
        face_img = cv2.resize(face_img, (224, 224))  # Resize untuk MobileNet
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)  # Konversi ke RGB
        return face_img
    else:
        return None  # Jika tidak ada wajah, kembalikan None

In [5]:
# Fungsi untuk memuat gambar wajah dan label
def load_images_and_labels(dataset_path):
    images = []
    labels = []
    label_map = {}
    label_count = 0

    for folder_name in os.listdir(dataset_path):
        person_folder = os.path.join(dataset_path, folder_name)

        if os.path.isdir(person_folder):
            # Hanya ambil gambar yang jumlahnya >= 5
            image_files = [f for f in os.listdir(person_folder) if f.endswith(('jpg', 'jpeg', 'png'))]
            if len(image_files) >= 5:
                label_map[label_count] = folder_name  # Menyimpan label ke nama orang
                for image_file in image_files:
                    img_path = os.path.join(person_folder, image_file)
                    face_img = detect_faces_and_preprocess(img_path)
                    if face_img is not None:
                        images.append(face_img)
                        labels.append(label_count)
                label_count += 1

    return np.array(images), np.array(labels), label_map


In [6]:
def extract_features(images, batch_size=32):
    model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')
    features = []

    # Proses dalam batch
    for i in range(0, len(images), batch_size):
        batch_images = images[i:i + batch_size]
        batch_features = model.predict(batch_images)
        features.append(batch_features)

    # Gabungkan semua batch menjadi satu array
    return np.vstack(features)

In [7]:
dataset_path = '/content/Dataset'

In [8]:
# Memuat gambar dan label
images, labels, label_map = load_images_and_labels(dataset_path)

# Normalisasi gambar dan preprocessing input untuk MobileNet
images = preprocess_input(images)

# Ekstraksi fitur menggunakan MobileNet
features = extract_features(images)

  model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 987ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 988ms/step
[1m1/1[0m [

In [9]:
# Membagi data menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [10]:
# Model Logistic Regression
model_lr = OneVsRestClassifier(LogisticRegression(max_iter=1000))
model_lr.fit(X_train, y_train)

In [11]:
# Prediksi dan evaluasi
y_pred_lr = model_lr.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print(f"Akurasi Logistic Regression: {accuracy_lr * 100:.2f}%")

Akurasi Logistic Regression: 45.90%


In [12]:
# Model Random Forest
model_rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model_rf.fit(X_train, y_train)

In [13]:
# Prediksi dan evaluasi
y_pred_rf = model_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Akurasi Random Forest: {accuracy_rf * 100:.2f}%")

Akurasi Random Forest: 14.32%
