<a href="https://colab.research.google.com/github/mprksa/kubus5/blob/main/KNN7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import cv2
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score, KFold
import joblib
import imgaug.augmenters as iaa
from sklearn.preprocessing import StandardScaler

In [3]:
!git clone https://github.com/mprksa/Blocks2.git

Cloning into 'Blocks2'...
remote: Enumerating objects: 887, done.[K
remote: Counting objects: 100% (887/887), done.[K
remote: Compressing objects: 100% (873/873), done.[K
remote: Total 887 (delta 36), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (887/887), 1.82 GiB | 24.20 MiB/s, done.
Resolving deltas: 100% (36/36), done.
Updating files: 100% (729/729), done.


In [4]:
# Fungsi untuk memuat gambar dan label dengan augmentasi data
def load_data(image_folder, label_file, image_size=(256, 256), augment=False):
    images = []
    labels = []

    with open(label_file, 'r') as f:
        data = json.load(f)

    image_id_to_filename = {image['id']: image['file_name'] for image in data['images']}
    category_id_to_name = {category['id']: category['name'] for category in data['categories']}

    augmenters = iaa.Sequential([
        iaa.Fliplr(0.5), # horizontal flip
        iaa.Affine(rotate=(-25, 25)), # rotate
        iaa.Multiply((0.8, 1.2)) # change brightness
    ])

    for annotation in data['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']

        img_path = os.path.join(image_folder, image_id_to_filename[image_id])
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, image_size)
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                images.append(img_hsv.flatten())
                labels.append(category_id_to_name[category_id])

                if augment:
                    aug_img = augmenters(image=img)
                    aug_img_hsv = cv2.cvtColor(aug_img, cv2.COLOR_BGR2HSV)
                    images.append(aug_img_hsv.flatten())
                    labels.append(category_id_to_name[category_id])

    return np.array(images), np.array(labels)

In [5]:
# Muat data dengan augmentasi
train_images, train_labels = load_data('Blocks2/train/images', 'Blocks2/train/labels.json', augment=True)
test_images, test_labels = load_data('Blocks2/test/images', 'Blocks2/test/labels.json')
val_images, val_labels = load_data('Blocks2/validation/images', 'Blocks2/validation/labels.json')

In [6]:
# Tampilkan jumlah gambar
print(f"Jumlah gambar untuk pelatihan: {len(train_images)}")
print(f"Jumlah gambar untuk testing: {len(test_images)}")
print(f"Jumlah gambar untuk validasi: {len(val_images)}")

Jumlah gambar untuk pelatihan: 1016
Jumlah gambar untuk testing: 72
Jumlah gambar untuk validasi: 144


In [7]:
# Normalisasi data
scaler = StandardScaler()
train_images = scaler.fit_transform(train_images)
test_images = scaler.transform(test_images)
val_images = scaler.transform(val_images)

In [9]:
# Penyetelan Hyperparameter dan Validasi Silang k-Fold
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11]
}

knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(train_images, train_labels)
best_knn = grid_search.best_estimator_ # Add underscore here

In [10]:
# Decision Trees
dt = DecisionTreeClassifier(random_state=42)
dt.fit(train_images, train_labels)

In [21]:
# Penggabungan: K-NN dan Decision Trees
class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        # Get unique labels from training data (assuming it's available in the class)
        # If not, you'll need to pass it as an argument
        unique_labels = np.unique(train_labels)

        predictions = []
        for model in self.models:
            model_predictions = model.predict(X)
            # Convert string labels to numerical indices
            numerical_predictions = np.searchsorted(unique_labels, model_predictions)
            predictions.append(numerical_predictions)

        predictions = np.array(predictions)
        return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)

    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

    # Add the get_params method to comply with scikit-learn's estimator interface
    def get_params(self, deep=True):
        return {'models': self.models}

ensemble_model = EnsembleModel([best_knn, dt])
ensemble_model.fit(train_images, train_labels)

In [23]:
# Evaluasi dengan validasi silang k-Fold
kf = KFold(n_splits=5)
knn_scores = cross_val_score(best_knn, train_images, train_labels, cv=kf)
dt_scores = cross_val_score(dt, train_images, train_labels, cv=kf)
ensemble_scores = cross_val_score(ensemble_model, train_images, train_labels, cv=kf)

print(f"K-NN Cross-Validation Scores: {knn_scores}")
print(f"Decision Trees Cross-Validation Scores: {dt_scores}")
print(f"Ensemble Model Cross-Validation Scores: {ensemble_scores}")

K-NN Cross-Validation Scores: [0.41176471 0.44827586 0.4679803  0.39901478 0.44334975]
Decision Trees Cross-Validation Scores: [0.85784314 0.97044335 0.98522167 0.99507389 0.99014778]
Ensemble Model Cross-Validation Scores: [0. 0. 0. 0. 0.]


In [24]:
# Evaluasi model
train_predictions = ensemble_model.predict(train_images)
test_predictions = ensemble_model.predict(test_images)
val_predictions = ensemble_model.predict(val_images)

train_accuracy = accuracy_score(train_labels, train_predictions)
test_accuracy = accuracy_score(test_labels, test_predictions)
val_accuracy = accuracy_score(val_labels, val_predictions)

print(f"Akurasi Pelatihan: {train_accuracy:.4f}")
print(f"Akurasi Testing: {test_accuracy:.4f}")
print(f"Akurasi Validasi: {val_accuracy:.4f}")

Akurasi Pelatihan: 0.0000
Akurasi Testing: 0.0000
Akurasi Validasi: 0.0000


In [25]:
# Analisis Kesalahan
val_class_report = classification_report(val_labels, val_predictions)
val_conf_matrix = confusion_matrix(val_labels, val_predictions)

print("Classification Report untuk Validasi:")
print(val_class_report)

print("Confusion Matrix untuk Validasi:")
print(val_conf_matrix)

ValueError: Mix of label input types (string and number)

In [None]:
# Plot Confusion Matrix
plt.figure(figsize=(8, 6))
plt.imshow(val_conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(np.unique(train_labels)))
plt.xticks(tick_marks, np.unique(train_labels), rotation=45)
plt.yticks(tick_marks, np.unique(train_labels))
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

In [None]:
# Simpan model penggabungan
joblib.dump(ensemble_model, 'ensemble_model.pkl')

In [None]:
# Pentingnya Fitur pada Decision Trees
importances = dt.feature_importances_
indices = np.argsort(importances)[::-1]
plt.figure(figsize=(10, 6))
plt.title("Feature Importances")
plt.bar(range(train_images.shape[1]), importances[indices], align="center")
plt.xticks(range(train_images.shape[1]), indices)
plt.xlim([-1, train_images.shape[1]])
plt.show()