<a href="https://colab.research.google.com/github/mprksa/Blocks2/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!git clone https://github.com/mprksa/Blocks2.git

Cloning into 'Blocks2'...
remote: Enumerating objects: 859, done.[K
remote: Counting objects: 100% (859/859), done.[K
remote: Compressing objects: 100% (845/845), done.[K
remote: Total 859 (delta 27), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (859/859), 1.82 GiB | 23.28 MiB/s, done.
Resolving deltas: 100% (27/27), done.
Updating files: 100% (724/724), done.


# **Bagian 1: Import Library dan Definisikan Fungsi**

In [11]:
import os
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, classification_report

In [12]:
def load_images_and_labels(image_folder, label_file):
    images = []
    labels = []

    # Membaca labels.json
    with open(label_file, 'r') as f:
        label_data = json.load(f)

    # Membaca setiap citra dan labelnya
    for item in label_data['images']:
        img_file = item['file_name']
        img_path = os.path.join(image_folder, img_file)
        image = cv2.imread(img_path)
        if image is not None:
            images.append(image)
            labels.append(item['id'])

    return images, labels

In [13]:
def apply_convolution(image, kernel):
    return cv2.filter2D(image, -1, kernel)

In [14]:
def rgb_to_hsv(image):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    return hsv_image

In [15]:
def extract_hsv_features_with_convolution(images, labels):
    sobel_x = np.array([[-1, 0, 1],
                        [-2, 0, 2],
                        [-1, 0, 1]])

    sobel_y = np.array([[-1, -2, -1],
                        [0, 0, 0],
                        [1, 2, 1]])

    features = []
    for image, label in zip(images, labels):
        convolved_image_x = apply_convolution(image, sobel_x)
        convolved_image_y = apply_convolution(image, sobel_y)

        convolved_image = cv2.addWeighted(convolved_image_x, 0.5, convolved_image_y, 0.5, 0)

        hsv_image = rgb_to_hsv(convolved_image)

        hsv_values = hsv_image.reshape(-1, 3)
        features.append((hsv_values, label))
    return features

In [16]:
def prepare_data(features):
    data = []
    labels = []
    for hsv_values, label in features:
        data.append(hsv_values)
        labels.extend([label] * len(hsv_values))
    return np.vstack(data), np.array(labels)

In [17]:
def test_model(knn, test_features, k=3):
    true_labels = []
    predicted_labels = []

    for hsv_values, true_label in test_features:
        ret, results, neighbours, dist = knn.findNearest(hsv_values.astype(np.float32), k=k)
        predicted_label = np.bincount(results.flatten().astype(int)).argmax()

        true_labels.append(true_label)
        predicted_labels.append(predicted_label)

    accuracy = np.mean(np.array(true_labels) == np.array(predicted_labels))
    return accuracy, true_labels, predicted_labels

In [18]:
# Fungsi untuk menghitung nilai rata-rata RGB untuk setiap kelas
def calculate_average_rgb(images, labels):
    class_rgb_values = {}
    for image, label in zip(images, labels):
        if label not in class_rgb_values:
            class_rgb_values[label] = []
        class_rgb_values[label].append(np.mean(image, axis=(0, 1)))

    average_rgb_values = {label: np.mean(np.array(rgb_values), axis=0) for label, rgb_values in class_rgb_values.items()}
    return average_rgb_values

# **Bagian 2: Memuat Data dan Mengekstraksi Fitur**

In [None]:
# Membaca data dari masing-masing folder
train_images, train_labels = load_images_and_labels('Blocks2/train/images', 'Blocks2/train/labels.json')
val_images, val_labels = load_images_and_labels('Blocks2/validation/images', 'Blocks2/validation/labels.json')
test_images, test_labels = load_images_and_labels('Blocks2/test/images', 'Blocks2/test/labels.json')

In [12]:
# Ekstraksi fitur dari citra dengan konvolusi
train_features = extract_hsv_features_with_convolution(train_images, train_labels)
val_features = extract_hsv_features_with_convolution(val_images, val_labels)
test_features = extract_hsv_features_with_convolution(test_images, test_labels)

In [13]:
# Menampilkan jumlah gambar di setiap folder
print(f"Number of training images: {len(train_images)}")
print(f"Number of validation images: {len(val_images)}")
print(f"Number of test images: {len(test_images)}")

Number of training images: 0
Number of validation images: 0
Number of test images: 0


# **Bagian 3: Melatih Model dan Menampilkan Grafik Akurasi dan F1 Score**

In [None]:
# Menyiapkan data untuk pelatihan
train_data, train_labels = prepare_data(train_features)
val_data, val_labels = prepare_data(val_features)

In [None]:
# Mencari akurasi untuk berbagai nilai K
k_values = range(1, 11)
train_accuracies = []
val_accuracies = []
val_f1_scores = []

In [None]:
for k in k_values:
    knn = cv2.ml.KNearest_create()
    knn.train(train_data.astype(np.float32), cv2.ml.ROW_SAMPLE, train_labels.astype(np.int32))

    # Menghitung akurasi pada data pelatihan
    train_accuracy, _, _ = test_model(knn, train_features, k=k)
    train_accuracies.append(train_accuracy)

    # Menghitung akurasi dan F1 Score pada data validasi
    val_accuracy, true_labels, predicted_labels = test_model(knn, val_features, k=k)
    val_accuracies.append(val_accuracy)
    val_f1_scores.append(f1_score(true_labels, predicted_labels, average='weighted'))

# Menampilkan hasil
print(f"Train Accuracies: {train_accuracies}")
print(f"Validation Accuracies: {val_accuracies}")
print(f"Validation F1 Scores: {val_f1_scores}")

In [None]:
# Menampilkan grafik akurasi dan F1 Score
plt.figure(figsize=(10, 5))
plt.plot(k_values, train_accuracies, label='Training Accuracy', marker='o')
plt.plot(k_values, val_accuracies, label='Validation Accuracy', marker='o')
plt.plot(k_values, val_f1_scores, label='Validation F1 Score', marker='o')
plt.xlabel('Number of Neighbors (K)')
plt.ylabel('Score')
plt.title('KNN Performance for Different K Values')
plt.legend()
plt.grid()
plt.show()

# **Bagian 4: Menghitung Nilai Rata-Rata RGB dan Menampilkan Hasil Uji**

In [None]:
# Menghitung nilai rata-rata RGB untuk setiap kelas
average_rgb_values = calculate_average_rgb(train_images, train_labels)
print(f"Average RGB values for each class: {average_rgb_values}")

In [None]:
# Menghitung F1 Score pada data uji
best_k = k_values[np.argmax(val_f1_scores)]
knn = cv2.ml.KNearest_create()
knn.train(train_data.astype(np.float32), cv2.ml.ROW_SAMPLE, train_labels.astype(np.int32))
test_accuracy, test_true_labels, test_predicted_labels = test_model(knn, test_features, k=best_k)
test_f1_score = f1_score(test_true_labels, test_predicted_labels, average='weighted')
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1 Score: {test_f1_score}")
print(classification_report(test_true_labels, test_predicted_labels))

# **Bagian 5: Menyimpan dan Memuat Model**

In [None]:
# Menyimpan model ke file
model_filename = 'knn_model.yml'
knn.save(model_filename)
print(f"Model saved to {model_filename}")

In [None]:
# Memuat model dari file (contoh penggunaan)
knn_loaded = cv2.ml.KNearest_create()
knn_loaded = knn_loaded.load(model_filename)

In [None]:
# Verifikasi model yang dimuat menghasilkan hasil yang sama
loaded_test_accuracy, loaded_test_true_labels, loaded_test_predicted_labels = test_model(knn_loaded, test_features, k=best_k)
loaded_test_f1_score = f1_score(loaded_test_true_labels, loaded_test_predicted_labels, average='weighted')
print(f"Loaded Test Accuracy: {loaded_test_accuracy}")
print(f"Loaded Test F1 Score: {loaded_test_f1_score}")
print(classification_report(loaded_test_true_labels, loaded_test_predicted_labels))