### Import Library

In [36]:
import numpy as np
import joblib
import os
import io
import glob
import shutil

from rembg import remove
from PIL import Image
from scipy.stats import skew

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

### Menentukan path citra uji

In [30]:
test_images_path = "data/test/"
image1 = test_images_path + "Image_165.jpg"

### Membuat pipeline sekaligus pelatihan agar nantinya dapat digunakan untuk inferensi citra baru

In [31]:
def resize_image(image_path, target_size=(128, 128)):
    image = Image.open(image_path)
    image = image.resize(target_size)
    return image

def remove_background(image_path):
    with open(image_path, 'rb') as input_file:
        input_data = input_file.read()
        output_data = remove(input_data)  
    image = Image.open(io.BytesIO(output_data)).convert("RGBA")
    return image

def extract_color_moments(image_path):
    image = Image.open(image_path)
    image_array = np.array(image)

    if len(image_array.shape) == 2:  
        image_array = np.stack((image_array,) * 3, axis=-1)
    elif image_array.shape[2] == 4: 
        image_array = image_array[:, :, :3]  
    
    mean = np.mean(image_array, axis=(0, 1))
    std_dev = np.std(image_array, axis=(0, 1))
    skewness = skew(image_array.reshape(-1, image_array.shape[2]), axis=0)
    
    return np.concatenate([mean, std_dev, skewness])

In [32]:
class ImageFeatureExtractor(BaseEstimator, TransformerMixin):
    def __init__(self, target_size=(128, 128), remove_bg=False, temp_dir=None):
        self.target_size = target_size
        self.remove_bg = remove_bg
        self.temp_dir = temp_dir if temp_dir else 'temp_processed_images'
        
        if self.remove_bg and not os.path.exists(self.temp_dir):
            os.makedirs(self.temp_dir)
        
    def fit(self, X, y=None):
        return self
        
    def transform(self, X):
        features = []
        
        for i, image_path in enumerate(X):
            if self.remove_bg:
                img = remove_background(image_path)
                temp_path = os.path.join(self.temp_dir, f"temp_{i}.png")
                img.save(temp_path)
                img = resize_image(temp_path, self.target_size)
                
                feature_vector = extract_color_moments(temp_path)
            else:
                img = resize_image(image_path, self.target_size)
                feature_vector = extract_color_moments(image_path)
                
            features.append(feature_vector)
            
        return np.array(features)

In [33]:
def prepare_butterfly_dataset(base_path="data/masked-train-set/"):
    classes = ["Adonis", "Clouded Sulphur", "Scarce Swallow"]
    
    image_paths = []
    labels = []
    label_map = {i: cls for i, cls in enumerate(classes)}
    
    for i, cls in enumerate(classes):
        class_path = os.path.join(base_path, cls)
        class_images = glob.glob(os.path.join(class_path, "*.jpg")) + \
                        glob.glob(os.path.join(class_path, "*.jpeg")) + \
                        glob.glob(os.path.join(class_path, "*.png"))
        
        image_paths.extend(class_images)
        labels.extend([i] * len(class_images))
    
    return image_paths, labels, label_map

In [34]:
def butterfly_classification_pipeline(base_path="data/masked-train-set/", 
                                    target_size=(128, 128), 
                                    remove_bg=False,
                                    test_size=0.3,
                                    random_state=42):
    image_paths, labels, label_map = prepare_butterfly_dataset(base_path)
    
    X_train, X_test, y_train, y_test = train_test_split(
        image_paths, labels, test_size=test_size, random_state=random_state
    )
    
    pipeline = Pipeline([
        ('feature_extraction', ImageFeatureExtractor(target_size=target_size, remove_bg=remove_bg)),
        ('classifier', KNeighborsClassifier(n_neighbors=3))
    ])
    
    pipeline.fit(X_train, y_train)

    os.makedirs('model/labelmap', exist_ok=True)

    joblib.dump(pipeline, 'model/butterfly_classifier.joblib')
    with open(f"model/labelmap/butterfly_classifier_labels.txt", 'w') as f:
        for label_id, label_name in label_map.items():
            f.write(f"{label_id}:{label_name}\n")
    
    y_pred = pipeline.predict(X_test)
    report = classification_report(y_test, y_pred, target_names=[label_map[i] for i in range(len(label_map))])
    
    return pipeline, report, label_map

In [35]:
if __name__ == "__main__":
    image_path = "data/masked-train-set/"
    
    trained_pipeline, classification_report, label_map = butterfly_classification_pipeline(
        base_path=image_path,
        target_size=(128, 128),
        remove_bg=True
    )
    
    print("Butterfly Classification Report:")
    print(classification_report)
    
    if os.path.exists(image1):
        prediction = trained_pipeline.predict([image1])[0]
        print(f"Prediction for new image: {label_map[prediction]}")

    if os.path.exists("temp_processed_images"):
        shutil.rmtree("temp_processed_images")
        print("Temp folder removed successfully.")

Butterfly Classification Report:
                 precision    recall  f1-score   support

         Adonis       1.00      0.96      0.98        28
Clouded Sulphur       1.00      0.96      0.98        25
 Scarce Swallow       0.94      1.00      0.97        31

       accuracy                           0.98        84
      macro avg       0.98      0.97      0.98        84
   weighted avg       0.98      0.98      0.98        84

Prediction for new image: Adonis
Temp folder removed successfully.


### Kesimpulan
Pipeline ini menggabungkan beberapa langkah preprocessing dan model klasifikasi ke dalam satu objek, sehingga memudahkan penggunaan untuk inferensi citra baru. Contoh inferensi dapat dilihat pada file [`inference.py`](https://github.com/nabilsaragih/compvis-course/blob/main/image-classification/inference.py)