In [51]:
import pandas as pd
import cv2
import numpy as np
import os
import time
import matplotlib.pyplot as plt

from tqdm import tqdm
from rembg import remove
from PIL import Image
from scipy.stats import skew

In [23]:
# Root folder of the masked training set; print its entries to see which
# class folders are available.
image_path = "data/masked-train-set/"
class_folders = os.listdir(image_path)
print(class_folders)

['ADONIS', 'AFRICAN GIANT SWALLOWTAIL', 'AMERICAN SNOOT', 'AN 88', 'APPOLLO', 'ATALA', 'BANDED ORANGE HELICONIAN', 'BANDED PEACOCK', 'BECKERS WHITE', 'BLACK HAIRSTREAK', 'BLUE MORPHO', 'BLUE SPOTTED CROW', 'BROWN SIPROETA', 'CABBAGE WHITE', 'CAIRNS BIRDWING', 'CHECQUERED SKIPPER', 'CHESTNUT', 'CLEOPATRA', 'CLODIUS PARNASSIAN', 'CLOUDED SULPHUR', 'COMMON BANDED AWL', 'COMMON WOOD-NYMPH', 'COPPER TAIL', 'CRECENT', 'CRIMSON PATCH', 'DANAID EGGFLY', 'EASTERN COMA', 'EASTERN DAPPLE WHITE', 'EASTERN PINE ELFIN', 'ELBOWED PIERROT', 'GOLD BANDED', 'GREAT EGGFLY', 'GREAT JAY', 'GREEN CELLED CATTLEHEART', 'GREY HAIRSTREAK', 'INDRA SWALLOW', 'IPHICLUS SISTER', 'JULIA', 'LARGE MARBLE', 'MALACHITE', 'MANGROVE SKIPPER', 'MESTRA', 'METALMARK', 'MILBERTS TORTOISESHELL', 'MONARCH', 'MOURNING CLOAK', 'ORANGE OAKLEAF', 'ORANGE TIP', 'ORCHARD SWALLOW', 'PAINTED LADY', 'PAPER KITE', 'PEACOCK', 'PINE WHITE', 'PIPEVINE SWALLOW', 'POPINJAY', 'PURPLE HAIRSTREAK', 'PURPLISH COPPER', 'QUESTION MARK', 'RED ADMIRA

In [27]:
# Source folders of the three classes used in this experiment.
adonis = image_path + "ADONIS/"
clouded_sulphur = image_path + "CLOUDED SULPHUR/"
scarce_swallow = image_path + "SCARCE SWALLOW/"

# Report how many entries each class folder holds.
for display_name, class_dir in (
    ("Adonis", adonis),
    ("Clouded Sulphur", clouded_sulphur),
    ("Scarce Swallow", scarce_swallow),
):
    print(f"{display_name}: {len(os.listdir(class_dir))}")

# Destination root for the background-removed images. exist_ok=True keeps the
# cell safe to re-run and avoids the check-then-create race of testing
# os.path.exists() before calling os.makedirs().
output_path = "data/no-bg/"
os.makedirs(output_path, exist_ok=True)


Adonis: 88
Clouded Sulphur: 92
Scarce Swallow: 97


In [35]:
def remove_bg_and_resize(input_dir, output_dir, size=(128, 128), overwrite=True):
    """Strip the background from every image in a folder and save resized PNGs.

    Each ``.jpg`` / ``.jpeg`` / ``.png`` file in ``input_dir`` (extension
    matched case-insensitively) is opened, converted to RGBA, passed through
    ``rembg.remove``, resized to ``size`` with Lanczos resampling and written
    to ``output_dir`` under the same base name with a ``.png`` extension.
    Files with any other extension are ignored. One progress line is printed
    per handled file.

    Args:
        input_dir: Folder containing the source images.
        output_dir: Folder that receives the processed PNGs; it is created
            (including missing parents) when it does not exist yet.
        size: Target size handed to ``Image.resize``.
        overwrite: When True (the default) every image is processed, even if
            its output file already exists. Pass False to skip images whose
            output is already present, which makes re-running the cell cheap.
    """
    # exist_ok=True avoids the race between checking for the folder and creating it.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the
    # processing order (and the log) is the same on every run and machine.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        input_file = os.path.join(input_dir, filename)
        output_file = os.path.join(output_dir, os.path.splitext(filename)[0] + ".png")

        if not overwrite and os.path.exists(output_file):
            print(f"Skipped (already exists): {filename}")
            continue

        # perf_counter is a monotonic clock, so the elapsed time cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        # convert() loads the pixel data, so the file can be closed right after.
        with Image.open(input_file) as source:
            input_image = source.convert("RGBA")

        output_image = remove(input_image)
        output_image = output_image.resize(size, Image.LANCZOS)
        output_image.save(output_file)

        print(f"Processed: {filename}, elapsed time: {time.perf_counter() - start_time:.2f} seconds")

# Run background removal for each selected class, writing the results into a
# per-class sub-folder of the output root.
for source_dir, target_name in (
    (adonis, "adonis"),
    (clouded_sulphur, "clouded_sulphur"),
    (scarce_swallow, "scarce_swallow"),
):
    remove_bg_and_resize(source_dir, os.path.join(output_path, target_name))


Processed: Image_1087.jpg, elapsed time: 1.22 seconds
Processed: Image_1131.jpg, elapsed time: 1.11 seconds
Processed: Image_1211.jpg, elapsed time: 1.10 seconds
Processed: Image_1565.jpg, elapsed time: 1.06 seconds
Processed: Image_1712.jpg, elapsed time: 1.03 seconds
Processed: Image_1772.jpg, elapsed time: 1.05 seconds
Processed: Image_1849.jpg, elapsed time: 1.05 seconds
Processed: Image_1858.jpg, elapsed time: 1.06 seconds
Processed: Image_1907.jpg, elapsed time: 1.04 seconds
Processed: Image_1927.jpg, elapsed time: 1.06 seconds
Processed: Image_1937.jpg, elapsed time: 1.03 seconds
Processed: Image_2.jpg, elapsed time: 1.00 seconds
Processed: Image_2101.jpg, elapsed time: 1.02 seconds
Processed: Image_2132.jpg, elapsed time: 1.02 seconds
Processed: Image_2203.jpg, elapsed time: 1.00 seconds
Processed: Image_2255.jpg, elapsed time: 1.01 seconds
Processed: Image_2275.jpg, elapsed time: 0.99 seconds
Processed: Image_2360.jpg, elapsed time: 0.99 seconds
Processed: Image_2401.jpg, elap

In [53]:
def extract_color_moments(image):
    """Compute the first three color moments of each color channel.

    Args:
        image: Array of shape (height, width, channels) with 3 or 4 channels
            whose first three are in OpenCV's B, G, R order. A fourth channel
            (e.g. alpha) is ignored.

    Returns:
        tuple: ``(features, columns)``. ``features`` is a float array of
        length 9 laid out as mean, standard deviation and skewness, each in
        R, G, B order; ``columns`` is the list of matching column names.
        The skewness of a channel without any variation is reported as 0.0.

    Raises:
        ValueError: If ``image`` is empty or is not a 3-D array with 3 or 4
            channels.
    """
    image = np.asarray(image)
    if image.ndim != 3 or image.shape[2] not in (3, 4) or image.size == 0:
        raise ValueError(
            "expected a non-empty (height, width, 3 or 4) BGR image, "
            f"got shape {image.shape}"
        )

    # Channels 2, 1, 0 of a BGR image are R, G, B; the slice also drops a
    # possible fourth channel. Flatten to one row per pixel.
    pixels = image[..., 2::-1].reshape(-1, 3).astype(np.float64)

    means = pixels.mean(axis=0)
    stds = pixels.std(axis=0)

    # Skewness is undefined for a constant channel (scipy yields NaN there),
    # and a NaN would poison the feature table, so such channels get 0.0.
    skews = np.zeros(3)
    varying = stds > 0
    if varying.any():
        skews[varying] = skew(pixels[:, varying], axis=0)
    skews = np.where(np.isnan(skews), 0.0, skews)

    features = np.concatenate([means, stds, skews])
    columns = [
        'means_R', 'means_G', 'means_B',
        'std_R', 'std_G', 'std_B',
        'skew_R', 'skew_G', 'skew_B'
    ]

    return features, columns

def extract_features_from_folder(root_dir):
    """Build a table of color-moment features for the images under ``root_dir``.

    Every immediate sub-directory of ``root_dir`` is treated as one class and
    its name becomes the ``label`` of each image inside it. Entries of
    ``root_dir`` that are not directories are ignored, and files that
    ``cv2.imread`` cannot decode are skipped and reported.

    Args:
        root_dir: Folder containing one sub-directory per class.

    Returns:
        pandas.DataFrame: One row per readable image, with the feature columns
        named by ``extract_color_moments`` followed by a ``label`` column.
        When no image could be read, the frame only has the ``label`` column.
    """
    data = []
    columns = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting both levels keeps
    # the row order of the resulting table reproducible across runs/machines.
    for class_name in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        print(f"Processing class: {class_name}")
        skipped = []
        for img_file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_file)
            # NOTE(review): cv2.imread's default flag loads a 3-channel BGR
            # image, so an alpha channel in the file is dropped here and
            # transparent background pixels count towards the statistics —
            # confirm this is intended.
            image = cv2.imread(img_path)
            if image is None:
                # imread returns None instead of raising when it cannot decode.
                skipped.append(img_file)
                continue
            features, columns = extract_color_moments(image)
            data.append(features)
            labels.append(class_name)

        # Surface dropped files instead of silently shrinking the dataset.
        if skipped:
            print(f"  Skipped {len(skipped)} unreadable file(s): {skipped}")

    df = pd.DataFrame(data, columns=columns)
    df['label'] = labels
    return df

# Extract the color-moment features of all processed images and persist them
# as a CSV file (without the index column).
features_csv = "data/color_moments_features.csv"
df = extract_features_from_folder(output_path)
df.to_csv(features_csv, index=False)
print("Feature extraction complete. Saved to color_moments_features.csv")


Processing class: adonis
Processing class: clouded_sulphur
Processing class: scarce_swallow
Feature extraction complete. Saved to color_moments_features.csv
