## Feature Engineering with Images

In [1]:
import os
import pandas as pd
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
import importlib

# Check if packages are installed
opencv_installed = importlib.util.find_spec('cv2') is not None
tensorflow_installed = importlib.util.find_spec('tensorflow') is not None
skimage_installed = importlib.util.find_spec('skimage') is not None

print('OpenCV Installed:', opencv_installed)
print('TensorFlow Installed:', tensorflow_installed)
print('scikit-image Installed:', skimage_installed)

if not all([opencv_installed, tensorflow_installed, skimage_installed]):
    print("Please install missing packages.")
    exit()

from tensorflow.keras.applications import VGG16
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

def extract_rgb_histogram(image):
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    return hist.flatten() / hist.sum()

def extract_lbp_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, P=8, R=1)
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 10), range=(0, 9))
    return hist.astype("float") / (hist.sum() + 1e-7)

def extract_cnn_features(image, model):
    img = cv2.resize(image, (224, 224))
    img = img_to_array(img)  # Use the function directly, not as a method
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    features = model.predict(img)
    return features.flatten()

# Initialize the VGG16 model
vgg_model = VGG16(weights='imagenet', include_top=False, pooling='avg')

# Test functions with a sample image
sample_image_path = 'pokemon_train_images/gen_1.0/Aerodactyl.png'
if os.path.exists(sample_image_path):
    sample_image = cv2.imread(sample_image_path)
    if sample_image is not None:
        print("RGB Histogram shape:", extract_rgb_histogram(sample_image).shape)
        print("LBP Features shape:", extract_lbp_features(sample_image).shape)
        print("CNN Features shape:", extract_cnn_features(sample_image, vgg_model).shape)
    else:
        print("Failed to load sample image")
else:
    print("Sample image not found")

print("All functions defined successfully")

OpenCV Installed: True
TensorFlow Installed: True
scikit-image Installed: True
RGB Histogram shape: (512,)
LBP Features shape: (9,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
CNN Features shape: (512,)
All functions defined successfully


In [2]:
# Load the Pokemon data
df = pd.read_csv('csv/pokemon_train.csv')

# Initialize lists to store features
rgb_features = []
lbp_features = []
cnn_features = []

# Process each image
for _, row in df.iterrows():
    image_path = os.path.join('/Users/marvinchen/Desktop/IW/cos-independent-work/pokemon_train_images', f"gen_{row['Generation']}", f"{row['Name']}.png")
    
    # Load and preprocess image
    image = cv2.imread(image_path)
    if image is not None:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Extract features
        rgb_hist = extract_rgb_histogram(image)
        lbp_feat = extract_lbp_features(image)
        cnn_feat = extract_cnn_features(image, vgg_model)
        
        # Append features to lists
        rgb_features.append(rgb_hist)
        lbp_features.append(lbp_feat)
        cnn_features.append(cnn_feat)
    else:
        print(f"Failed to load image: {image_path}")
        rgb_features.append(np.zeros(512))  # Adjust size as needed
        lbp_features.append(np.zeros(9))
        cnn_features.append(np.zeros(512))

# Convert feature lists to numpy arrays
rgb_features = np.array(rgb_features)
lbp_features = np.array(lbp_features)
cnn_features = np.array(cnn_features)

# Combine all features
all_features = np.hstack((rgb_features, lbp_features, cnn_features))

# Create a new dataframe with original data and new features
new_df = pd.concat([df, pd.DataFrame(all_features)], axis=1)

# Save the new dataframe to a CSV file
# new_df.to_csv('pokemon_with_train_features.csv', index=False)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [3]:
import pandas as pd
import numpy as np

# Assuming extracted features as before rgb_features, lbp_features, cnn_features

# Create summary statistics for each feature group
rgb_summary = pd.DataFrame({
    'RGB_Mean': np.mean(rgb_features, axis=1),
    'RGB_Std': np.std(rgb_features, axis=1),
    'RGB_Max': np.max(rgb_features, axis=1),
    'RGB_Min': np.min(rgb_features, axis=1)
})

lbp_summary = pd.DataFrame({
    'LBP_Mean': np.mean(lbp_features, axis=1),
    'LBP_Std': np.std(lbp_features, axis=1),
    'LBP_Max': np.max(lbp_features, axis=1),
    'LBP_Min': np.min(lbp_features, axis=1)
})

cnn_summary = pd.DataFrame({
    'CNN_Mean': np.mean(cnn_features, axis=1),
    'CNN_Std': np.std(cnn_features, axis=1),
    'CNN_Max': np.max(cnn_features, axis=1),
    'CNN_Min': np.min(cnn_features, axis=1)
})

# Combine original data with summary features
new_df = pd.concat([df, rgb_summary, lbp_summary, cnn_summary], axis=1)

# Save the new dataframe to a CSV file
new_df.to_csv('train_image_features.csv', index=False)