# Image Feature Extraction

In [None]:
import pickle

import numpy as np

# numeric helpers
from scipy.ndimage import convolve

# skimage feature + morphology + measurement
from skimage.feature import local_binary_pattern, hog as sk_hog
from skimage.measure import (
    label,
    moments_central,
    moments_hu,
    moments_normalized,
    regionprops,
)
from skimage.morphology import skeletonize

# ML utils
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
# Locations of the preprocessed train / test archives (.npz) read by the
# loading cell below.
train_path = "output/emnist_train_processed.npz"
test_path = "output/emnist_test_processed.npz"

In [None]:
# Read images and labels out of each archive; the context manager closes the
# underlying .npz file as soon as the arrays have been pulled out.
with np.load(train_path) as data:
    X_train, y_train = data['X_train'], data['y_train']

with np.load(test_path) as data:
    X_test, y_test = data['X_test'], data['y_test']

# Quick sanity check of what was loaded.
print(f"Train set shape: {X_train.shape}\tTrain labels shape: {y_train.shape}")
print(f"Test set shape: {X_test.shape}\tTest labels shape: {y_test.shape}")

In [None]:
def zoning(img):
    """Return zoning densities plus row/column projection profiles.

    The image is binarised (pixel > 0) and described by:
    - the foreground ratio of each cell of a 4x4 grid (16 values),
    - the per-row foreground counts, normalised to sum to ~1 (h values),
    - the per-column foreground counts, normalised to sum to ~1 (w values).

    For a 28x28 image this gives 16 + 28 + 28 = 72 features.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        1-D float32 vector of length 16 + h + w; 72 zeros when ``img`` is None.
    """
    if img is None:
        return np.zeros(72, dtype=np.float32)
    if img.ndim == 1:
        edge = int(np.sqrt(img.size))
        img = img.reshape(edge, edge)

    # Binarise so that grey-level inputs behave like 0/1 masks.
    mask = (img.astype(np.float32, copy=False) > 0).astype(np.float32)
    rows, cols = mask.shape
    cell_h = rows // 4
    cell_w = cols // 4

    # Row-major walk over the 4x4 grid; rows/cols beyond 4 * cell size are ignored.
    cell_density = np.array(
        [
            mask[r * cell_h:(r + 1) * cell_h, c * cell_w:(c + 1) * cell_w].mean()
            for r in range(4)
            for c in range(4)
        ],
        dtype=np.float32,
    )

    row_profile = mask.sum(axis=1)
    col_profile = mask.sum(axis=0)
    # The epsilon keeps a blank image from dividing by zero.
    row_profile = row_profile / (row_profile.sum() + 1e-7)
    col_profile = col_profile / (col_profile.sum() + 1e-7)

    return np.concatenate([cell_density,
                           row_profile.astype(np.float32),
                           col_profile.astype(np.float32)])


def hog(img):
    """Return the Histogram of Oriented Gradients descriptor of an image.

    Parameters are tuned for 28x28 inputs: 9 orientations, 4x4-pixel cells,
    2x2-cell blocks and 'L2-Hys' block normalisation. A 28x28 image has 7x7
    cells, hence 6x6 overlapping blocks of 2*2*9 values = 1296 features.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        1-D float32 descriptor. When ``img`` is None a zero vector with the
        length of a 28x28 descriptor (1296) is returned, so the fallback can be
        stacked with real descriptors.
    """
    orientations = 9
    cell = 4
    block = 2
    if img is None:
        # Match the descriptor length of a real 28x28 image; a shorter
        # placeholder would make the per-image feature vectors ragged.
        blocks_per_side = 28 // cell - block + 1
        n_features = blocks_per_side ** 2 * block ** 2 * orientations
        return np.zeros(n_features, dtype=np.float32)
    if img.ndim == 1:
        side = int(np.sqrt(img.size))
        img = img.reshape(side, side)
    img = img.astype(np.float32, copy=False)
    # Inputs whose maximum exceeds 1 are treated as 8-bit and scaled to [0, 1].
    if img.max() > 1.0:
        img = img / 255.0
    features = sk_hog(img,
                      orientations=orientations,
                      pixels_per_cell=(cell, cell),
                      cells_per_block=(block, block),
                      block_norm='L2-Hys',
                      transform_sqrt=True,
                      visualize=False,
                      feature_vector=True)
    return features.astype(np.float32, copy=False)


def hu(img):
    """Return the 7 Hu invariant moments of the binarised image, log-scaled.

    ``moments_hu`` expects *normalized central moments*, not pixels, so the
    mask is first reduced with ``moments_central`` and ``moments_normalized``.
    Each moment h is then mapped to ``-sign(h) * log10(|h| + 1e-10)`` to
    compress its dynamic range.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        float32 vector of length 7; all zeros when ``img`` is None or contains
        no foreground pixel.
    """
    if img is None:
        return np.zeros(7, dtype=np.float32)
    if img.ndim == 1:
        side = int(np.sqrt(img.size))
        img = img.reshape(side, side)
    bin_img = (img > 0).astype(np.float64)
    if not bin_img.any():
        # A blank image has zero mass, so its normalized moments are 0/0 (NaN);
        # return a neutral vector instead of propagating NaNs downstream.
        return np.zeros(7, dtype=np.float32)
    mu = moments_central(bin_img)   # central moments up to order 3 (4x4)
    nu = moments_normalized(mu)     # scale-normalized central moments
    vals = moments_hu(nu)
    vals = -np.sign(vals) * np.log10(np.abs(vals) + 1e-10)
    return vals.astype(np.float32)


def lbp(img):
    """Return the normalised histogram of uniform Local Binary Patterns.

    Uses P=8 neighbours at radius R=1 with the 'uniform' method and bins the
    resulting codes into 10 equal-width bins over [0, 10].

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        float32 vector of length 10 summing to ~1; all zeros when ``img`` is
        None.
    """
    n_bins = 10  # P + 2 codes for the uniform method with P = 8
    if img is None:
        return np.zeros(n_bins, dtype=np.float32)
    if img.ndim == 1:
        edge = int(np.sqrt(img.size))
        img = img.reshape(edge, edge)

    codes = local_binary_pattern(img.astype(np.float32, copy=False),
                                 P=8, R=1, method='uniform')
    counts = np.histogram(codes.ravel(), bins=n_bins, range=(0, n_bins))[0]
    counts = counts.astype(np.float32)
    # The epsilon guards against an empty histogram.
    counts /= (counts.sum() + 1e-7)
    return counts


def skeleton(img):
    """Return four descriptors of the skeleton of the binarised image.

    Features, in order:
    - skeleton length (number of skeleton pixels),
    - number of endpoints (skeleton pixels with exactly 1 skeleton neighbour),
    - number of branch points (skeleton pixels with 3 or more neighbours),
    - skeleton length divided by the foreground area.

    Neighbours are counted with 8-connectivity.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        float32 vector of length 4; all zeros when ``img`` is None.
    """
    if img is None:
        return np.zeros(4, dtype=np.float32)
    if img.ndim == 1:
        edge = int(np.sqrt(img.size))
        img = img.reshape(edge, edge)

    foreground = img > 0
    thin = skeletonize(foreground).astype(np.uint8)

    # 3x3 ring of ones: convolving with it counts the 8 neighbours of each
    # pixel; the zero padding treats everything outside the image as background.
    ring = np.ones((3, 3), dtype=np.uint8)
    ring[1, 1] = 0
    degree = convolve(thin, ring, mode='constant', cval=0)

    on_skeleton = thin == 1
    n_endpoints = np.sum(on_skeleton & (degree == 1))
    n_branches = np.sum(on_skeleton & (degree >= 3))
    total = thin.sum()
    mass = foreground.astype(np.uint8).sum()
    # The epsilon keeps a blank image from dividing by zero.
    density = total / (mass + 1e-7)
    return np.array([total, n_endpoints, n_branches, density], dtype=np.float32)


def countours(img):
    """Return region properties of the largest connected component.

    The image is binarised (pixel > 0) and labelled; the component with the
    greatest area is described by, in order: area, eccentricity, extent,
    solidity, bounding-box aspect ratio (width / height) and perimeter.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        float32 vector of length 6; all zeros when ``img`` is None or no
        region is found.
    """
    if img is None:
        return np.zeros(6, dtype=np.float32)
    if img.ndim == 1:
        edge = int(np.sqrt(img.size))
        img = img.reshape(edge, edge)

    regions = regionprops(label((img > 0).astype(np.uint8)))
    if len(regions) == 0:
        return np.zeros(6, dtype=np.float32)

    # Keep only the dominant blob.
    largest = max(regions, key=lambda region: region.area)
    top, left, bottom, right = largest.bbox
    # The epsilon protects against a zero-height bounding box.
    aspect = (right - left) / ((bottom - top) + 1e-7)
    perimeter = getattr(largest, 'perimeter', 0.0)
    return np.array(
        [
            largest.area,
            largest.eccentricity,
            largest.extent,
            largest.solidity,
            aspect,
            perimeter,
        ],
        dtype=np.float32,
    )


def pixel_run_length(img):
    """Return run-length statistics of foreground pixels along rows and columns.

    The image is binarised (pixel > 0). For every row, the lengths of the
    consecutive foreground runs are collected; a row without foreground
    contributes a single run of length 0. The same is done for every column.

    Features, in order: mean, std and max of the horizontal runs, then mean,
    std and max of the vertical runs.

    Args:
        img: 2-D array, a 1-D array that is reshaped to a square, or None.

    Returns:
        float32 vector of length 6; all zeros when ``img`` is None.
    """
    if img is None:
        return np.zeros(6, dtype=np.float32)
    if img.ndim == 1:
        edge = int(np.sqrt(img.size))
        img = img.reshape(edge, edge)
    mask = (img > 0).astype(np.uint8)

    def line_runs(line):
        # Pad with background on both sides so that runs touching the border
        # still produce a rising (+1) and a falling (-1) edge.
        edges = np.diff(np.concatenate(([0], line, [0])))
        lengths = np.flatnonzero(edges == -1) - np.flatnonzero(edges == 1)
        return lengths.tolist() if lengths.size else [0]

    def summarise(lines):
        pooled = np.array([n for line in lines for n in line_runs(line)],
                          dtype=np.float32)
        if pooled.size == 0:
            return np.zeros(3, dtype=np.float32)
        return np.array([pooled.mean(), pooled.std(), pooled.max()],
                        dtype=np.float32)

    # Iterating the mask yields rows; iterating its transpose yields columns.
    return np.concatenate([summarise(mask), summarise(mask.T)])


def extract_features(X):
    """Build the full feature matrix for a collection of images.

    Every image is passed through each extractor (zoning, HOG, Hu moments,
    LBP, skeleton, region properties, run lengths) and the results are
    concatenated in that order. The total feature count is printed.

    Args:
        X: iterable of images, each in a form accepted by the extractors.

    Returns:
        float32 array of shape (n_samples, n_features).
    """
    # Order matters: it fixes the column layout of the output matrix.
    extractors = (zoning, hog, hu, lbp, skeleton, countours, pixel_run_length)
    rows = [
        np.concatenate([extract(img) for extract in extractors])
        for img in X
    ]
    features = np.vstack(rows).astype(np.float32)
    print(f"Vector de características total: {features.shape[1]} dimensiones")
    return features

In [None]:
# Build the feature matrix of each split. The two calls are kept as separate
# statements so that train_features stays available in the notebook even if
# the test-set extraction fails.
train_features = extract_features(X_train)
test_features = extract_features(X_test)