## Librerías

In [2]:
import cv2
import numpy as np
import pandas as pd
import glob
from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy

import os
import glob
import shutil
import random

## Declaración de función para extracción de features

In [3]:
def extract_features(img_path):
    """Compute colour, texture and edge descriptors for a single image.

    Parameters
    ----------
    img_path : str
        Path of the image file; passed unchanged to ``cv2.imread``.

    Returns
    -------
    dict
        One scalar per key:

        * ``mean_r``, ``mean_g``, ``mean_b`` -- per-channel means of the RGB image.
        * ``saturation``, ``brightness`` -- means of the S and V channels of the
          HSV conversion.
        * ``entropy`` -- Shannon entropy of the grayscale image.
        * ``contrast``, ``homogeneity`` -- GLCM properties computed for
          distance 1 and angle 0.
        * ``edge_density`` -- mean of the Canny edge map divided by 255.

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None`` for ``img_path`` (missing file,
        unsupported/corrupt data, or a path OpenCV cannot open).
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        # Fail here, naming the file, instead of letting cvtColor die with an
        # opaque assertion that does not mention which image was at fault.
        raise OSError(f"cv2.imread could not read image: {img_path!r}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; switch to RGB
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # single-channel copy for texture/edges

    # Colour: average each channel over both spatial axes.
    mean_r, mean_g, mean_b = img.mean(axis=(0, 1))

    # HSV (hue, saturation, value): hue is discarded, only S and V are averaged.
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    _, s, v = cv2.split(hsv)
    mean_saturation = s.mean()
    mean_brightness = v.mean()

    # Entropy: amount of information (or noise/disorder) in the gray levels.
    entropy = shannon_entropy(gray)

    # GLCM looks at pairs of neighbouring pixels to describe texture.
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    # High contrast: neighbouring pixels differ strongly (e.g. white next to black).
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    # High homogeneity: neighbouring pixels are alike (e.g. black next to black).
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]

    # Edge density: Canny marks edge pixels as 255, so mean/255 is the share
    # of pixels flagged as edges.
    edges = cv2.Canny(gray, 100, 200)
    edge_density = edges.mean() / 255

    return {
        "mean_r": mean_r,
        "mean_g": mean_g,
        "mean_b": mean_b,
        "saturation": mean_saturation,
        "brightness": mean_brightness,
        "entropy": entropy,
        "contrast": contrast,
        "homogeneity": homogeneity,
        "edge_density": edge_density
    }



## Creación del DataFrame de features

In [4]:
# Run extract_features on every resized image and build the feature DataFrame.

rows = []

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the row order of the resulting DataFrame reproducible across machines.
lista_imagenes = sorted(glob.glob("resized/resized/*.jpg"))

for img_path in lista_imagenes:

    feats = extract_features(img_path)

    nombre_archivo = os.path.basename(img_path)
    feats['nombre_archivo'] = nombre_archivo
    # Artist label: every "_"-separated piece of the file name except the last
    # one, joined with spaces (e.g. "Albrecht_Durer_1.jpg" -> "Albrecht Durer").
    feats['artista'] = " ".join(nombre_archivo.split("_")[:-1])

    rows.append(feats)

# One row per image; columns are the feature keys plus the two labels above.
df_features = pd.DataFrame(rows)


## Exportación del DataFrame de features

In [5]:
# Write the feature table to disk as UTF-8 CSV, leaving out the row index.
df_features.to_csv("features.csv", encoding="utf-8", index=False)

In [6]:
# Bare last expression: renders the feature table as this cell's output.
df_features

Unnamed: 0,mean_r,mean_g,mean_b,saturation,brightness,entropy,contrast,homogeneity,edge_density,nombre_archivo,artista
0,146.990254,146.990254,146.990254,0.000000,146.990254,6.743850,201.094119,0.169797,0.104023,Albrecht_Durer_1.jpg,Albrecht Durer
1,159.670935,159.670935,159.670935,0.000000,159.670935,6.706434,77.657805,0.207598,0.068590,Albrecht_Durer_10.jpg,Albrecht Durer
2,164.919503,164.919503,164.919503,0.000000,164.919503,7.438298,1018.658764,0.055124,0.310658,Albrecht_Durer_100.jpg,Albrecht Durer
3,143.432153,143.034284,125.672592,36.486183,143.902312,7.361372,755.169391,0.116950,0.236324,Albrecht_Durer_101.jpg,Albrecht Durer
4,210.751098,210.751098,210.751098,0.000000,210.751098,5.892412,340.986782,0.315556,0.103832,Albrecht_Durer_102.jpg,Albrecht Durer
...,...,...,...,...,...,...,...,...,...,...,...
8350,175.625258,144.627322,98.348124,112.814300,175.731281,7.316246,217.531489,0.127208,0.093479,William_Turner_65.jpg,William Turner
8351,168.333393,165.007926,128.280145,80.755879,171.881410,7.560004,251.524466,0.163713,0.119217,William_Turner_66.jpg,William Turner
8352,209.532713,180.242893,146.655425,79.703043,209.549494,6.494696,122.522374,0.244142,0.061599,William_Turner_7.jpg,William Turner
8353,167.384194,163.995367,127.523469,81.077491,170.922917,7.550143,256.711597,0.183247,0.121136,William_Turner_8.jpg,William Turner
