In [48]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import time
from matplotlib.colors import LinearSegmentedColormap

In [None]:
def image_convert(dir, new_size):
    """Load every PNG in a directory as a flattened, normalized grayscale vector.

    Parameters
    ----------
    dir : str
        Directory to scan (non-recursive). Entries that are not regular files,
        or whose name does not end in ``.png`` (case-insensitive), are skipped.
        The name shadows the ``dir`` builtin but is kept so existing callers
        (positional or keyword) keep working.
    new_size : tuple[int, int]
        Target ``(width, height)`` handed to ``Image.resize``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, width * height)``: one row per image that
        was read successfully, pixel values scaled by 1/255. When no image
        could be read the shape is ``(0, width * height)``, so the result can
        still be sliced and stacked like a non-empty one.

    Notes
    -----
    Files that fail to open or convert are reported with ``print`` and
    skipped; they do not abort the whole load.
    """
    n_pixels = new_size[0] * new_size[1]
    imagenes = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore any positional train/test split) reproducible.
    for filename in sorted(os.listdir(dir)):

        file_path = os.path.join(dir, filename)

        if not os.path.isfile(file_path):
            continue

        # Skip non-PNG files (the original author needed this filter on
        # Windows, where the folder holds other files).
        if not filename.lower().endswith('.png'):
            continue

        try:
            with Image.open(file_path) as img:
                img = img.resize(new_size)
                img = img.convert('L')  # grayscale, one 8-bit channel
                img_array = np.array(img) / 255.0  # scale to [0, 1]
                # Flatten row-major. reshape(-1) works for any (width, height),
                # not only square sizes.
                img_vector = img_array.reshape(-1)
                imagenes.append(img_vector)

        except Exception as e:
            print(f"Error procesando {file_path}: {e}")

    if not imagenes:
        # Keep the result 2-D even when nothing was loaded.
        return np.empty((0, n_pixels))

    return np.array(imagenes)

Convertimos las imágenes a arreglos de pixeles y separamos en train y test

In [50]:
# Available compression sizes as (width, height): large, medium, small
grande, mediano, chico = (256, 256), (128, 128), (64, 64)

# Size actually used by the rest of the notebook
new_size = chico

En el dataset provisto tenemos 1617 dibujos de pacientes sanos y 1629 dibujos de pacientes con Parkinson.

¿Cómo armamos el conjunto de entrenamiento?
* Tiene que tener cantidades parecidas de pacientes con y sin Parkinson.
* Aproximadamente el 70% del total de observaciones (en td6)

Podemos hacerlo así: \
train \
sanos: 1132 \
enfermos: 1140 

test \
sanos: 485 \
enfermos: 489

In [51]:
# Number of drawings per class assigned to each split (healthy, Parkinson)
cant_healthy_train, cant_park_train = 1132, 1140
cant_healthy_test, cant_park_test = 485, 489

In [None]:
# Convert every image to a pixel vector at the chosen size and build one
# diagnosis label per image.

# Healthy drawings -> label 0
src_dir = 'DatasetTP/Healthy'
i_healthy = image_convert(src_dir, new_size)
d_healthy = np.zeros((i_healthy.shape[0], 1))

# Parkinson drawings -> label 1
src_dir = 'DatasetTP/Parkinson'
i_park = image_convert(src_dir, new_size)
d_park = np.ones((i_park.shape[0], 1))

# Within each class the first rows go to train and the rows right after them
# go to test; these are the row indices where each test slice stops.
healthy_end = cant_healthy_train + cant_healthy_test
park_end = cant_park_train + cant_park_test

# Positional split: images
i_healthy_train, i_healthy_test = i_healthy[:cant_healthy_train], i_healthy[cant_healthy_train:healthy_end]
i_park_train, i_park_test = i_park[:cant_park_train], i_park[cant_park_train:park_end]

# Positional split: labels (same row ranges as the images)
d_healthy_train, d_healthy_test = d_healthy[:cant_healthy_train], d_healthy[cant_healthy_train:healthy_end]
d_park_train, d_park_test = d_park[:cant_park_train], d_park[cant_park_train:park_end]

# Stack healthy rows on top of Parkinson rows for each split
i_train = np.vstack([i_healthy_train, i_park_train])
d_train = np.vstack([d_healthy_train, d_park_train])

i_test = np.vstack([i_healthy_test, i_park_test])
d_test = np.vstack([d_healthy_test, d_park_test])

[[0.9372549  0.94509804 0.94509804 ... 0.92156863 0.9254902  0.9254902 ]
 [0.65882353 0.65882353 0.65882353 ... 0.65882353 0.6627451  0.6627451 ]
 [0.64313725 0.61176471 0.61176471 ... 0.6627451  0.66666667 0.66666667]
 ...
 [0.6627451  0.6627451  0.65490196 ... 0.65882353 0.66666667 0.6627451 ]
 [0.6627451  0.6627451  0.65882353 ... 0.6627451  0.66666667 0.66666667]
 [0.65882353 0.65882353 0.65098039 ... 0.65882353 0.6627451  0.66666667]]
