In [None]:
import numpy as np
import matplotlib.pyplot as plt
from process_images import *
from numpy_to_pandas import numpy_to_pandas_dataset
from utils import *

In [None]:
%load_ext autoreload
%autoreload 2

In [15]:
# Open the processed dataset archive directly with NumPy.
# NOTE(review): `data` is not referenced again in the cells visible here — confirm it is still needed.
data = np.load("processed_dataset.npz")
# Build the tabular form of the same file through the project helper numpy_to_pandas_dataset.
df = numpy_to_pandas_dataset("processed_dataset.npz")
# Last expression of the cell: display the column index (pixel columns plus 'label').
df.columns

Index(['pixel_00000', 'pixel_00001', 'pixel_00002', 'pixel_00003',
       'pixel_00004', 'pixel_00005', 'pixel_00006', 'pixel_00007',
       'pixel_00008', 'pixel_00009',
       ...
       'pixel_16375', 'pixel_16376', 'pixel_16377', 'pixel_16378',
       'pixel_16379', 'pixel_16380', 'pixel_16381', 'pixel_16382',
       'pixel_16383', 'label'],
      dtype='object', length=16385)

Ahora `df` es un DataFrame con las siguientes columnas:
- `pixel_00000` … `pixel_16383`: 16384 columnas (128×128), una por píxel, cada una con un valor de escala de grises entre 0 y 255
- `label`: 0 si es Healthy, 1 si es Parkinson

In [None]:
# Shuffle the samples and hold out 20% of them as a test set (80/20 split).

# Split settings; seeding NumPy's global generator makes the shuffle reproducible.
test_size = 0.2
random_state = 42
np.random.seed(random_state)

# Target vector (label column) and feature matrix (every other column), as NumPy arrays.
y = df['label'].values
X = df.drop(columns='label').values

# Random ordering of the row positions: the first n_test go to test, the rest to train.
n_samples = len(X)
n_test = int(n_samples * test_size)
indices = np.random.permutation(n_samples)
test_indices, train_indices = indices[:n_test], indices[n_test:]

# Materialise both splits by indexing with the shuffled positions.
X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]

# Report split sizes and the per-class counts in each split.
print(f"Dataset completo: {n_samples} muestras")
print(f"Train set: {len(X_train)} muestras ({len(X_train)/n_samples*100:.1f}%)")
print(f"Test set: {len(X_test)} muestras ({len(X_test)/n_samples*100:.1f}%)")
print(f"\nDistribución de clases en train:")
print(f"  - Healthy (0): {np.count_nonzero(y_train == 0)}")
print(f"  - Parkinson (1): {np.count_nonzero(y_train == 1)}")
print(f"\nDistribución de clases en test:")
print(f"  - Healthy (0): {np.count_nonzero(y_test == 0)}")
print(f"  - Parkinson (1): {np.count_nonzero(y_test == 1)}")

Dataset completo: 1632 muestras
Train set: 1306 muestras (80.0%)
Test set: 326 muestras (20.0%)

Distribución de clases en train:
  - Healthy (0): 637
  - Parkinson (1): 669

Distribución de clases en test:
  - Healthy (0): 179
  - Parkinson (1): 147


In [None]:
# Fit on the training split with the project's gradient_descent helper, using a very
# small step size, then plot the loss history it returns.
w_final, b_final, loss_history = gradient_descent(
    X_train,
    y_train,
    alpha=1e-7,
    num_epochs=1000,
)
plot_loss_curve(loss_history)

In [None]:
# Settings for the manual gradient-descent run.
alpha = 1e-4      # step size, kept deliberately small
num_it = 10_000   # number of update iterations

# Starting parameters: tiny random weights (one per feature column) and a zero bias.
# Re-seeding the global generator makes the initial weights reproducible.
np.random.seed(0)
w = 1e-4 * np.random.randn(X_train.shape[1])
b = 0.0

# Per-iteration metric histories, filled in by the training loop.
train_mse_list, train_acc_list = [], []
test_mse_list, test_acc_list = [], []

In [None]:
# Manual gradient-descent loop: update w and b, then record metrics on both splits.
for it in range(num_it):
    # Gradients come from the project helper, which is given the training split only.
    grad_w, grad_b = gradiente(X_train, y_train, w, b)

    # Step against the gradient, scaled by the learning rate (w is modified in place).
    w -= alpha * grad_w
    b -= alpha * grad_b

    # Metrics for the updated parameters on the training split.
    train_mse, train_acc = compute_metrics(X_train, y_train, w, b)
    train_mse_list.append(train_mse)
    train_acc_list.append(train_acc)

    # Same metrics on the held-out split (monitoring only; it does not feed the update).
    test_mse, test_acc = compute_metrics(X_test, y_test, w, b)
    test_mse_list.append(test_mse)
    test_acc_list.append(test_acc)

    # Log every fifth iteration and always on the final one.
    if it == num_it - 1 or it % 5 == 0:
        print(f"Iteration {it}: Train MSE={train_mse:.4f}, Acc={train_acc:.4f} | Test MSE={test_mse:.4f}, Acc={test_acc:.4f}")

In [None]:
# Side-by-side learning curves: squared error on the left, accuracy on the right,
# each comparing the train and test histories per iteration.
fig, (ax_mse, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_mse.plot(train_mse_list, label='Train MSE')
ax_mse.plot(test_mse_list, label='Test MSE')
ax_mse.set(xlabel='Iteration', ylabel='MSE', title='Error cuadrático')
ax_mse.legend()

ax_acc.plot(train_acc_list, label='Train Accuracy')
ax_acc.plot(test_acc_list, label='Test Accuracy')
ax_acc.set(xlabel='Iteration', ylabel='Accuracy', title='Accuracy')
ax_acc.legend()

fig.tight_layout()
plt.show()