<a href="https://colab.research.google.com/github/pisceno/AI-Generative/blob/main/clasificacion_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import os
import numpy as np
import csv

# ====================================================================
# CONFIGURACIÓN DE DESCARGA
# ====================================================================
# URL RAW DE TU REPOSITORIO DE GITHUB
# Esta URL debe funcionar porque apunta al servidor de contenido crudo.
RAW_URL_WDBC = "https://raw.githubusercontent.com/pisceno/data_sets/main/wdbc.data"
# Nombre del archivo que se guardará localmente
DATA_PATH_LOCAL = "wdbc.data"

print(f"Descargando {DATA_PATH_LOCAL} desde GitHub...")

try:
    response = requests.get(RAW_URL_WDBC)
    response.raise_for_status() # Lanza un error si la descarga falla

    # Guarda el contenido en un archivo local llamado 'wdbc.data'
    with open(DATA_PATH_LOCAL, 'wb') as f:
        f.write(response.content)

    print(f"✅ Archivo {DATA_PATH_LOCAL} descargado y guardado localmente.")

except requests.exceptions.HTTPError as errh:
    print(f"❌ Error HTTP al descargar. Verifica que el archivo exista en la URL RAW.")
except Exception as e:
    print(f"❌ Ocurrió un error general en la descarga: {e}")

Descargando wdbc.data desde GitHub...
✅ Archivo wdbc.data descargado y guardado localmente.


In [None]:
%%writefile clasificacion_NN.py
import numpy as np
import csv
import sys
import os

# ==============================================================================
# CONFIGURACIÓN (Ruta local)
# ==============================================================================
# ¡SOLO EL NOMBRE DEL ARCHIVO LOCAL! El script lo buscará en la máquina de Colab.
DATA_PATH = "wdbc.data"
LEARNING_RATE = 0.01
ITERATIONS = 50000

# ==============================================================================
# FUNCIONES NÚCLEO (IMPLEMENTACIÓN DESDE CERO)
# ==============================================================================

def sigmoid(z):
    """Función de activación Sigmoid: sigma(z) = 1 / (1 + e^(-z))"""
    z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z))

def compute_cost(X, y, w):
    """Cálculo del Error (Costo) - Binary Cross-Entropy (Log Loss)"""
    m = len(y)
    h = sigmoid(X @ w)
    h = np.clip(h, 1e-15, 1 - 1e-15)
    cost = (-1/m) * (y.T @ np.log(h) + (1 - y).T @ np.log(1 - h))
    return cost

def gradient_descent(X, y, w, learning_rate, iterations):
    """Algoritmo de optimización de Gradiente Descendente."""
    m = len(y)
    for i in range(iterations):
        h = sigmoid(X @ w)
        gradient = (1/m) * X.T @ (h - y)
        w -= learning_rate * gradient
    return w

def standardize_features(X):
    """Estandariza las características (media 0, desviación estándar 1)."""
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    sigma[sigma == 0] = 1
    X_norm = (X - mu) / sigma
    return X_norm

# ==============================================================================
# EJECUCIÓN PRINCIPAL
# ==============================================================================
if __name__ == "__main__":

    # Verificación de existencia de archivo local
    if not os.path.exists(DATA_PATH):
        print(f"Error: No se encontró el archivo de datos '{DATA_PATH}'.")
        print("Asegúrate de ejecutar la celda de descarga de GitHub previamente.")
        sys.exit(1)

    try:
        # 1. CARGAR Y PREPROCESAR LOS DATOS
        data = []
        # 'open(DATA_PATH, 'r')' busca el archivo localmente
        with open(DATA_PATH, 'r') as file:
            reader = csv.reader(file)
            for row in reader:
                # Ignorar ID (columna 0) y tomar el resto
                data.append(row[1:])

        data = np.array(data)

        # Separar, codificar y estandarizar
        y_raw = data[:, 0]
        X_raw = data[:, 1:].astype(float)
        y = np.where(y_raw == 'M', 1, 0) # M=1, B=0

        X_norm = standardize_features(X_raw)
        X = np.hstack([np.ones((X_norm.shape[0], 1)), X_norm]) # Añadir Bias (w0)

        # 2. ENTRENAMIENTO DEL MODELO
        num_features = X.shape[1]
        initial_w = np.zeros(num_features)
        final_w = gradient_descent(X, y, initial_w, LEARNING_RATE, ITERATIONS)

        # 3. CÁLCULO DEL ERROR FINAL
        final_error = compute_cost(X, y, final_w)

        # 4. GENERAR LA SALIDA REQUERIDA (w0 w1 ... wk E)
        output = []
        output.append(f"w0: {final_w[0]:.6f}")
        for i in range(1, num_features):
            output.append(f"w{i}: {final_w[i]:.6f}")
        output.append(f"E: {final_error:.6f}")

        print(' '.join(output))

    except Exception as e:
        print(f"Ocurrió un error durante la ejecución: {e}")

Writing clasificacion_NN.py


In [3]:
!python clasificacion_NN.py

python3: can't open file '/content/clasificacion_NN.py': [Errno 2] No such file or directory


In [None]:
from google.colab import files

# Esto abrirá una ventana para que selecciones el archivo en tu computadora
print("Por favor, sube el archivo wdbc.data")
uploaded = files.upload()

Por favor, sube el archivo wdbc.data


Saving wdbc.data to wdbc (1).data


In [None]:
import os

# 1. Renombrar el archivo subido (el nombre que Colab le dio) a 'wdbc.data'
try:
    os.rename("wdbc (1).data", "wdbc.data")
    print("✅ Archivo renombrado exitosamente a wdbc.data.")
except FileNotFoundError:
    # Si 'wdbc (1).data' no existe, puede ser que el nombre original sí
    if os.path.exists("wdbc.data"):
        print("El archivo wdbc.data ya existe. Listo para el Paso 2.")
    else:
        print("❌ Error: Los archivos no se encuentran en el entorno. Vuelve a ejecutar la celda de subida.")

✅ Archivo renombrado exitosamente a wdbc.data.


In [6]:
%%writefile clasificacion_NN.py
import numpy as np
import csv
import sys
import os

# ==============================================================================
# CONFIGURACIÓN
# ==============================================================================
DATA_PATH = "wdbc.data"
LEARNING_RATE = 0.01
ITERATIONS = 50000

# ==============================================================================
# FUNCIONES NÚCLEO
# ==============================================================================

def sigmoid(z):
    z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z))

def compute_cost(X, y, w):
    m = len(y)
    h = sigmoid(X @ w)
    h = np.clip(h, 1e-15, 1 - 1e-15)
    cost = (-1/m) * (y.T @ np.log(h) + (1 - y).T @ np.log(1 - h))
    return cost

def gradient_descent(X, y, w, learning_rate, iterations):
    m = len(y)
    for i in range(iterations):
        h = sigmoid(X @ w)
        gradient = (1/m) * X.T @ (h - y)
        w -= learning_rate * gradient
    return w

def standardize_features(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    sigma[sigma == 0] = 1
    X_norm = (X - mu) / sigma
    return X_norm

# ==============================================================================
# EJECUCIÓN PRINCIPAL
# ==============================================================================
if __name__ == "__main__":

    if not os.path.exists(DATA_PATH):
        print(f"Error: No se encontró el archivo de datos '{DATA_PATH}'.")
        sys.exit(1)

    try:
        data = []
        with open(DATA_PATH, 'r') as file:
            reader = csv.reader(file)
            for row in reader:
                data.append(row[1:])

        data = np.array(data)

        y_raw = data[:, 0]
        X_raw = data[:, 1:].astype(float)
        y = np.where(y_raw == 'M', 1, 0)

        X_norm = standardize_features(X_raw)
        X = np.hstack([np.ones((X_norm.shape[0], 1)), X_norm])

        num_features = X.shape[1]
        initial_w = np.zeros(num_features)
        final_w = gradient_descent(X, y, initial_w, LEARNING_RATE, ITERATIONS)

        final_error = compute_cost(X, y, final_w)

        output = []
        output.append(f"w0: {final_w[0]:.6f}")
        for i in range(1, num_features):
            output.append(f"w{i}: {final_w[i]:.6f}")
        output.append(f"E: {final_error:.6f}")

        print(' '.join(output))

    except Exception as e:
        print(f"Ocurrió un error durante la ejecución: {e}")

Writing clasificacion_NN.py


In [9]:
!python clasificacion_NN.py

Ocurrió un error durante la ejecución: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (29,) + inhomogeneous part.


In [10]:
%%writefile wdbc.data
842302,M,17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
842517,M,20.57,17.77,132.9,1326,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956,0.1238,0.1866,0.2416,0.186,0.275,0.08902
84300903,M,19.69,21.25,130,1203,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
84358402,M,20.29,14.34,135.1,1297,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575,0.1374,0.205,0.4,0.1625,0.2364,0.07678
843786,M,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,0.07613,0.3345,0.8902,2.217,27.19,0.00751,0.03345,0.03672,0.01137,0.02165,0.005082,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244
844359,M,18.25,19.98,119.6,1040,0.09463,0.109,0.1127,0.074,0.1794,0.05742,0.4467,0.7732,3.18,53.91,0.004314,0.01382,0.02254,0.01039,0.01369,0.002179,22.88,27.66,153.2,1606,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368
84458202,M,13.71,20.83,90.2,577.9,0.1189,0.1645,0.09366,0.05985,0.2196,0.07451,0.5835,1.377,3.856,50.96,0.008805,0.03029,0.02488,0.01448,0.01486,0.005412,17.06,28.14,110.6,897,0.1654,0.3682,0.2678,0.1556,0.3196,0.1151
844981,M,13,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,0.3063,1.002,2.406,24.32,0.005731,0.03502,0.03553,0.01226,0.02143,0.003749,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072
84501001,M,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,0.2976,1.599,2.039,23.94,0.007149,0.07217,0.07743,0.01432,0.01789,0.01008,15.09,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075
845636,M,16.02,23.24,102.7,797.8,0.08206,0.06669,0.03299,0.03323,0.1528,0.05697,0.3795,1.187,2.466,40.51,0.004029,0.009269,0.01101,0.007591,0.0146,0.003042,19.19,33.88,123.8,1150,0.1181,0.1551,0.1459,0.09975,0.2948,0.08452
84610002,M,15.78,17.89,103.6,781,0.0971,0.1292,0.09954,0.06606,0.1842,0.06082,0.5058,0.9849,3.564,54.16,0.005771,0.04061,0.02791,0.01282,0.02008,0.004144,20.42,27.28,136.5,1299,0.1396,0.5609,0.3965,0.181,0.3792,0.1048
846226,M,19.17,24.8,132.4,1123,0.0974,0.2458,0.2065,0.1118,0.2397,0.078,0.9555,3.568,11.07,116.2,0.003139,0.08297,0.0889,0.0409,0.04484,0.01284,20.96,29.94,151.7,1332,0.1037,0.3903,0.3639,0.1767,0.3176,0.1023
846381,M,15.85,23.95,103.7,782.7,0.08401,0.1002,0.09938,0.05364,0.1847,0.05338,0.4033,1.078,2.903,36.58,0.009769,0.03126,0.05051,0.01992,0.02981,0.003002,16.84,27.66,112,876.5,0.1131,0.1924,0.2322,0.1119,0.2809,0.06287
84667401,M,13.73,22.61,93.6,578.3,0.1131,0.2293,0.2128,0.08025,0.2069,0.07682,0.2121,1.169,2.061,19.21,0.006429,0.05936,0.05501,0.01628,0.01961,0.008093,15.03,32.01,108.8,697.7,0.1651,0.7725,0.6943,0.2208,0.3596,0.1431
84799002,M,14.54,27.54,96.73,658.8,0.1139,0.1595,0.1639,0.07364,0.2303,0.07077,0.37,1.033,2.879,32.55,0.005607,0.0424,0.04741,0.0109,0.01857,0.005466,17.46,37.13,124.1,943.2,0.1678,0.6577,0.7026,0.1712,0.4218,0.1341
848406,M,14.68,20.13,94.74,684.5,0.09867,0.072,0.07395,0.05259,0.1586,0.05922,0.4727,1.24,3.195,45.4,0.005718,0.01162,0.01998,0.01109,0.0141,0.002085,19.07,30.88,123.4,1138,0.1464,0.1871,0.2914,0.1609,0.3029,0.08216
84862001,M,16.13,20.68,108.1,798.8,0.117,0.2022,0.1722,0.1028,0.2164,0.07356,0.5692,1.073,3.854,54.18,0.007026,0.02501,0.03188,0.01297,0.01689,0.004142,20.96,31.48,136.8,1315,0.1789,0.4233,0.4784,0.2073,0.3706,0.1142
849014,M,19.81,22.15,130,1260,0.09831,0.1027,0.1479,0.09498,0.1582,0.05395,0.7582,1.017,5.865,112.4,0.006494,0.01893,0.03391,0.01521,0.01356,0.001997,27.32,30.88,186.8,2398,0.1512,0.315,0.5372,0.2388,0.2768,0.07615
8510426,B,13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259
8510653,B,13.08,15.71,85.63,520,0.1075,0.127,0.04568,0.0311,0.1967,0.06811,0.1852,0.7477,1.383,14.67,0.004097,0.01898,0.01698,0.00649,0.01678,0.002425,14.5,20.49,96.09,630.5,0.1312,0.2776,0.189,0.07283,0.3184,0.08183
8510824,B,9.504,12.44,60.34,273.9,0.1024,0.06492,0.02956,0.02076,0.1815,0.06905,0.2773,0.9768,1.909,15.7,0.009606,0.01432,0.01985,0.01421,0.02027,0.002968,10.23,15.66,65.13,314.9,0.1324,0.1148,0.08867,0.06227,0.245,0.07773
8511133,M,15.34,14.26,102.5,704.4,0.1073,0.2135,0.2077,0.09756,0.2521,0.07032,0.4388,0.7096,3.384,44.91,0.006789,0.05328,0.06446,0.02252,0.03672,0.004394,18.07,19.08,125.1,980.9,0.139,0.5954,0.6305,0.2393,0.4667,0.09946
851509,M,21.16,23.04,137.2,1404,0.09428,0.1022,0.1097,0.08632,0.1769,0.05278,0.6917,1.127,4.303,93.99,0.004728,0.01259,0.01715,0.01038,0.01083,0.001987,29.17,35.59,188,2615,0.1401,0.26,0.3155,0.2009,0.2822,0.07526
852552,M,16.65,21.38,110,904.6,0.1121,0.1457,0.1525,0.0917,0.1995,0.0633,0.8068,0.9017,5.455,102.6,0.006048,0.01882,0.02741,0.0113,0.01468,0.002801,26.46,31.56,177,2215,0.1805,0.3578,0.4695,0.2095,0.3613,0.09564
852631,M,17.14,16.4,116,912.7,0.1186,0.2276,0.2229,0.1401,0.304,0.07413,1.046,0.976,7.276,111.4,0.008029,0.03799,0.03732,0.02397,0.02308,0.007444,22.25,21.4,152.4,1461,0.1545,0.3949,0.3853,0.255,0.4066,0.1059
852763,M,14.58,21.53,97.41,644.8,0.1054,0.1868,0.1425,0.08783,0.2252,0.06924,0.2545,0.9832,2.11,21.05,0.004452,0.03055,0.02681,0.01352,0.01454,0.003711,17.62,33.21,122.4,896.9,0.1525,0.6643,0.5539,0.2701,0.4264,0.1275
852781,M,18.61,20.25,122.1,1094,0.0944,0.1066,0.149,0.07731,0.1697,0.05699,0.8529,1.849,5.632,93.54,0.01075,0.02722,0.05081,0.01911,0.02293,0.004217,21.31,27.26,139.9,1403,0.1338,0.2117,0.3446,0.149,0.2341,0.07421
85297

Overwriting wdbc.data


In [11]:
!python clasificacion_NN.py

Ocurrió un error durante la ejecución: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (29,) + inhomogeneous part.
