In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import math
import numpy as np

In [2]:
# Wine classification dataset, taken from a machine learning repository.
# The file has no header row, so pandas numbers the columns itself.

WINE_DATA_URL = "https://raw.githubusercontent.com/cardel/repositorios/main/wine.data"
data = pd.read_csv(WINE_DATA_URL, header=None)

In [7]:
# Preview the first three rows to sanity-check the load.
data.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185


In [15]:
# Split the frame into the target (column 0, the wine class) and the
# feature matrix (every remaining column).

y, X = data.iloc[:, 0], data.iloc[:, 1:]

In [17]:
y.value_counts() # the classes are roughly balanced

0
2    71
1    59
3    48
Name: count, dtype: int64

In [20]:
# To help the network process the labels, encode y with one-hot encoding:
# each wine class gets its own 0/1 column.

y_data = pd.get_dummies(y, dtype = int)
y_data

Unnamed: 0,1,2,3
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
...,...,...,...
173,0,0,1
174,0,0,1
175,0,0,1
176,0,0,1


In [21]:
# Define the sigmoid and see what happens when it is evaluated on large inputs.

def f(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar ``x``.

    The branch on the sign of ``x`` keeps the argument of ``math.exp``
    non-positive, so large negative inputs underflow to 0.0 instead of
    raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    e = math.exp(x)
    return e / (1 + e)

print(f(1480), f(735))

# Both inputs map to 1.0 even though they are very different values: the
# sigmoid saturates, so un-scaled inputs of this size cannot be told apart.

1.0 1.0


In [22]:
# To solve the saturation problem described above, standardize the data.
# NOTE(review): the scaler is fitted on the full X, before the train/test
# split performed later, so test-set statistics influence the scaling.

scaler = StandardScaler()
scaler.fit(X)

In [23]:
print(scaler.var_,scaler.mean_) # one variance and one mean per feature column

[6.55359730e-01 1.24100408e+00 7.48418003e-02 1.10900306e+01
 2.02843328e+02 3.89489032e-01 9.92113512e-01 1.54016191e-02
 3.25754248e-01 5.34425585e+00 5.19514497e-02 5.01254463e-01
 9.86096010e+04] [1.30006180e+01 2.33634831e+00 2.36651685e+00 1.94949438e+01
 9.97415730e+01 2.29511236e+00 2.02926966e+00 3.61853933e-01
 1.59089888e+00 5.05808988e+00 9.57449438e-01 2.61168539e+00
 7.46893258e+02]


In [24]:
# Apply the fitted standardization to the feature matrix and display the result.
X_data = scaler.transform(X)
X_data

array([[ 1.51861254, -0.5622498 ,  0.23205254, ...,  0.36217728,
         1.84791957,  1.01300893],
       [ 0.24628963, -0.49941338, -0.82799632, ...,  0.40605066,
         1.1134493 ,  0.96524152],
       [ 0.19687903,  0.02123125,  1.10933436, ...,  0.31830389,
         0.78858745,  1.39514818],
       ...,
       [ 0.33275817,  1.74474449, -0.38935541, ..., -1.61212515,
        -1.48544548,  0.28057537],
       [ 0.20923168,  0.22769377,  0.01273209, ..., -1.56825176,
        -1.40069891,  0.29649784],
       [ 1.39508604,  1.58316512,  1.36520822, ..., -1.52437837,
        -1.42894777, -0.59516041]])

### Entrenamiento de nuestra red

In [25]:
# Training and validation sets: hold out 20% of the samples for testing.
# A fixed random_state makes the split, and every metric computed from it,
# reproducible across kernel restarts.

X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42
)

In [26]:
# Confirm the sizes of the training and test splits.
print(X_train.shape,X_test.shape)

(142, 13) (36, 13)


In [27]:
# Class distribution inside the test split (one row per one-hot pattern).
y_test.value_counts()

1  2  3
1  0  0    13
0  0  1    12
   1  0    11
Name: count, dtype: int64

In [30]:
# Three hidden layers (13, 25 and 7 units) with logistic activations, trained
# with the L-BFGS solver. random_state pins the weight initialization so the
# fitted model is reproducible.
modelo = MLPClassifier(hidden_layer_sizes=(13, 25, 7),
                       activation='logistic',
                       solver='lbfgs',
                       random_state=42)

In [31]:
modelo.fit(X_train, y_train) # with the adam solver it had not converged

In [33]:
# Look at the shape of the weight matrix of every layer. Iterating over
# coefs_ keeps this correct for any number of hidden layers.

print(*(w.shape for w in modelo.coefs_))

(13, 13) (13, 25) (25, 7) (7, 3)


In [34]:
# Evaluate the accuracy on the held-out test set.

modelo.score(X_test,y_test)

0.9444444444444444

In [35]:
# A more complete evaluation (classification report).
#
# The model was trained on one-hot targets, which scikit-learn treats as a
# multilabel problem: predict() thresholds every output independently and can
# return rows with no 1 at all (or several). An argmax over such rows silently
# falls back to the first class, so keep the per-class probabilities instead;
# argmax over them selects the most likely class.

predicted = modelo.predict_proba(X_test)

In [36]:
# Collapse each row (one column per class) to a single class index per sample.
y_testClass = y_test.to_numpy().argmax(axis=1)
predictedClass = predicted.argmax(axis=1)

In [37]:
# Show predicted and true class indices side by side.
predictedClass, y_testClass

(array([2, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 2, 0, 0, 2, 0, 1, 2, 2, 0, 1, 2,
        2, 1, 0, 2, 0, 2, 1, 2, 1, 0, 0, 1, 1, 0], dtype=int64),
 array([1, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 2, 0, 0, 2, 0, 1, 0, 2, 0, 1, 2,
        2, 1, 0, 2, 0, 2, 1, 2, 1, 0, 0, 1, 1, 0], dtype=int64))

In [38]:
# Per-class precision, recall and F1 on the test set (true labels first).
print(classification_report(y_testClass,predictedClass))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        13
           1       1.00      0.91      0.95        11
           2       0.86      1.00      0.92        12

    accuracy                           0.94        36
   macro avg       0.95      0.94      0.95        36
weighted avg       0.95      0.94      0.95        36



In [39]:
# Confusion matrix of true class indices versus predicted class indices.
print(confusion_matrix(y_testClass, predictedClass))

[[12  0  1]
 [ 0 10  1]
 [ 0  0 12]]


### Segundo ejemplo: regresión

In [53]:
from sklearn.neural_network import MLPRegressor

In [41]:
# The problem is to predict the price of a house (California housing data).
# Features and target are loaded with a single call instead of fetching the
# dataset once per attribute.

X, y = fetch_california_housing(return_X_y=True)

In [42]:
# Number of samples and number of features.
X.shape

(20640, 8)

In [43]:
# Names of the feature columns.
fetch_california_housing().feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [45]:
# Peek at the first five rows of the raw features.
X[0:5]

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02],
       [ 5.64310000e+00,  5.20000000e+01,  5.81735160e+00,
         1.07305936e+00,  5.58000000e+02,  2.54794521e+00,
         3.78500000e+01, -1.22250000e+02],
       [ 3.84620000e+00,  5.20000000e+01,  6.28185328e+00,
         1.08108108e+00,  5.65000000e+02,  2.18146718e+00,
         3.78500000e+01, -1.22250000e+02]])

In [46]:
# Fit a fresh scaler on the housing feature matrix X (the name is
# case-sensitive: no lowercase `x` is defined anywhere in the notebook).
scaler = StandardScaler()
scaler.fit(X)

In [47]:
# Standardize the housing feature matrix X with the fitted scaler.
x_data = scaler.transform(X)

In [48]:
# Inspect the standardized features.
x_data

array([[ 2.34476576,  0.98214266,  0.62855945, ..., -0.04959654,
         1.05254828, -1.32783522],
       [ 2.33223796, -0.60701891,  0.32704136, ..., -0.09251223,
         1.04318455, -1.32284391],
       [ 1.7826994 ,  1.85618152,  1.15562047, ..., -0.02584253,
         1.03850269, -1.33282653],
       ...,
       [-1.14259331, -0.92485123, -0.09031802, ..., -0.0717345 ,
         1.77823747, -0.8237132 ],
       [-1.05458292, -0.84539315, -0.04021111, ..., -0.09122515,
         1.77823747, -0.87362627],
       [-0.78012947, -1.00430931, -0.07044252, ..., -0.04368215,
         1.75014627, -0.83369581]])

In [56]:
# Drop some of the data for simplicity: keep only the first six feature
# columns (the last two listed above, Latitude and Longitude, are discarded).

x_data = x_data[:,:6]

In [57]:
# Hold out 20% of the samples for testing; random_state makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    x_data, y, test_size=0.2, random_state=42
)

In [58]:
# Confirm the sizes of the training and test splits.
print(X_train.shape,X_test.shape)

(16512, 6) (4128, 6)


In [61]:
# Small regressor: three hidden layers (6, 11 and 3 units), ReLU activations,
# Adam solver. random_state pins the weight initialization and the batch
# shuffling so the fit is reproducible.
modelo = MLPRegressor(hidden_layer_sizes=(6, 11, 3),
                      activation="relu",
                      solver="adam",
                      random_state=42)

In [62]:
# Train the regressor on the training split.
modelo.fit(X_train, y_train)

In [63]:
# Shape of the weight matrix of every layer, for any number of hidden layers.
print(*(w.shape for w in modelo.coefs_))

(6, 6) (6, 11) (11, 3) (3, 1)


In [64]:
# Built-in score of the regressor on the test split.
modelo.score(X_test,y_test)

0.6878638222475213

In [65]:
# Predicted target values for the test split.
predicted = modelo.predict(X_test)

In [67]:
from sklearn.metrics import r2_score,mean_squared_log_error

# Coefficient of determination (R^2) of the predictions on the test split.
r2_score(y_test,predicted)

0.6878638222475213

In [68]:
# The regressor's output is unbounded, so it can produce negative predictions,
# which mean_squared_log_error rejects with a ValueError. Clip them at 0
# (a price cannot be negative) before computing the metric.
mean_squared_log_error(y_test, np.clip(predicted, 0, None))

0.041771462628811526