In [21]:
# Cálculo científico y vectorial para python
import numpy as np
# Libreria para graficos
from matplotlib import pyplot
# Modulo de optimizacion en scipy
from scipy import optimize
# modulo para cargar archivos en formato MATLAB
from scipy.io import loadmat
import pandas as pd

In [22]:
# cargar a una variable el dataset
data = pd.read_csv("winequality-red.csv")
data.head()

# este data set tiene las siguientes caracteristicas
# 1) acidez
# 2) 

Unnamed: 0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
0,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
1,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
2,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
3,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
4,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5


In [23]:
# Ingresar imágenes de dígitos de 20x20
input_layer_size  = 400

# 10 etiquetas, de 1 a 10 (tomar en cuenta que se asigna "0" a la etiqueta 10)
num_labels = 11
data = np.loadtxt('winequality-red.csv', delimiter=',')
X, y = data[:, 0:11], data[:, 11]
print(X)
print(y)

m = y.size

[[ 7.4    0.7    0.    ...  3.51   0.56   9.4  ]
 [ 7.8    0.88   0.    ...  3.2    0.68   9.8  ]
 [ 7.8    0.76   0.04  ...  3.26   0.65   9.8  ]
 ...
 [ 6.3    0.51   0.13  ...  3.42   0.75  11.   ]
 [ 5.9    0.645  0.12  ...  3.57   0.71  10.2  ]
 [ 6.     0.31   0.47  ...  3.39   0.66  11.   ]]
[5. 5. 5. ... 6. 5. 6.]


In [24]:
# funcion para la normalizacion de caracteristicas
def  featureNormalize(X):
    X_norm = X.copy()
    media_norm = np.zeros(X.shape[1])
    sigma = np.zeros(X.shape[1])

    media_norm = np.mean(X, axis = 0)
    sigma = np.std(X, axis = 0)
    X_norm = (X - media_norm) / sigma
    
    return X_norm, media_norm, sigma

In [25]:
X_norm, media_norm, sigma = featureNormalize(X)

In [26]:
X = X_norm
# X = X_datos
print(X[0])

[-0.52835961  0.96187667 -1.39147228 -0.45321841 -0.24370669 -0.46619252
 -0.37913269  0.55827446  1.28864292 -0.57920652 -0.96024611]


In [27]:
def sigmoid(z):
    """
    Calcula la sigmoide de z.
    """
    return 1.0 / (1.0 + np.exp(-z))

In [28]:
a = sigmoid(2)
print(a)

0.8807970779778823


In [29]:
def lrCostFunction(theta, X, y, lambda_):
    # Inicializa algunos valores utiles
    m = y.size

    # convierte las etiquetas a valores enteros si son boleanos
    if y.dtype == bool:
        y = y.astype(int)
    
    J = 0
    grad = np.zeros(theta.shape)
    
    h = sigmoid(X.dot(theta.T))
    
    temp = theta
    temp[0] = 0
    
    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) + (lambda_ / (2 * m)) * np.sum(np.square(temp))
    
    grad = (1 / m) * (h - y).dot(X) 
    grad = grad + (lambda_ / m) * temp

    return J, grad

In [30]:
def oneVsAll(X, y, num_labels, lambda_):
    # algunas variables utiles
    m, n = X.shape
    
    all_theta = np.zeros((num_labels, n + 1))

    # Agrega unos a la matriz X
    X = np.concatenate([np.ones((m, 1)), X], axis=1)

    for c in np.arange(num_labels):
        initial_theta = np.zeros(n + 1)
        options = {'maxiter': 50}
        res = optimize.minimize(lrCostFunction, 
                                initial_theta, 
                                (X, (y == c), lambda_), 
                                jac=True, 
                                method='CG',
                                options=options) 
        
        all_theta[c] = res.x

    return all_theta

In [31]:
lambda_ = 0.1
all_theta = oneVsAll(X, y, num_labels, lambda_)

In [32]:
def predictOneVsAll(all_theta, X):
    m = X.shape[0];
    num_labels = all_theta.shape[0]

    p = np.zeros(m)

    # Add ones to the X data matrix
    X = np.concatenate([np.ones((m, 1)), X], axis=1)
    p = np.argmax(sigmoid(X.dot(all_theta.T)), axis = 1)

    return p

In [34]:
print(X.shape)
pred = predictOneVsAll(all_theta, X)
print('Precision del conjuto de entrenamiento: {:.2f}%'.format(np.mean(pred == y) * 100))
XPrueba = X[200:201, :].copy()
print(XPrueba.shape)

XPrueba = np.concatenate([np.ones((1, 1)), XPrueba], axis=1)
print(XPrueba.shape)
p = np.argmax(sigmoid(XPrueba.dot(all_theta.T)), axis = 1)
print('Calidad del vino',p)
print('Valor esperado:',y[200])

(1599, 11)
Precision del conjuto de entrenamiento: 60.48%
(1, 11)
(1, 12)
Calidad del vino [6]
Valor esperado: 7.0
