## Machine Learning Project on UCI Parkinsons Telemonitoring Data Set

Importación de librerías:

In [1]:
from __future__ import division

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import numpy.matlib as matlib

from numpy import random
import math

import pandas as pd
#!pip install qgrid
import qgrid

import scipy as sc
from scipy.spatial import distance
from scipy import stats

#Warning filter configuration for the whole notebook.
#NOTE(review): the original comment described these as warnings we want to avoid, but the
#"always" action makes every matching warning print each time it is raised.
#Confirm whether "ignore" was the intended action.
import warnings
warnings.filterwarnings("always")

Cargamos la base de datos:

In [2]:
#Read the comma-separated .data file into a numeric matrix; skiprows=1 drops the first line of the file
db = np.loadtxt('DB/parkinsons_updrs.data', skiprows=1, delimiter=',')

#G: column 0 of every row, the group each sample belongs to in the dataset
G = db[:, 0]
#Y: column 4 of every row, the output of the regression
Y = db[:, 4]
#X: columns 6-21 of every row (samples), the features
X = db[:, 6:22]

In [3]:
#Show the dimensions of the feature matrix, the target vector and the group vector, one per line
print(X.shape, Y.shape, G.shape, sep="\n")

(5875, 16)
(5875,)
(5875,)


Medidas de error para evaluar los métodos de regresión:

In [23]:
#Mean Square Error
def MSE(Y_est,Y):
    """Return the mean of the squared differences between Y_est and Y.

    Both arguments are reshaped into column vectors with np.size(Y) rows, so
    they must be NumPy arrays holding the same number of elements.
    """
    n_samples = np.size(Y)
    residuals = Y_est.reshape(n_samples, 1) - Y.reshape(n_samples, 1)
    squared_total = np.sum(residuals**2)
    return squared_total/n_samples

#Mean Absolute Error
def MAE(Y_est,Y):
    """Return the mean absolute error between the estimates and the targets.

    Parameters
    ----------
    Y_est : numpy.ndarray
        Estimated values; must hold np.size(Y) elements.
    Y : numpy.ndarray
        Reference values.

    Returns
    -------
    The sum of |Y_est - Y| divided by the number of samples.
    """
    N = np.size(Y)
    # Both vectors are forced to shape (N, 1) so the subtraction is element-wise
    # and never broadcasts into an (N, N) matrix.
    mae = np.sum(np.abs(Y_est.reshape(N,1) - Y.reshape(N,1)))/N
    return mae

#Mean Absolute Percentage Error
def MAPE(Y_est,Y):
    """Return the mean of |Y_est - Y| / |Y| over all samples.

    The value is a fraction (it is not multiplied by 100). Both arguments are
    reshaped into column vectors with np.size(Y) rows before the division.
    """
    n_samples = np.size(Y)
    target = Y.reshape(n_samples, 1)
    relative_error = (Y_est.reshape(n_samples, 1) - target)/target
    return np.sum(abs(relative_error))/n_samples

#Root Mean Square Error
def RMSE(Y_est,Y):
    """Return the square root of the mean squared difference between Y_est and Y.

    Both arguments are reshaped into column vectors with np.size(Y) rows; the
    result is a plain Python float produced by math.sqrt.
    """
    n_samples = np.size(Y)
    diff = Y_est.reshape(n_samples, 1) - Y.reshape(n_samples, 1)
    mean_squared = np.sum(diff**2)/n_samples
    return math.sqrt(mean_squared)

#Coefficient of determination
def R2(Y_est,Y):
    """Return the coefficient of determination, 1 - SS_res / SS_tot.

    Parameters
    ----------
    Y_est : numpy.ndarray
        Estimated values; must hold np.size(Y) elements.
    Y : numpy.ndarray
        Reference values.

    Returns
    -------
    1.0 for a perfect prediction, 0.0 when the estimate is no better than
    always predicting the mean of Y, and a negative value when it is worse.
    The division is not guarded: if every element of Y is identical, SS_tot
    is zero and NumPy's division-by-zero result is returned.
    """
    N = np.size(Y)
    y_true = Y.reshape(N,1)
    # Residual sum of squares: squared error of the estimates.
    ss_res = np.sum((Y_est.reshape(N,1) - y_true)**2)
    # Total sum of squares: squared deviation of the targets from their mean.
    average = np.sum(y_true)/N
    ss_tot = np.sum((y_true - average)**2)
    # The residual term goes in the numerator; the previous version had the ratio inverted.
    r2 = 1 - (ss_res/ss_tot)
    return r2


#### Regresión Polinomial Múltiple

In [10]:
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

def PolynomialRegression(degree):
    """Evaluate a polynomial regression of the given degree with grouped shuffle-split validation.

    The module-level arrays X (features), Y (target) and G (group labels) are
    split 10 times with GroupShuffleSplit (70% of the groups for training). On
    each split the features are standardised with the training statistics,
    expanded with PolynomialFeatures, fitted with LinearRegression and scored
    on the held-out samples.

    Parameters
    ----------
    degree : int
        Degree passed to PolynomialFeatures.

    Returns
    -------
    tuple
        (mae, std_mae, mape, std_mape, rmse, std_rmse, r2, std_r2): mean and
        standard deviation of each metric over the 10 splits.
    """
    iterations = 10
    # `random` is numpy.random here; seeding the global generator keeps the
    # splits repeatable because GroupShuffleSplit is built without random_state.
    random.seed(19680801)
    errorMAE = np.zeros(iterations)
    errorMAPE = np.zeros(iterations)
    errorRMSE = np.zeros(iterations)
    errorR2 = np.zeros(iterations)

    gss = GroupShuffleSplit(n_splits=iterations, train_size=.7)
    for j, (train_idx, test_idx) in enumerate(gss.split(X, Y, G)):
        Xtrain = X[train_idx,:]
        Ytrain = Y[train_idx]
        Xtest = X[test_idx,:]
        Ytest = Y[test_idx]

        # Standardise the data. The statistics are computed per feature (axis=0)
        # on the training set only and reused for the test set, so both sets go
        # through exactly the same transformation.
        media = np.mean(Xtrain, axis=0)
        desvia = np.std(Xtrain, axis=0)
        # A constant feature has zero deviation; leave it centred instead of dividing by zero.
        desvia[desvia == 0] = 1.0
        Xtrain = (Xtrain - media)/desvia
        Xtest = (Xtest - media)/desvia

        # Build the model: expand the features to the requested polynomial degree
        polynomial_features = PolynomialFeatures(degree=degree)
        Xtrain_poly = polynomial_features.fit_transform(Xtrain)

        # Fit a linear regression on the expanded features
        model = LinearRegression()
        model.fit(Xtrain_poly, Ytrain)

        # Validation: the test samples go through the expansion that was already
        # fitted on the training data (transform, not fit_transform).
        Yest = model.predict(polynomial_features.transform(Xtest))

        # Score the predictions against Ytest
        errorMAE[j] = MAE(Yest,Ytest)
        errorMAPE[j] = MAPE(Yest,Ytest)
        errorRMSE[j] = RMSE(Yest,Ytest)
        errorR2[j] = R2(Yest,Ytest)

    mae = np.mean(errorMAE)
    std_mae = np.std(errorMAE)
    mape = np.mean(errorMAPE)
    std_mape = np.std(errorMAPE)
    rmse = np.mean(errorRMSE)
    std_rmse = np.std(errorRMSE)
    r2 = np.mean(errorR2)
    std_r2 = np.std(errorR2)
    return(mae, std_mae, mape, std_mape, rmse, std_rmse, r2, std_r2)


In [None]:
#Run the evaluation with a third-degree polynomial; the returned tuple is shown as the cell output
PolynomialRegression(degree=3)

#### Redes Neuronales Artificiales

In [21]:
from sklearn.model_selection import GroupShuffleSplit
from sklearn.neural_network import MLPRegressor

def ANN(layers, neurons, epochs):
    """Evaluate an MLPRegressor with grouped shuffle-split validation.

    The module-level arrays X (features), Y (target) and G (group labels) are
    split 10 times with GroupShuffleSplit (70% of the groups for training). On
    each split the features are standardised with the training statistics, a
    tanh MLPRegressor is fitted and the held-out samples are scored.

    Parameters
    ----------
    layers : int
        Number of hidden layers.
    neurons : int
        Number of units in every hidden layer.
    epochs : int
        Value passed to MLPRegressor as max_iter.

    Returns
    -------
    tuple
        (mae, std_mae, mape, std_mape, rmse, std_rmse, r2, std_r2): mean and
        standard deviation of each metric over the 10 splits.
    """
    iterations = 10
    # `random` is numpy.random here; the global generator is seeded because
    # neither GroupShuffleSplit nor MLPRegressor is given a random_state.
    random.seed(19680801)
    errorMAE = np.zeros(iterations)
    errorMAPE = np.zeros(iterations)
    errorRMSE = np.zeros(iterations)
    errorR2 = np.zeros(iterations)

    gss = GroupShuffleSplit(n_splits=iterations, train_size=.7)
    for j, (train_idx, test_idx) in enumerate(gss.split(X, Y, G)):
        Xtrain = X[train_idx,:]
        Ytrain = Y[train_idx]
        Xtest = X[test_idx,:]
        Ytest = Y[test_idx]

        # Standardise the data. The statistics are computed per feature (axis=0)
        # on the training set only and reused for the test set, so both sets go
        # through exactly the same transformation.
        media = np.mean(Xtrain, axis=0)
        desvia = np.std(Xtrain, axis=0)
        # A constant feature has zero deviation; leave it centred instead of dividing by zero.
        desvia[desvia == 0] = 1.0
        Xtrain = (Xtrain - media)/desvia
        Xtest = (Xtest - media)/desvia

        # Build the model: `layers` hidden layers with `neurons` units each
        numberOfNeurons = [neurons] * layers
        mlp = MLPRegressor(hidden_layer_sizes=numberOfNeurons, activation='tanh', max_iter=epochs).fit(Xtrain, Ytrain)

        # Validation
        Yest = mlp.predict(Xtest)

        # Score the predictions against Ytest
        errorMAE[j] = MAE(Yest,Ytest)
        errorMAPE[j] = MAPE(Yest,Ytest)
        errorRMSE[j] = RMSE(Yest,Ytest)
        errorR2[j] = R2(Yest,Ytest)

    mae = np.mean(errorMAE)
    std_mae = np.std(errorMAE)
    mape = np.mean(errorMAPE)
    std_mape = np.std(errorMAPE)
    rmse = np.mean(errorRMSE)
    std_rmse = np.std(errorRMSE)
    r2 = np.mean(errorR2)
    std_r2 = np.std(errorR2)
    return(mae, std_mae, mape, std_mape, rmse, std_rmse, r2, std_r2)


In [None]:
#Run the evaluation with 3 hidden layers of 28 units and max_iter=500; the returned tuple is shown as the cell output
ANN(layers=3, neurons=28, epochs=500)