###Descripción del Dataset PIMA DIABETES


In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [58]:
diabetes = pd.read_csv('diabetes.csv')
print(diabetes.shape)

(768, 9)


In [59]:
diabetes.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [None]:
diabetes.head()

In [None]:
diabetes.isnull().sum()

###Análisis Exploratorio de Datos

In [None]:
import seaborn as sns
sns.pairplot(diabetes, hue = 'Outcome', height= 2, palette='colorblind');

In [None]:
#Para entender el comportamiento de la gráfica de pares
import seaborn as sns
sns.pairplot(diabetes, hue = 'Pregnancies', height= 2, palette='colorblind');

In [None]:
fig, ax = plt.subplots(figsize=(10,10))  
sns.heatmap(diabetes.corr(), annot= True, square=True,ax=ax);

###Preparación de los datos


In [60]:
#Separación entre entrenamiento y test
from sklearn.model_selection import train_test_split

train, test = train_test_split(diabetes, stratify = diabetes['Outcome'], test_size = 0.3, random_state = 100)

print(train.shape, test.shape)

(537, 9) (231, 9)


In [61]:
#Separación de los conjuntos de datos
X_train = np.c_[train['Pregnancies'], train['Glucose'],train['BloodPressure'],train['SkinThickness'],train['Insulin'],train['BMI'],train['DiabetesPedigreeFunction'],train['Age']]
y_train = train['Outcome'].values

X_test = np.c_[test['Pregnancies'], test['Glucose'],test['BloodPressure'],test['SkinThickness'],test['Insulin'],test['BMI'],test['DiabetesPedigreeFunction'],test['Age']]
y_test = test['Outcome'].values

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(537, 8) (537,)
(231, 8) (231,)


###Modelo con  Scikit-Learn

In [62]:
#Definición del modelo
from sklearn.neural_network import MLPClassifier

red_skl = MLPClassifier(hidden_layer_sizes = (64,32,16,8,4,2,1), activation = 'logistic', solver = 'sgd', max_iter = 100000000)
red_skl.fit(X_train, y_train)

In [63]:
#evaluación
from sklearn.metrics import classification_report, confusion_matrix

y_train_pred = red_skl.predict(X_train)
y_test_pred = red_skl.predict(X_test)

print("Rendimiento en Entrenamiento")
print(confusion_matrix(y_train, y_train_pred))
print(classification_report(y_train, y_train_pred))


print("Rendimiento en Testeo")
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))

Rendimiento en Entrenamiento
[[350   0]
 [187   0]]
              precision    recall  f1-score   support

           0       0.65      1.00      0.79       350
           1       0.00      0.00      0.00       187

    accuracy                           0.65       537
   macro avg       0.33      0.50      0.39       537
weighted avg       0.42      0.65      0.51       537

Rendimiento en Testeo
[[150   0]
 [ 81   0]]
              precision    recall  f1-score   support

           0       0.65      1.00      0.79       150
           1       0.00      0.00      0.00        81

    accuracy                           0.65       231
   macro avg       0.32      0.50      0.39       231
weighted avg       0.42      0.65      0.51       231



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


###Crear Modelo con PyTorch

In [64]:
#Importar los módulos

import torch
import torch.nn as nn
import torch.nn.functional as F

X_train_t = torch.FloatTensor(X_train)
X_test_t = torch.FloatTensor(X_test)

y_train_t = torch.LongTensor(y_train)
y_test_t = torch.LongTensor(y_test)

In [85]:
#Crear modelo
class red_torch(nn.Module):
  def __init__(self, input_features = 8, hidden1 = 16, hidden2 = 32, hidden3 = 16, out_features = 2):
    super().__init__()
    self.f_conected1 = nn.Linear(input_features, hidden1)
    self.f_conected2 = nn.Linear(hidden1, hidden2)
    self.f_conected3 = nn.Linear(hidden2, hidden3)
    self.out = nn.Linear(hidden3, out_features)

  def forward(self, x):
    x = F.relu(self.f_conected1(x))
    x = F.relu(self.f_conected2(x))
    x = F.relu(self.f_conected3(x))
    x = self.out(x)
    return x

In [86]:
#instanciación del modelo
torch.manual_seed(100)
red_pytorch = red_torch()
red_pytorch.parameters()

<generator object Module.parameters at 0x32274fa00>

In [87]:
#Entrenamiento
costo = nn.CrossEntropyLoss()
optimizador = torch.optim.SGD(red_pytorch.parameters(), lr = 0.001)
epochs = 2000
costos_totales = []

for i in range(epochs):
  y_pred_t = red_pytorch.forward(X_train_t)
  c = costo(y_pred_t, y_train_t)
  costos_totales.append(c)

  if i%10 == 1:
    print('Generacion: {} con costo: {}'.format(i,c.item()))

  optimizador.zero_grad()
  c.backward()
  optimizador.step()


Generacion: 1 con costo: 1.0893666744232178
Generacion: 11 con costo: 0.8439814448356628
Generacion: 21 con costo: 0.7496344447135925
Generacion: 31 con costo: 0.7069353461265564
Generacion: 41 con costo: 0.685757577419281
Generacion: 51 con costo: 0.6746235489845276
Generacion: 61 con costo: 0.6676879525184631
Generacion: 71 con costo: 0.6629685759544373
Generacion: 81 con costo: 0.6594529747962952
Generacion: 91 con costo: 0.65700364112854
Generacion: 101 con costo: 0.6552020907402039
Generacion: 111 con costo: 0.6541281938552856
Generacion: 121 con costo: 0.6531376242637634
Generacion: 131 con costo: 0.6520599722862244
Generacion: 141 con costo: 0.6510962843894958
Generacion: 151 con costo: 0.6503164172172546
Generacion: 161 con costo: 0.6496196985244751
Generacion: 171 con costo: 0.6489391922950745
Generacion: 181 con costo: 0.6482264995574951
Generacion: 191 con costo: 0.6476097702980042
Generacion: 201 con costo: 0.647033154964447
Generacion: 211 con costo: 0.6465743780136108
Gen

Generacion: 1981 con costo: 0.6071782112121582
Generacion: 1991 con costo: 0.6070157885551453


In [None]:
plt.plot(range(epochs), costos_totales)
plt.ylabel('Costos')
plt.xlabel('Generaciones')

In [88]:
#Predecir
predicciones = []

with torch.no_grad():
  for i, data in enumerate(X_test_t):
    y_pred_test = red_pytorch(data)
    predicciones.append(y_pred_test.argmax().item())

predicciones

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [89]:
#Evaluación de testeo
print(confusion_matrix(y_test, predicciones))
print(classification_report(y_test, predicciones))

[[150   0]
 [ 80   1]]
              precision    recall  f1-score   support

           0       0.65      1.00      0.79       150
           1       1.00      0.01      0.02        81

    accuracy                           0.65       231
   macro avg       0.83      0.51      0.41       231
weighted avg       0.77      0.65      0.52       231



###Crear Modelo con Keras/Tensorflow

In [None]:
#importar modulo

from keras.models import Sequential
from keras.layers import Dense

In [None]:
#Crear el modelo

red_keras = Sequential()
red_keras.add(Dense(8, input_dim = 8, activation= 'relu'))#entrada
red_keras.add(Dense(8, activation= 'relu'))
red_keras.add(Dense(4, activation= 'relu'))
red_keras.add(Dense(1, activation= 'sigmoid'))

red_keras.compile(loss = 'binary_crossentropy', optimizer = 'sgd', metrics = ['accuracy'])

In [None]:
#entrenamiento

historial = red_keras.fit(X_train, y_train, epochs = 2000, batch_size = 10, validation_data = (X_test, y_test))

In [None]:
#Evaluación
evaluacion = red_keras.evaluate(X_test,y_test)
print("%s: %.2f%%" % (red_keras.metrics_names[1], evaluacion[1]*100))

In [None]:
predicciones = red_keras.predict(X_test)
predicciones = np.argmax(predicciones, axis=1)

print(confusion_matrix(y_test, predicciones))
print(classification_report(y_test, predicciones))

In [None]:
fig, ax = plt.subplots()
ax.plot(historial.history["loss"], 'r', marker='.', label = 'loss')
ax.plot(historial.history["val_loss"], 'b', marker='.', label = 'validación')
ax.legend();

In [None]:
fig, ax = plt.subplots()
ax.plot(historial.history["accuracy"], 'r', marker='.', label = 'accuracy')
ax.plot(historial.history["val_accuracy"], 'b', marker='.', label = 'validación')
ax.legend();