# Paciente post-operatorio

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sb
import sklearn
import random as rd
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.datasets import make_classification
from sklearn import metrics 

### Conjunto de datos

In [None]:
post_oper = pd.read_csv("post-operative.data",sep=',')
post_oper.head(200)

Unnamed: 0,mid,low,excellent,mid.1,stable,stable.1,stable.2,15,A
0,mid,high,excellent,high,stable,stable,stable,10,S
1,high,low,excellent,high,stable,stable,mod-stable,10,A
2,mid,low,good,high,stable,unstable,mod-stable,15,A
3,mid,mid,excellent,high,stable,stable,stable,10,A
4,high,low,good,mid,stable,stable,unstable,15,S
...,...,...,...,...,...,...,...,...,...
84,mid,mid,excellent,mid,unstable,stable,stable,10,A
85,mid,mid,excellent,mid,unstable,stable,stable,15,S
86,mid,mid,good,mid,unstable,stable,stable,15,A
87,mid,mid,excellent,mid,unstable,stable,stable,10,A


In [None]:
post_oper.shape

(89, 9)

### Cambios bbdd

Abrimos el archivo para poder visualizar la información. Nos dimos cuenta que las columnas no tenían asignado un nombre por ende las siguientes lineas de código son para agregarle un nombre a cada columna.

In [None]:
post_oper.columns = ['L-CORE', 'L-SURF', 'L-02', 'L-BP', 'SURF-STBL', 'CORE-STBL', 'BP-STBL', 'COMFORT', 'ADM-DECS']
post_oper.loc[89]= ['mid', 'low', 'excellent', 'mid', 'stable', 'stable', 'stable',
       '15', 'A']

#Acá eliminamos las filas que contenian información nula.
post_oper['COMFORT'] = post_oper['COMFORT'].replace('?', np.nan)
post_oper = post_oper.dropna(axis=0, subset=['COMFORT'])
post_oper


Unnamed: 0,L-CORE,L-SURF,L-02,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,ADM-DECS
0,mid,high,excellent,high,stable,stable,stable,10,S
1,high,low,excellent,high,stable,stable,mod-stable,10,A
2,mid,low,good,high,stable,unstable,mod-stable,15,A
3,mid,mid,excellent,high,stable,stable,stable,10,A
4,high,low,good,mid,stable,stable,unstable,15,S
...,...,...,...,...,...,...,...,...,...
85,mid,mid,excellent,mid,unstable,stable,stable,15,S
86,mid,mid,good,mid,unstable,stable,stable,15,A
87,mid,mid,excellent,mid,unstable,stable,stable,10,A
88,mid,mid,good,mid,unstable,stable,stable,15,S


### Visualización de datos

In [None]:
post_oper.columns
post_oper.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 87 entries, 0 to 89
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   L-CORE     87 non-null     object
 1   L-SURF     87 non-null     object
 2   L-02       87 non-null     object
 3   L-BP       87 non-null     object
 4   SURF-STBL  87 non-null     object
 5   CORE-STBL  87 non-null     object
 6   BP-STBL    87 non-null     object
 7   COMFORT    87 non-null     object
 8   ADM-DECS   87 non-null     object
dtypes: object(9)
memory usage: 6.8+ KB


In [None]:
post_oper.shape

(87, 9)

In [None]:
post_oper.describe().T

Unnamed: 0,count,unique,top,freq
L-CORE,87,3,mid,57
L-SURF,87,3,mid,47
L-02,87,2,good,46
L-BP,87,3,mid,56
SURF-STBL,87,2,stable,44
CORE-STBL,87,3,stable,81
BP-STBL,87,3,stable,45
COMFORT,87,4,10,65
ADM-DECS,87,4,A,61


In [None]:
#Para poder aplicar los distintos métodos primero transformamos la variables a numéricas.
post_oper['L-CORE']= post_oper['L-CORE'].replace(['low','mid','high'],[0,1,2])
post_oper['L-SURF']= post_oper['L-SURF'].replace(['low','mid','high'],[0,1,2])
post_oper['L-02']= post_oper['L-02'].replace(['good','excellent'],[0,1])
post_oper['L-BP']= post_oper['L-BP'].replace(['low','mid','high'],[0,1,2])
post_oper['SURF-STBL']= post_oper['SURF-STBL'].replace(['stable','unstable'],[0,1])
post_oper['CORE-STBL']= post_oper['CORE-STBL'].replace(['stable','mod-stable','unstable'],[0,1,2])
post_oper['COMFORT']= post_oper['COMFORT'].replace(['10','15','05','07'],[10,15,5,7])
post_oper['ADM-DECS']= post_oper['ADM-DECS'].replace(['A','A ','S','I'],[0,0,1,2])
post_oper['BP-STBL']= post_oper['BP-STBL'].replace(['stable','mod-stable','unstable'],[0,1,2])
post_oper


Unnamed: 0,L-CORE,L-SURF,L-02,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,ADM-DECS
0,1,2,1,2,0,0,0,10,1
1,2,0,1,2,0,0,1,10,0
2,1,0,0,2,0,2,1,15,0
3,1,1,1,2,0,0,0,10,0
4,2,0,0,1,0,0,2,15,1
...,...,...,...,...,...,...,...,...,...
85,1,1,1,1,1,0,0,15,1
86,1,1,0,1,1,0,0,15,0
87,1,1,1,1,1,0,0,10,0
88,1,1,0,1,1,0,0,15,1


In [None]:
post_oper.describe()
#post_oper.drop(['COMFORT'],1).hist()
#plt.show()

Unnamed: 0,L-CORE,L-SURF,L-02,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,ADM-DECS
count,87.0,87.0,87.0,87.0,87.0,87.0,87.0,87.0,87.0
mean,0.931034,0.908046,0.471264,1.287356,0.494253,0.126437,0.724138,10.942529,0.298851
std,0.586537,0.675694,0.502067,0.526243,0.502865,0.477508,0.830937,2.304477,0.485006
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
25%,1.0,0.0,0.0,1.0,0.0,0.0,0.0,10.0,0.0
50%,1.0,1.0,0.0,1.0,0.0,0.0,0.0,10.0,0.0
75%,1.0,1.0,1.0,2.0,1.0,0.0,1.0,10.0,1.0
max,2.0,2.0,1.0,2.0,1.0,2.0,2.0,15.0,2.0


### Modelo Bayes ingenuo

In [None]:
#ya que los datos de encuentran de forma numérica, podemos ocupar el método de bayes ingenuo miltinominal.
X_train, X_test, y_train, y_test = train_test_split(post_oper, post_oper['ADM-DECS']  )     


In [None]:
X_train
model = MultinomialNB().fit(X_train, y_train)
#para saber que tan eficiente fue el modelo, vamos a predecir el resultado de ciertos datos.
predicted = model.predict(X_test)
print('Precisión del modelo:')
print(np.mean(predicted == y_test))
#Verifico la matriz de Confusión
matriz = confusion_matrix(y_test, predicted)
print('Matriz de Confusión:')
print(matriz)
print(classification_report(y_test, predicted))

Precisión del modelo:
1.0
Matriz de Confusión:
[[15  0]
 [ 0  7]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00         7

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22



Como se puede ver, el método tuvo una presición bastante alta, lo cual es muy bueno ya que es un método sencillo.

### Modelo con árboles de decisión

In [None]:
#Separo los datos de "train" en entrenamiento y prueba para probar los algoritmos
X_train, X_test, y_train, y_test = train_test_split(post_oper, post_oper['ADM-DECS'], test_size=0.2)

In [None]:
algoritmo1 = DecisionTreeClassifier(criterion = 'entropy')
#Entreno el modelo
algoritmo1.fit(X_train, y_train)
#Realizo una predicción
#predicted2 = algoritmo1.predict(X_test)
#para saber que tan eficiente fue el modelo, vamos a predecir el resultado de ciertos datos.
predicted2 = model.predict(X_test)
print('Precisión del modelo:')
print(np.mean(predicted2 == y_test))
#Verifico la matriz de Confusión
matriz2 = confusion_matrix(y_test, predicted2)
print('Matriz de Confusión:')
print(matriz2)
print(classification_report(y_test, predicted2))

Precisión del modelo:
0.9444444444444444
Matriz de Confusión:
[[13  0  0]
 [ 0  4  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.80      1.00      0.89         4
           2       0.00      0.00      0.00         1

    accuracy                           0.94        18
   macro avg       0.60      0.67      0.63        18
weighted avg       0.90      0.94      0.92        18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
algoritmo1 = DecisionTreeClassifier(criterion = 'gini')
#Entreno el modelo
algoritmo1.fit(X_train, y_train)
#Realizo una predicción
#predicted2 = algoritmo1.predict(X_test)
#para saber que tan eficiente fue el modelo, vamos a predecir el resultado de ciertos datos.
predicted2 = model.predict(X_test)
print('Precisión del modelo:')
print(np.mean(predicted2 == y_test))
#Verifico la matriz de Confusión
matriz2 = confusion_matrix(y_test, predicted2)
print('Matriz de Confusión:')
print(matriz2)
print(classification_report(y_test, predicted2))

Precisión del modelo:
0.9444444444444444
Matriz de Confusión:
[[13  0  0]
 [ 0  4  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.80      1.00      0.89         4
           2       0.00      0.00      0.00         1

    accuracy                           0.94        18
   macro avg       0.60      0.67      0.63        18
weighted avg       0.90      0.94      0.92        18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Modelo SVM

In [None]:
#Separo los datos de "train" en entrenamiento y prueba para probar los algoritmos
X_train, X_test, y_train, y_test = train_test_split(post_oper, post_oper['ADM-DECS'], test_size=0.2)

#Defino el algoritmo a utilizar
algoritmo2 = SVC(kernel = 'linear')
#Entreno el modelo
algoritmo2.fit(X_train, y_train)
#Realizo una predicción
#para saber que tan eficiente fue el modelo, vamos a predecir el resultado de ciertos datos.
predicted3 = model.predict(X_test)
print('Precisión del modelo:')
print(np.mean(predicted3 == y_test))
#Verifico la matriz de Confusión
matriz3 = confusion_matrix(y_test, predicted3)
print('Matriz de Confusión:')
print(matriz3)
print(classification_report(y_test, predicted3))


Precisión del modelo:
0.9444444444444444
Matriz de Confusión:
[[13  0  0]
 [ 0  4  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.80      1.00      0.89         4
           2       0.00      0.00      0.00         1

    accuracy                           0.94        18
   macro avg       0.60      0.67      0.63        18
weighted avg       0.90      0.94      0.92        18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#algoritmo polinomial
svclassifier = SVC(kernel="poly", degree=8)
svclassifier.fit(X_train, y_train)
predicted4 = svclassifier.predict(X_test)
print(confusion_matrix(y_test, predicted4))
print(classification_report(y_test, predicted4))


[[13  0  0]
 [ 3  1  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

           0       0.81      1.00      0.90        13
           1       0.50      0.25      0.33         4
           2       0.00      0.00      0.00         1

    accuracy                           0.78        18
   macro avg       0.44      0.42      0.41        18
weighted avg       0.70      0.78      0.72        18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#algoritmo gaussiano
svclassifier = SVC(kernel="rbf")
svclassifier.fit(X_train, y_train)
predicted5 = svclassifier.predict(X_test)
print(confusion_matrix(y_test, predicted5))
print(classification_report(y_test, predicted5))


[[13  0  0]
 [ 4  0  0]
 [ 1  0  0]]
              precision    recall  f1-score   support

           0       0.72      1.00      0.84        13
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00         1

    accuracy                           0.72        18
   macro avg       0.24      0.33      0.28        18
weighted avg       0.52      0.72      0.61        18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Modelo K-vecinos cercanos

In [None]:
post_oper2 = pd.read_csv("post-operative.data",sep=',')
post_oper2.head(200)

Unnamed: 0,mid,low,excellent,mid.1,stable,stable.1,stable.2,15,A
0,mid,high,excellent,high,stable,stable,stable,10,S
1,high,low,excellent,high,stable,stable,mod-stable,10,A
2,mid,low,good,high,stable,unstable,mod-stable,15,A
3,mid,mid,excellent,high,stable,stable,stable,10,A
4,high,low,good,mid,stable,stable,unstable,15,S
...,...,...,...,...,...,...,...,...,...
84,mid,mid,excellent,mid,unstable,stable,stable,10,A
85,mid,mid,excellent,mid,unstable,stable,stable,15,S
86,mid,mid,good,mid,unstable,stable,stable,15,A
87,mid,mid,excellent,mid,unstable,stable,stable,10,A


In [None]:
post_oper2.columns = ['L-CORE', 'L-SURF', 'L-02', 'L-BP', 'SURF-STBL', 'CORE-STBL', 'BP-STBL', 'COMFORT', 'ADM-DECS']
post_oper2.loc[89]= ['mid', 'low', 'excellent', 'mid', 'stable', 'stable', 'stable',
       '15', 'A']
print(post_oper2)

   L-CORE L-SURF       L-02  L-BP SURF-STBL CORE-STBL     BP-STBL COMFORT  \
0     mid   high  excellent  high    stable    stable      stable      10   
1    high    low  excellent  high    stable    stable  mod-stable      10   
2     mid    low       good  high    stable  unstable  mod-stable      15   
3     mid    mid  excellent  high    stable    stable      stable      10   
4    high    low       good   mid    stable    stable    unstable      15   
..    ...    ...        ...   ...       ...       ...         ...     ...   
85    mid    mid  excellent   mid  unstable    stable      stable      15   
86    mid    mid       good   mid  unstable    stable      stable      15   
87    mid    mid  excellent   mid  unstable    stable      stable      10   
88    mid    mid       good   mid  unstable    stable      stable      15   
89    mid    low  excellent   mid    stable    stable      stable      15   

   ADM-DECS  
0         S  
1         A  
2        A   
3         A  
4    

In [None]:
post_oper2

Unnamed: 0,L-CORE,L-SURF,L-02,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,ADM-DECS
0,mid,high,excellent,high,stable,stable,stable,10,S
1,high,low,excellent,high,stable,stable,mod-stable,10,A
2,mid,low,good,high,stable,unstable,mod-stable,15,A
3,mid,mid,excellent,high,stable,stable,stable,10,A
4,high,low,good,mid,stable,stable,unstable,15,S
...,...,...,...,...,...,...,...,...,...
85,mid,mid,excellent,mid,unstable,stable,stable,15,S
86,mid,mid,good,mid,unstable,stable,stable,15,A
87,mid,mid,excellent,mid,unstable,stable,stable,10,A
88,mid,mid,good,mid,unstable,stable,stable,15,S


In [None]:
def max_0(A):
    rows, columns =A.shape
    h=[]                          
    for i in range(0,rows):        
        h=h+[A[i][0]]             
    return max(h)
#ejemplo
max_0(np.array([[1,2],[3,4],[10,0],[11,20],[1,0]]))

11

In [None]:
covariables=np.zeros([90,8])
for i in range(0,90):
    if post_oper2.iloc[i].values[7] != '?' :
        covariables[i,7]=int(post_oper2.iloc[i].values[7])
    for j in range(0,4):
        if j != 2 :
            if post_oper2.iloc[i].values[j] == 'high' :
                covariables[i,j]=10
            elif post_oper2.iloc[i].values[j] == 'mid' :
                covariables[i,j]=0
            else :
                covariables[i,j]=-10
        else :
            if post_oper2.iloc[i].values[j] == 'excellent' :
                covariables[i,j]=10
            elif post_oper2.iloc[i].values[j] == 'good' :
                covariables[i,j]=3.3
            elif post_oper2.iloc[i].values[j] == 'fair' :
                covariables[i,j]=-3.3
            else:
                covariables[i,j]=-10
    for j in range(4,7):
        if post_oper2.iloc[i].values[j] == 'stable' :
                covariables[i,j]=10
        elif post_oper2.iloc[i].values[j] == 'mod-stable' :
                covariables[i,j]=0
        else :
                covariables[i,j]=-10
                
    
        
                
                
                
   
    
covariables 

array([[  0. ,  10. ,  10. ,  10. ,  10. ,  10. ,  10. ,  10. ],
       [ 10. , -10. ,  10. ,  10. ,  10. ,  10. ,   0. ,  10. ],
       [  0. , -10. ,   3.3,  10. ,  10. , -10. ,   0. ,  15. ],
       [  0. ,   0. ,  10. ,  10. ,  10. ,  10. ,  10. ,  10. ],
       [ 10. , -10. ,   3.3,   0. ,  10. ,  10. , -10. ,  15. ],
       [  0. , -10. ,  10. ,  10. ,  10. ,  10. ,   0. ,   5. ],
       [ 10. ,   0. ,  10. ,   0. , -10. , -10. ,  10. ,  10. ],
       [  0. ,  10. ,   3.3,   0. ,  10. ,  10. ,  10. ,  10. ],
       [  0. , -10. ,  10. ,   0. , -10. ,  10. ,   0. ,  10. ],
       [  0. ,   0. ,   3.3,   0. ,  10. ,  10. ,  10. ,  15. ],
       [  0. , -10. ,   3.3,  10. ,  10. ,  10. ,   0. ,  10. ],
       [ 10. ,  10. ,  10. ,  10. , -10. ,  10. , -10. ,  15. ],
       [  0. ,  10. ,   3.3,   0. , -10. ,  10. ,   0. ,  10. ],
       [  0. , -10. ,   3.3,  10. , -10. , -10. ,  10. ,  15. ],
       [ 10. ,  10. ,  10. ,  10. , -10. ,  10. , -10. ,  10. ],
       [-10. ,  10. ,   3

In [None]:
variable_respuesta=np.zeros(shape=90)
for i in range(0,90):
    if post_oper2.iloc[i].values[8] == 'S' :
        variable_respuesta[i]=-1
    elif post_oper2.iloc[i].values[8] == 'A' :
        variable_respuesta[i]=0
    else:
        variable_respuesta[i]=1
variable_respuesta

array([-1.,  0.,  1.,  0., -1., -1., -1., -1., -1.,  0.,  0.,  0.,  0.,
       -1.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,
        0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0., -1., -1., -1.,  0.,
        0., -1., -1., -1.,  0., -1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
       -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0., -1.,
        0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0., -1.,
        0.,  0., -1.,  1.,  0.,  0.,  0., -1.,  0.,  0., -1.,  0.])

In [None]:
covariables=np.delete(covariables,(45,47,69),axis=0)

In [None]:
variable_respuesta = np.delete(variable_respuesta,(45,47,69))

In [None]:
covariables_train,covariables_test,variable_respuesta_train,variable_respuesta_test = train_test_split( covariables,variable_respuesta , random_state=0)

In [None]:
n_neighbors = 4
 
knn = KNeighborsClassifier(n_neighbors)
knn.fit(covariables_train, variable_respuesta_train)
print('Accuracy of K-NN classifier on training set: {:.2f}'
     .format(knn.score(covariables_train, variable_respuesta_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
     .format(knn.score(covariables_test, variable_respuesta_test)))

Accuracy of K-NN classifier on training set: 0.75
Accuracy of K-NN classifier on test set: 0.59


In [None]:
pred = knn.predict(covariables_test)
print(confusion_matrix(variable_respuesta_test, pred))
print(classification_report(variable_respuesta_test, pred))

[[ 0  6  0]
 [ 2 13  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00         6
         0.0       0.65      0.87      0.74        15
         1.0       0.00      0.00      0.00         1

    accuracy                           0.59        22
   macro avg       0.22      0.29      0.25        22
weighted avg       0.44      0.59      0.51        22

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#matriz de confusión comparable con los metodos anteriores
M=np.array([[13,2,0],[6,0,0],[1,0,0]])
M

array([[13,  2,  0],
       [ 6,  0,  0],
       [ 1,  0,  0]])

In [None]:
pred = knn.predict(covariables_test)
print(confusion_matrix(variable_respuesta_test, pred))
print(classification_report(variable_respuesta_test, pred))

[[ 0  6  0]
 [ 2 13  0]
 [ 0  1  0]]
              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00         6
         0.0       0.65      0.87      0.74        15
         1.0       0.00      0.00      0.00         1

    accuracy                           0.59        22
   macro avg       0.22      0.29      0.25        22
weighted avg       0.44      0.59      0.51        22

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
def predicción_vecinosmodificado(x):   #contando como vecino a si mismo 
      predict=np.zeros(shape=87)
      for l in range(0,87):    
              d=np.array([[0,0]])  #este np.array irá almacenando los x vecinos más cercanos (las distancias y
              for c in range(0,x-1):  #x es el número de vecinos #sus variables respuestas )
                  d=np.concatenate((d,np.array([[0,0]])))
             
              for i in range(0,87):
                  v=covariables[i]-covariables[l]
                  m=np.linalg.norm(v)    #distancia  entre nuestro dato k+1 (sumo 1 porque el conjunto de datos está                     
                  if i<x :
                                        #enumerado del 1 al 200 y no del 0 al 199) perteneciente al conjunto de validación
                       d[i]=[m,variable_respuesta[i]] #y el dato b+1 perteneciente al conjunto de prueba
                                                      
                  elif m < max_0(d) :
                       u=0
                       for s in range(0,x):
                              if d[s][0] > d[u][0] : 
                                  u=s
                       d[u]=[m,variable_respuesta[i]] #remplazamos el vecino menos cercano por el nuevo vecino  
              cantidad_unos=0
              for a in range(0,x):
                  if d[a][1] == 1 :
                     cantidad_unos=cantidad_unos+1
              cantidad_unos= 2.4*cantidad_unos
              cantidad_ceros=0
              for a in range(0,x):
                  if d[a][1] == 0 :
                     cantidad_ceros=cantidad_ceros+1      #predecimos la variable respuesta de el dato de validación
                     cantidad_ceros= 0.8 * cantidad_ceros
              cantidad_menosunos=0
              for a in range(0,x):
                  if d[a][1] == -1 :
                     cantidad_menosunos=cantidad_menosunos+1  
              cantidad_menosunos=1*cantidad_menosunos
              if cantidad_unos >= cantidad_ceros :
                    if cantidad_unos >= cantidad_menosunos :
                        predict[l] = 1
                    else :
                        predict[l] = -1
              else :
                  if cantidad_menosunos >= cantidad_ceros :
                        predict[l] = -1
                  else :
                        predict[l] = 0
                        
      return predict

In [None]:
L_1 = predicción_vecinosmodificado(4)
L_1

array([ 0.,  0.,  1.,  0.,  1.,  0., -1., -1., -1.,  0.,  0.,  0.,  0.,
       -1.,  0.,  0.,  0.,  0.,  0.,  0., -1., -1.,  0.,  0.,  0.,  0.,
        0.,  0., -1.,  0., -1.,  0.,  0.,  0.,  0., -1., -1.,  0.,  0.,
        0., -1.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,
        0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
        0., -1., -1., -1.,  0.,  0.,  0., -1.,  0., -1.,  0., -1., -1.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [None]:
def matriz_de_confucion(L):
    M=np.array([[0,0,0],
                [0,0,0],
                [0,0,0]])
    for i in range(0,87):
        if L[i] == -1 :
            if variable_respuesta[i]==-1 :
                M[0][0]=M[0][0]+1
            elif variable_respuesta[i]==0 :
                M[0][1]=M[0][1]+1
            else :
                M[0][2]=M[0][2]+1
        elif L[i]== 0 :
            if variable_respuesta[i]==-1 :
                M[1][0]=M[1][0]+1
            elif variable_respuesta[i]==0 :
                M[1][1]=M[1][1]+1
            else :
                M[1][2]=M[1][2]+1      
        else :
            if variable_respuesta[i]==-1 :
                M[2][0]=M[2][0]+1
            elif variable_respuesta[i]==0 :
                M[2][1]=M[2][1]+1
            else :
                M[2][2]=M[2][2]+1   
    return M

In [None]:
matriz_de_confucion(L_1)

array([[12,  9,  0],
       [10, 52,  0],
       [ 2,  0,  2]])

In [None]:
# matriz de confusión comparable con los metodos anteriores 

M = np.array([[52,10,0],[9,12,0],[2,0,2]])
M

array([[52, 10,  0],
       [ 9, 12,  0],
       [ 2,  0,  2]])

### **Exactitud de los modelos vistos (Accuracy)

### Análisis significancia de las variables

In [None]:
#test de wald

### Valores atípicos

### Gráficos

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c1562f8b-caab-4e2a-9fee-cd4640fe8257' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>