In [35]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Bloque 1

def load_data(file_path):
    column_names = [
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
        'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num'
    ]
    used_columns = [2, 3, 8, 11, 15, 18, 19, 31, 38, 39, 40, 43, 50, 57]
    df = pd.read_csv(file_path, header=None, names=column_names, usecols=used_columns, na_values='?')
    return df

file_name = 'datos.csv'
df = load_data(file_name)

imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

X = df_imputed.drop('num', axis=1)
y = df_imputed['num']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

log_reg = LogisticRegression(solver='newton-cg', max_iter=10000, random_state=42)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Bloque 2

classifiers = [
    ('Regresión Logística', LogisticRegression(solver='newton-cg', max_iter=10000, random_state=42)),
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('Máquina de Vectores de Soporte', SVC(random_state=42)),
    ('K-Vecinos más Cercanos', KNeighborsClassifier())
]

for name, classifier in classifiers:
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    
    print(f"Clasificador: {name}")
    print("Matriz de Confusión:\n", confusion_matrix(y_test, y_pred))
    print("Informe de Clasificación:\n", classification_report(y_test, y_pred))
    print("-" * 60)


Accuracy: 0.45
Clasificador: Regresión Logística
Matriz de Confusión:
 [[37  2  2  1  1]
 [14  9  5  5  0]
 [ 4  6  3  1  0]
 [ 5  6  9  7  1]
 [ 1  4  0  1  0]]
Informe de Clasificación:
               precision    recall  f1-score   support

         0.0       0.61      0.86      0.71        43
       1e+00       0.33      0.27      0.30        33
         2.0       0.16      0.21      0.18        14
         3.0       0.47      0.25      0.33        28
         4.0       0.00      0.00      0.00         6

    accuracy                           0.45       124
   macro avg       0.31      0.32      0.30       124
weighted avg       0.42      0.45      0.42       124

------------------------------------------------------------
Clasificador: Random Forest
Matriz de Confusión:
 [[38  0  4  1  0]
 [12 11  5  4  1]
 [ 3  3  6  2  0]
 [ 5  8  7  6  2]
 [ 0  3  0  3  0]]
Informe de Clasificación:
               precision    recall  f1-score   support

         0.0       0.66      0.88     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
