## **Proyecto Final  Despliegue de Proyecto**

In [124]:
# Imporatmos las librerias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Cargamos y examinamos el data set
data = pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [125]:
# Instamos agboots
!pip install xgboost pandas scikit-learn



In [126]:
# Instalamos poetry
!pip install poetry



In [127]:
# Creamos un archivo pyproject.toml en la raíz del proyecto.
!poetry init -n

[31;1mA pyproject.toml file with a project and/or a poetry section already exists.[39;22m


Creamos la estructuras de las carpetas y archivos en Colab.
Debido a las limitaciones de mi computadora personal, realizo la ejecucion del proyecto desde Google Colb, utilizando mi computadora laboral.

In [128]:
#Crea las carpetas y archivos necesarios en Colab:

import os

# Crear la estructura de carpetas
os.makedirs('Deployment_XGBoost_Heart_Disease_UCI/data', exist_ok=True)
os.makedirs('Deployment_XGBoost_Heart_Disease_UCI/models', exist_ok=True)
os.makedirs('Deployment_XGBoost_Heart_Disease_UCI/src/data', exist_ok=True)
os.makedirs('Deployment_XGBoost_Heart_Disease_UCI/src/model', exist_ok=True)
os.makedirs('Deployment_XGBoost_Heart_Disease_UCI/tests', exist_ok=True)

# Verificar la estructura
!tree Deployment_XGBoost_Heart_Disease_UCI

/bin/bash: line 1: tree: command not found


In [129]:
!cat src/data/data_loader.py

In [130]:
# Este archivo carga los datos
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/data/data_loader.py
import pandas as pd

def load_data(file_path):
    """
    Carga un archivo CSV como un DataFrame de pandas.

    Parámetros:
        file_path (str): Ruta al archivo CSV.

    Retorna:
        pd.DataFrame: DataFrame con los datos cargados.
    """
    return pd.read_csv(file_path)

Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/data/data_loader.py


In [131]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/data/data_splitter.py
from sklearn.model_selection import train_test_split

def split_data(data, target_column='target', test_size=0.2, random_state=42):
    """
    Divide los datos en conjuntos de entrenamiento y prueba.

    Parámetros:
        data (pd.DataFrame): DataFrame con los datos.
        target_column (str): Nombre de la columna objetivo.
        test_size (float): Proporción del conjunto de prueba.
        random_state (int): Semilla para reproducibilidad.

    Retorna:
        tuple: (X_train, X_test, y_train, y_test)
    """
    X = data.drop(target_column, axis=1)
    y = data[target_column]
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/data/data_splitter.py


In [132]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/data/data_processor.py
from sklearn.preprocessing import StandardScaler

def preprocess_data(X_train, X_test):
    """
    Preprocesa los datos escalándolos.

    Parámetros:
        X_train (pd.DataFrame): Datos de entrenamiento.
        X_test (pd.DataFrame): Datos de prueba.

    Retorna:
        tuple: (X_train, X_test)
    """
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test

Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/data/data_processor.py


In [133]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/model/trainer.py
from xgboost import XGBClassifier

def train_model(X_train, y_train):
    """
    Entrena un modelo XGBoost.

    Parámetros:
        X_train (pd.DataFrame): Datos de entrenamiento.
        y_train (pd.Series): Etiquetas de entrenamiento.

    Retorna:
        XGBClassifier: Modelo entrenado.
    """
    model = XGBClassifier()
    model.fit(X_train, y_train)
    return model

Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/model/trainer.py


In [134]:
# Este archivo evalúa el modelo entrenado.
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/model/evaluator.py
from sklearn.metrics import accuracy_score

def evaluate_model(model, X_test, y_test):
    """
    Evalúa el modelo.

    Parámetros:
        model (XGBClassifier): Modelo entrenado.
        X_test (pd.DataFrame): Datos de prueba.
        y_test (pd.Series): Etiquetas de prueba.

    Retorna:
        None
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.2f}')

Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/model/evaluator.py


In [135]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/src/model/saver.py
import pickle

def save_model(model, file_path):
    """
    Guarda el modelo en un archivo.

    Parámetros:
        model (XGBClassifier): Modelo entrenado.
        file_path (str): Ruta al archivo.

    Retorna:
        None
    """
    with open(file_path, 'wb') as f:
        pickle.dump(model, f)


Overwriting Deployment_XGBoost_Heart_Disease_UCI/src/model/saver.py


In [136]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/main.py
import sys
import os

# Agregar la ruta de src al sys.path
sys.path.append(os.path.abspath('src'))

# Importar módulos
from src.data.data_loader import load_data
from src.data.data_splitter import split_data
from src.data.data_processor import preprocess_data
from src.model.trainer import train_model
from src.model.evaluator import evaluate_model
from src.model.saver import save_model

def main():
    # Ruta al archivo CSV
    file_path = 'heart.csv'

    # Cargar datos
    data = load_data(file_path)

    # Dividir datos
    X_train, X_test, y_train, y_test = split_data(data)

    # Preprocesar datos
    X_train, X_test = preprocess_data(X_train, X_test)

    # Entrenar modelo
    model = train_model(X_train, y_train)

    # Evaluar modelo
    evaluate_model(model, X_test, y_test)

    # Guardar modelo
    save_model(model, 'models/xgboost_model.pkl')

if __name__ == "__main__":
    main()

Overwriting Deployment_XGBoost_Heart_Disease_UCI/main.py


**Realizar las pruebas unitarias**

In [137]:
# Installar
!pip install pytest



In [146]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/conftest.py
import sys
import os

# Agregar la ruta de src al sys.path
sys.path.append(os.path.abspath('src'))


Writing Deployment_XGBoost_Heart_Disease_UCI/tests/conftest.py


In [148]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_loader.py
import pytest
from src.data.data_loader import load_data

def test_load_data():
    # Ruta a un archivo de prueba
    file_path = 'heart.csv'
    data = load_data(file_path)

    # Verificar que los datos se cargan correctamente
    assert not data.empty, "El DataFrame está vacío"

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_loader.py


In [149]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_splitter.py
import pytest
import pandas as pd
from src.data.data_splitter import split_data

def test_split_data():
    # Crear un DataFrame de prueba
    data = pd.DataFrame({
        'feature1': [1, 2, 3, 4, 5],
        'feature2': [10, 20, 30, 40, 50],
        'target': [0, 1, 0, 1, 0]
    })

    # Dividir los datos
    X_train, X_test, y_train, y_test = split_data(data, target_column='target')

    # Verificar que los conjuntos no estén vacíos
    assert not X_train.empty, "X_train está vacío"
    assert not X_test.empty, "X_test está vacío"
    assert not y_train.empty, "y_train está vacío"
    assert not y_test.empty, "y_test está vacío"

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_splitter.py


In [150]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_processor.py
import pytest
import numpy as np
from src.data.data_processor import preprocess_data

def test_preprocess_data():
    # Crear datos de prueba
    X_train = np.array([[1, 2], [3, 4]])
    X_test = np.array([[5, 6]])

    # Preprocesar los datos
    X_train_processed, X_test_processed = preprocess_data(X_train, X_test)

    # Verificar que los datos se escalaron correctamente
    assert np.mean(X_train_processed) == pytest.approx(0.0, abs=1e-7), "X_train no está escalado"
    assert np.std(X_train_processed) == pytest.approx(1.0, abs=1e-7), "X_train no está escalado"

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_processor.py


In [151]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_trainer.py
import pytest
import numpy as np
from src.model.trainer import train_model

def test_train_model():
    # Crear datos de prueba
    X_train = np.array([[1, 2], [3, 4]])
    y_train = np.array([0, 1])

    # Entrenar el modelo
    model = train_model(X_train, y_train)

    # Verificar que el modelo se entrenó
    assert model is not None, "El modelo no se entrenó"

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_trainer.py


In [152]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_evaluator.py
import pytest
import numpy as np
from src.model.evaluator import evaluate_model
from xgboost import XGBClassifier

def test_evaluate_model():
    # Crear datos de prueba
    X_test = np.array([[1, 2], [3, 4]])
    y_test = np.array([0, 1])

    # Crear un modelo de prueba
    model = XGBClassifier()
    model.fit(X_test, y_test)

    # Evaluar el modelo
    evaluate_model(model, X_test, y_test)

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_evaluator.py


In [153]:
%%writefile Deployment_XGBoost_Heart_Disease_UCI/tests/test_saver.py
import pytest
import os
from src.model.saver import save_model
from xgboost import XGBClassifier

def test_save_model(tmpdir):
    # Crear un modelo de prueba
    model = XGBClassifier()

    # Ruta temporal para guardar el modelo
    file_path = tmpdir.join("model.pkl")

    # Guardar el modelo
    save_model(model, file_path)

    # Verificar que el archivo existe
    assert os.path.exists(file_path), "El archivo no se guardó"

Overwriting Deployment_XGBoost_Heart_Disease_UCI/tests/test_saver.py


Ejecutamos

In [155]:
# Ejecutamos el main.py script

!python Deployment_XGBoost_Heart_Disease_UCI/main.py

Accuracy: 0.82


In [156]:
!pytest Deployment_XGBoost_Heart_Disease_UCI/tests/

platform linux -- Python 3.10.12, pytest-8.3.4, pluggy-1.5.0
rootdir: /content
configfile: pyproject.toml
plugins: typeguard-4.4.1, anyio-3.7.1
collected 0 items / 6 errors                                                                       [0m

[31m[1m_________ ERROR collecting Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_loader.py __________[0m
[31mImportError while importing test module '/content/Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_loader.py'.
Hint: make sure your test modules/packages have valid Python names.
Traceback:
/usr/lib/python3.10/importlib/__init__.py:126: in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_loader.py:2: in <module>
    from src.data.data_loader import load_data
E   ModuleNotFoundError: No module named 'src'[0m
[31m[1m________ ERROR collecting Deployment_XGBoost_Heart_Disease_UCI/tests/test_data_processor.py ________[0m
[31mImportError whi