# **Despliegue del Modelo**
---

In [40]:
# Librerias
!pip install fastapi
import re
import os
import requests
import logging
import joblib
import sklearn
import xgboost
import fastapi
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from pydantic import BaseModel
from typing import List
from fastapi import FastAPI



In [2]:
# Funcion para crear el objeto de creación de logs
def create_logger():
  logging.basicConfig(level = logging.INFO, format = '%(asctime)s - %(levenname)s - %(message)s')
  logger = logging.getLogger('Logger')
  logger.info('Logger creado')
  return logger

In [3]:
# Función de consumo a la base de datos de Firebase para cargar el DataSet
def download_firebase(url, logger):
  logger.info("Extrayendo el archivo desde Firebase")
  df = None
  try:
    df = pd.read_csv(url)
    logger.info("Archivo cargado")
  except requests.exceptions.RequestException as e:
    logger.info(f"Error al descargar el archivo CSV: {e}")
  except pd.errors.EmptyDataError:
    logger.info("El archivo CSV está vacío.")
  except Exception as e:
    logger.info(f"Ocurrió un error inesperado: {e}")
  return df

In [4]:
# Cargar DataSet
url = 'https://firebasestorage.googleapis.com/v0/b/personalwp-8822c.appspot.com/o/diabetes_prediction_dataset.csv?alt=media&token=4d70d154-c3d0-4fa0-a3aa-9b9972dd3b95'
logger = create_logger()
df = download_firebase(url, logger)

In [5]:
# Eliminación de valores atípicos y duplicados
seventy_fifth = df['bmi'].quantile(0.75)
twenty_fifth = df['bmi'].quantile(0.25)
iqr = seventy_fifth - twenty_fifth
upper = seventy_fifth + (10 * iqr)
outliers_bmi_upper = df[(df['bmi'] > upper)]
df = pd.merge(df, outliers_bmi_upper, indicator = True, how = 'outer').query('_merge == "left_only"').drop('_merge', axis = 1)
df = df.drop_duplicates(keep = "first")

In [6]:
# Variables Categóricas a Numéricas
df['gender'] = pd.factorize(df['gender'])[0]
df['smoking_history'] = pd.factorize(df['smoking_history'])[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['gender'] = pd.factorize(df['gender'])[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['smoking_history'] = pd.factorize(df['smoking_history'])[0]


In [7]:
# Partición de datos
X = df.drop(columns = 'diabetes')
y = df['diabetes']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42, stratify = y)

In [8]:
# Entrenamiento del modelo
model = XGBClassifier(max_depth = 48, n_estimators = 130, learning_rate = 0.010291737939302062).fit(X_train, y_train)
predict = model.predict(X_test)
joblib.dump(model, "model.joblib")

['model.joblib']

## **Configuración Endpoint FastAPI**
---

In [9]:
# Clase de entrada
class ApiInput(BaseModel):
  features: List[float]

In [10]:
# Clase de salida
class ApiOutput(BaseModel):
  forecast: int

In [11]:
# Creación de script para configurar el endpoint del API
%%writefile main.py
import joblib
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

# Clase de entrada
class ApiInput(BaseModel):
  features: List[float]

# Clase de salida
class ApiOutput(BaseModel):
  forecast: int

# Creación del API
app = FastAPI()
model = joblib.load("model.joblib")

# Endpoint del API tipo post
@app.post("/diabetes_prediction")
async def diabetes_prediction(data: ApiInput) -> ApiOutput:
  predict = model.predict([data.features]).flatten().tolist()
  pred = ApiOutput(forecast = predict[0])
  return pred

Writing main.py


## **Configuración Servicio de Alojamiento Railway**
---

In [12]:
# Creación de archivo de configuración para el servicio de alojamiento del PAI
%%writefile railway.json
{
  "$schema": "https://railway.app/railway.schema.json",
  "build": {
    "builder": "NIXPACKS"
  },
  "deploy": {
    "startCommand": "uvicorn main:app --host 0.0.0.0 --port $PORT",
    "restartPolicyType": "ON_FAILURE",
    "restartPolicyMaxRetries": 10
  }
}

Writing railway.json


In [13]:
# Creación de archivo con las dependencias del proyecto
%%writefile requirements.txt
scikit-learn
xgboost
fastapi==0.82.0
uvicorn==0.19.0
joblib

Writing requirements.txt


## **Cargue de Proyecto en GitHub**
---

In [14]:
# Autenticación en GitHub
!git config --global user.email "jroncanciot@unal.edu.co"
!git config --global user.name "jroncanciot"
!git config --global init.defaultBranch master
token = ... # No se carga el token a GitHub para evitar accesos no deseados

In [15]:
# Inicialización del proyecto
!mkdir diabetes
!mv main.py model.joblib railway.json requirements.txt diabetes/
%cd diabetes/
!git init

/content/diabetes
Initialized empty Git repository in /content/diabetes/.git/


In [16]:
# Creación del commit para cargar el proyecto en GitHub
!git add main.py model.joblib railway.json requirements.txt
!git commit -m "Agregamos los archivos necesarios para desplegar el API."

[master (root-commit) 3c0f607] Agregamos los archivos necesarios para desplegar el API.
 4 files changed, 39 insertions(+)
 create mode 100644 main.py
 create mode 100644 model.joblib
 create mode 100644 railway.json
 create mode 100644 requirements.txt


In [17]:
# Enlace del repositorio local con el repositorio de GitHub
repo_url = "https://github.com/jroncanciot/diabetes.git"
pat = re.compile(r"(https://)(.*)")
match = re.match(pat, repo_url)
url_token = "".join([match.group(1), token, "@", match.group(2)])
os.environ["GITHUB"] = url_token
!git remote add origin $GITHUB
!git push origin master

Enumerating objects: 6, done.
Counting objects:  16% (1/6)Counting objects:  33% (2/6)Counting objects:  50% (3/6)Counting objects:  66% (4/6)Counting objects:  83% (5/6)Counting objects: 100% (6/6)Counting objects: 100% (6/6), done.
Delta compression using up to 2 threads
Compressing objects: 100% (6/6), done.
Writing objects: 100% (6/6), 3.90 MiB | 4.12 MiB/s, done.
Total 6 (delta 0), reused 0 (delta 0), pack-reused 0
To https://github.com/jroncanciot/diabetes.git
 * [new branch]      master -> master


## **Funcionamiento del API**
---

In [18]:
# Petición al API
model_url = "https://diabetes-production-2036.up.railway.app"
feature = ApiInput(features = [1, 50, 1, 1, 0, 27.32, 6.2, 130])
response = requests.post(os.path.join(model_url, "diabetes_prediction"), json = feature.dict())
print(response.json())

<ipython-input-18-3141a4fce7c9>:4: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response = requests.post(os.path.join(model_url, "diabetes_prediction"), json = feature.dict())


{'forecast': 0}


## **Versiones de las Librerias**
---

In [39]:
# Versiones de las librerías usadas
!python --version
columns = ["Paquete", "Versión"]
data = [
  ["Request", requests.__version__],
  ["Logging", logging.__version__],
  ["Joblib", joblib.__version__],
  ["Pandas", pd.__version__],
  ["Numpy", np.__version__],
  ["Sklearn", sklearn.__version__],
  ["Xgboost", xgboost.__version__],
  ["Fastapi", fastapi.__version__]
]
pack = pd.DataFrame(data, columns = columns)
pack
#print('FastAPI', fastapi.__version__)

Python 3.10.12


Unnamed: 0,Paquete,Versión
0,Request,2.32.3
1,Logging,0.5.1.2
2,Joblib,1.4.2
3,Pandas,2.2.2
4,Numpy,1.26.4
5,Sklearn,1.5.2
6,Xgboost,2.1.3
7,Fastapi,0.115.6


**Universidad Nacional de Colombia** - *Facultad de Ingeniería*