In [2]:
from typing import List, Dict
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import os
import boto3
from skl2onnx import to_onnx
from onnxconverter_common.data_types import FloatTensorType, Int64TensorType

In [3]:
# Load the diabetes dataset from the local data directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer="./data/diabetes.csv")

In [4]:
# Separate the target column from the feature columns.
y = data["Outcome"]
X = data.drop(columns=["Outcome"])

In [5]:
# Preview the first five feature rows.
X.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [6]:
# Preview the first five target values.
y.head(n=5)

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Report how many rows ended up in each split.
print(f"Number of samples in training set: {X_train.shape[0]}")
print(f"Number of samples in test set: {X_test.shape[0]}")

Number of samples in training set: 614
Number of samples in test set: 154


In [8]:
# L2-regularised logistic regression; max_iter caps the solver at 300 iterations.
model = LogisticRegression(
    penalty="l2",
    C=1.0,
    max_iter=300,
)
# Train on the training split (last expression, so the fitted estimator is displayed).
model.fit(X_train, y_train)

In [9]:
# Score the held-out split and print per-class precision/recall/F1.
y_predicted = model.predict(X_test)

print("Reporte de clasificación")
print(classification_report(y_true=y_test, y_pred=y_predicted))

Reporte de clasificación
              precision    recall  f1-score   support

           0       0.84      0.92      0.88       107
           1       0.76      0.62      0.68        47

    accuracy                           0.82       154
   macro avg       0.80      0.77      0.78       154
weighted avg       0.82      0.82      0.82       154



In [10]:
# Human-readable labels, indexed by the integer class the model predicts
# (0 -> 'No diabetes', 1 -> 'Diabetes').
classes = ('No diabetes', 'Diabetes')

def predict(patients: List[Dict]):
    """Classify patient records with the trained model.

    Args:
        patients: List of dicts, one per patient, keyed by feature column name.
            Key order does not matter; keys that are not training columns are
            ignored.

    Returns:
        A list with one label from ``classes`` per input record, in input
        order. An empty input yields an empty list.

    Raises:
        KeyError: If the records lack one of the training feature columns.
    """
    # Nothing to score: avoid handing an empty frame to the estimator.
    if not patients:
        return []

    # Align columns to the order used when fitting; the DataFrame otherwise
    # inherits the key order of the incoming dicts.
    feature_order = list(X_train.columns)
    inputs = pd.DataFrame(patients)[feature_order]

    predictions = model.predict(inputs)
    return [classes[p] for p in predictions]

# Example record used below to exercise predict().
diabetes_patient = dict(
    Pregnancies=6.0,
    Glucose=110.0,
    BloodPressure=65.0,
    SkinThickness=15.0,
    Insulin=1.0,
    BMI=45.7,
    DiabetesPedigreeFunction=0.627,
    Age=50,
)

# Second example record, differing in pregnancies, glucose, blood pressure,
# skin thickness, pedigree function and age.
no_diabetes_patient = dict(
    Pregnancies=0,
    Glucose=88.0,
    BloodPressure=60.0,
    SkinThickness=35.0,
    Insulin=1.0,
    BMI=45.7,
    DiabetesPedigreeFunction=0.27,
    Age=20,
)


# Run both example records through predict() and show the resulting labels.
predicciones = predict(
    patients=[diabetes_patient, no_diabetes_patient],
)
print(predicciones)

['Diabetes', 'No diabetes']


In [11]:
# Keep the first feature row as a one-row DataFrame and display it.
sample_row = X.head(1)
sample_row

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50


In [12]:
def dtype_to_onnxType(dt):
    """Map a column dtype to the matching ONNX tensor type of shape (1, 1).

    Args:
        dt: A dtype (or dtype-like value such as the string ``"float64"``).

    Returns:
        ``FloatTensorType((1, 1))`` for float64, ``Int64TensorType((1, 1))``
        for int64, and ``None`` for any other dtype.
    """
    # Compare through pandas so the function relies only on names the notebook
    # imports; the bare `dtype(...)` call used before was never defined.
    if pd.api.types.is_dtype_equal(dt, "float64"):
        return FloatTensorType((1, 1))
    if pd.api.types.is_dtype_equal(dt, "int64"):
        return Int64TensorType((1, 1))
    # No mapping for other dtypes; callers must handle None.
    return None

In [13]:
# Declare a single float input named "X": any number of rows, one column per feature.
schema = [
    ("X", FloatTensorType([None, len(X.columns)])),
]

In [14]:
# Convert the fitted estimator to ONNX using the declared input schema,
# then display the resulting model definition.
onnx_model = to_onnx(
    model,
    initial_types=schema,
)
onnx_model

ir_version: 8
producer_name: "skl2onnx"
producer_version: "1.15.0"
domain: "ai.onnx"
model_version: 0
doc_string: ""
graph {
  node {
    input: "X"
    output: "label"
    output: "probability_tensor"
    name: "LinearClassifier"
    op_type: "LinearClassifier"
    attribute {
      name: "classlabels_ints"
      ints: 0
      ints: 1
      type: INTS
    }
    attribute {
      name: "coefficients"
      floats: -0.09075506776571274
      floats: -0.03388337790966034
      floats: 0.01370192226022482
      floats: -0.004600933752954006
      floats: 0.001396218198351562
      floats: -0.08794182538986206
      floats: -0.8066747188568115
      floats: -0.020074469968676567
      floats: 0.09075506776571274
      floats: 0.03388337790966034
      floats: -0.01370192226022482
      floats: 0.004600933752954006
      floats: -0.001396218198351562
      floats: 0.08794182538986206
      floats: 0.8066747188568115
      floats: 0.020074469968676567
      type: FLOATS
    }
    attribute {

In [17]:
# Persist the converted model to disk in its serialized binary form.
with open("diabetes_detection.onnx", mode="wb") as onnx_file:
    onnx_file.write(onnx_model.SerializeToString())

In [21]:
# Connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
key_id = os.getenv("AWS_ACCESS_KEY_ID")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
endpoint = os.getenv("AWS_S3_ENDPOINT")
bucket_name = os.getenv("AWS_S3_BUCKET")

# Fail early with a readable message: without a bucket name the upload below
# cannot succeed and would otherwise surface as an opaque client error.
if not bucket_name:
    raise RuntimeError(
        "AWS_S3_BUCKET environment variable is not set; "
        "cannot upload diabetes_detection.onnx"
    )

# NOTE(review): verify=False turns off TLS certificate verification for every
# request made by this client. Presumably this is for an endpoint with a
# self-signed certificate -- confirm, and prefer passing a CA bundle path instead.
s3_client = boto3.client(
    "s3",
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
    endpoint_url=endpoint,
    verify=False,
)

# Upload the serialized model under the same object key as its file name.
s3_client.upload_file("diabetes_detection.onnx", bucket_name, Key="diabetes_detection.onnx")

print("File diabetes_detection.onnx uploaded to S3!")

File diabetes_detection.onnx uploaded to S3!


