In [None]:
pip install imblearn

In [2]:
#Carga de datos

def load_data():
    
    from preparation.preparation d
    
    ''' Defined Load File '''
    model_data = load_file_card().copy()
    
    ''' Preparin data for analytic model '''
    x = model_data.drop("fraud", axis = 1).values
    y = model_data["fraud"].values
    
    return x, y

In [3]:
#Particionamiento de datos

def make_train_test_split(x, y):
    
    import pandas as pd
    from imblearn.over_sampling import SMOTE
    from sklearn.model_selection import train_test_split
    
    smote = SMOTE(random_state=39)
    non_fraud_over, fraud_over = smote.fit_resample(x, y)

    non_fraud_over_df = pd.DataFrame(non_fraud_over, columns=["distance_from_home", "distance_from_last_transaction",
        "ratio_to_median_purchase_price", "repeat_retailer", "used_chip",
        "used_pin_number", "online_order"])

    non_fraud_over_df["fraud"] = fraud_over
    df3 = non_fraud_over_df

    feature_columns = ["distance_from_home", "distance_from_last_transaction",
    "ratio_to_median_purchase_price", "repeat_retailer", "used_chip", "used_pin_number", "online_order"]

    X_smote = df3[feature_columns]
    y_smote = df3.fraud

    X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(X_smote, y_smote, test_size=0.2, random_state=39)
    
    return X_train_smote, X_test_smote, y_train_smote, y_test_smote


In [4]:
#Calculo de metricas

def eval_metrics(y_test_smote, y_pred_logreg_smote):
    
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    
    #confusion matrix
    confusion_matrix_logreg = confusion_matrix(y_test_smote, y_pred_logreg_smote)
    
    #classification report
    classification_report_logreg = classification_report(y_test_smote, y_pred_logreg_smote, digits=6)
    
    return confusion_matrix_logreg, classification_report_logreg    

In [5]:
#Reporte de métricas

def report(confusion_matrix_logreg, classification_report_logreg):
    
    print(f"Confusion matrix Logistic Regression: {confusion_matrix_logreg}")
    print(f"classification report RL: {classification_report_logreg}")

In [17]:
#Seteo de ruta para registro de modelos
def set_tracking_uri():

    import mlflow

    mlflow.set_tracking_uri('sqlite:///mlruns.db')
    #mlflow.set_tracking_uri('http://localhost:5000')

In [7]:
#Visualización config de rutas
def display_config():

    import mlflow

    print("Current model registry uri: {}".format(mlflow.get_registry_uri()))
    print("      Current tracking uri: {}".format(mlflow.get_tracking_uri()))
    print("      Artifacts tracking uri: {}".format(mlflow.get_tracking_uri()))

In [10]:
#Entrenamiento modelo

def train_logreg (max_iter = 200):
    
    import mlflow.sklearn
    import mlflow    
    from sklearn.linear_model import LogisticRegression
    
    #Habilita autolog
    mlflow.sklearn.autolog()
    
    #Setea entorno para registros
    set_tracking_uri()
    
    x, y = load_data()
    
    X_train_smote, X_test_smote, y_train_smote, y_test_smote = make_train_test_split(x, y)
    
    print('Tracking directory:', mlflow.get_tracking_uri())
    
    with mlflow.start_run(run_name = "LogReg_Model") as run:
    
        logreg = LogisticRegression(max_iter=max_iter)
        logreg.fit(X_train_smote, y_train_smote)

        y_pred_logreg_smote = logreg.predict(X_test_smote)
        
        
        confusion_matrix_logreg, classification_report_logreg = eval_metrics(y_test_smote, y_pred_logreg_smote)
        
        report(confusion_matrix_logreg, classification_report_logreg)
        
        #
        # Tracking de parámetros
        #
        mlflow.log_param("max_iter", max_iter)

        #
        # Tracking de metricas
        #
        mlflow.log_metric("accuracy_logreg", logreg.score(X_test_smote, y_test_smote))
        #mlflow.log_metric("classification_report_logreg", classification_report_logreg)

        #
        # Log del modelo
        #
        mlflow.sklearn.log_model(logreg, "model")
        
        #
        #Registro del modelo luego de varias corridas (se descomenta luego de correr con varias max_iter)
        #
        mlflow.register_model(
            f"runs:/{run.info.run_id}",
            f"sklearn-{max_iter}-iterations-logistic-regression-model"
        )
        
        return (run.info.experiment_id, run.info.run_id)

In [11]:
#Corridas
train_logreg()

2023/03/25 00:25:17 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '54cc11bb661f4a68afdb1061ebda3d29', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Tracking directory: sqlite:///mlruns.db
Confusion matrix Logistic Regression: [[170558  12121]
 [  9170 173190]]
classification report RL:               precision    recall  f1-score   support

         0.0   0.948978  0.933649  0.941251    182679
         1.0   0.934591  0.949715  0.942092    182360

    accuracy                       0.941675    365039
   macro avg   0.941785  0.941682  0.941672    365039
weighted avg   0.941791  0.941675  0.941671    365039



('0', '091922b00b35472bb1288a61d7f5b9fa')

In [None]:
train_logreg(300)

In [None]:
train_logreg(500)

In [None]:
train_logreg(5000)

In [12]:
def get_json_test_data():

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    data = x_test.iloc[0:10,:].to_json(orient='split')

    data = repr(data)
    return data

data = get_json_test_data()
data

2023/03/25 00:26:32 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '3be023fe6f92464ab92db76bb5aa99a0', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


'\'{"columns":["distance_from_home","distance_from_last_transaction","ratio_to_median_purchase_price","repeat_retailer","used_chip","used_pin_number","online_order"],"index":[40180,360884,1713712,1202718,1104767,1186848,764499,1206247,645822,545780],"data":[[22.4515410324,0.8570034081,1.1089148972,1.0,1.0,0.0,0.0],[2.7187672796,4.1638797774,0.6666231243,1.0,0.0,1.0,1.0],[31.7590076364,7.3546926635,5.0298263101,1.0,0.0,0.0,1.0],[31.5252884017,3.0382427237,10.4837593271,1.0,0.0,0.0,1.0],[7.6859550513,1.1679889103,6.8111370322,1.0,1.0,0.0,1.0],[4.862060388,0.1665092596,7.4033881957,1.0,0.0,0.0,1.0],[18.6174737057,3.3382951337,2.5523527288,1.0,1.0,1.0,1.0],[1.2281278102,0.3729986004,7.406533537,0.0,0.0,0.0,1.0],[24.8193826545,1.7512711665,0.7965344183,1.0,1.0,0.0,0.0],[4.6623866136,0.1949628846,0.2885423625,1.0,0.0,0.0,1.0]]}\''

In [20]:
display_config()

Current model registry uri: sqlite:///mlruns.db
      Current tracking uri: sqlite:///mlruns.db


In [18]:
set_tracking_uri()

In [21]:
!curl http://localhost:5000/invocations -H 'Content-Type: application/json' -d {data}

[0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]