In [1]:
import pandas as pd

import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# Define experiment name
experiment_name = "credit_default_risk_logistic_reg_Experiment"

# Set the experiment name
mlflow.set_experiment(experiment_name)

# Define run name
run_name = "LogisticRegression_Run_1"

2025/01/03 11:00:07 INFO mlflow.tracking.fluent: Experiment with name 'credit_default_risk_logistic_reg_Experiment' does not exist. Creating a new experiment.


In [4]:
# Load dataset
data = pd.read_csv('application_train_readyfor_models_w_columns.csv',sep=';')
# Select sample
data_sample= data.sample(n=120000)
X = data_sample.drop(['TARGET'], axis=1)
y = data_sample.TARGET
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
# Start the MLflow run with a specific run name
with mlflow.start_run(run_name=run_name):

    # Model training
    model = LogisticRegression(max_iter = 6000, solver='saga', random_state=808)
    model.fit(X_train, y_train)

    # Create input example (sample input data)
    input_example = X_train[0:1]

    # Log the model with input example
    mlflow.sklearn.log_model(model, "logistic_regression_model", input_example=input_example)

    # Log metrics (optional)
    mlflow.log_metric("accuracy", model.score(X_test, y_test))

    # Print out the current logged details of the MLflow run
    run_id = mlflow.active_run().info.run_id
    print(f"Run ID: {run_id}")

    # Fetch the logged parameters
    params = mlflow.get_run(run_id).data.params
    print("Logged Parameters:")
    for param in params:
        print(f"{param}: {params[param]}")

    # Fetch the logged metrics
    metrics = mlflow.get_run(run_id).data.metrics
    print("Logged Metrics:")
    for metric in metrics:
        print(f"{metric}: {metrics[metric]}")

    # Fetch the logged model artifact URI
    artifact_uri = mlflow.get_artifact_uri()
    print(f"Model Artifact URI: {artifact_uri}")

# End the run
mlflow.end_run()

Run ID: bfecad5ad447400881a6223e21967906
Logged Parameters:
Logged Metrics:
accuracy: 0.9222083333333333
Model Artifact URI: file:///c:/Users/oquach/OneDrive%20-%20Iliad/Documents/OpenClassRoom/Credit_default_risk/notebooks/mlruns/939470857449228318/bfecad5ad447400881a6223e21967906/artifacts


In [5]:
import pickle
# Save the model
with open("//models/trained_model.pkl", "wb") as f:
    pickle.dump(model, f)