In [57]:
# Tagging configuration passed to split(); options noted by the author: "top", "qg".
tagging = "qg"

In [58]:
from ML.EFPs.split import split

# Partition the data for the selected tagging configuration into
# train / validation / test features (X_*) and labels (y_*).
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
) = split(tagging)

In [59]:
# First argument handed to norm(); values listed by the author: 0, 1, 2, 3, 4, 5.
n = 3

In [60]:
from src.Preprocessing.norm import norm

# Apply norm(n, ...) to each feature split, in train / val / test order.
# NOTE(review): the inputs are overwritten, so re-running this cell feeds
# already-normalized arrays back into norm() — re-run the split cell first.
X_train, X_val, X_test = [
    norm(n, features) for features in (X_train, X_val, X_test)
]

In [61]:
import numpy as np

# GridSearchCV does its own cross-validation splitting, so the training and
# validation sets are stacked along the first axis into a single set.
X_train_val = np.concatenate([X_train, X_val])
y_train_val = np.concatenate([y_train, y_val])

In [62]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, roc_auc_score

# Seed for the stochastic SAGA solver, so that repeated runs of this cell
# select the same hyper-parameters and report the same scores.
RANDOM_STATE = 42

# Base estimator; the grid below only varies the regularization settings.
model = LogisticRegression(
    tol=1e-3,
    # NOTE(review): GridSearchCV fits clones of this estimator, so warm_start
    # is not expected to carry coefficients between candidates — confirm it is
    # still wanted on the saved model.
    warm_start=True,
    solver='saga',
    max_iter=10000,
    class_weight=None,  # the classes are already balanced
    random_state=RANDOM_STATE,
)

# Define the parameter grid: an unpenalized fit, plus elastic-net fits that
# span pure L2 (l1_ratio=0), an even mix (0.5) and pure L1 (1) at three
# regularization strengths.
param_grid = [
    {'penalty': [None]},
    {'penalty': ['elasticnet'], 'l1_ratio': [0, 0.5, 1], 'C': [0.1, 1, 10]}
]

# Create the custom AUC scorer (scored on predicted probabilities)
auc_scorer = make_scorer(roc_auc_score, response_method='predict_proba')

# Initialize GridSearchCV; no cv argument is given, so the library default
# cross-validation scheme is used.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring=auc_scorer)

# Fit the grid search on the combined training + validation data
grid_search.fit(X_train_val, y_train_val)

# Retrieve the best parameters and score.
# best_score_ is the mean cross-validated AUC of the best candidate over the
# train+val folds; it is printed under the "Train+Val" label below.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Train+Val AUC Score:", best_score)

# Evaluate on the held-out test set, using the probability of the second
# class column (index 1) as the score.
best_model = grid_search.best_estimator_
test_auc_score = roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])
print("Test AUC Score:", test_auc_score)

Best Parameters: {'penalty': None}
Train+Val AUC Score: 0.8489288985173138
Test AUC Score: 0.8484259215611765


Top n0:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.9477558428617991

Test AUC Score: 0.9490567565579713

Top n1:

Best Parameters: {'C': 10, 'l1_ratio': 1, 'penalty': 'elasticnet'}

Train+Val AUC Score: 0.9501137539494614

Test AUC Score: 0.9477973230382086

Top n3:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.9452454353548141

Test AUC Score: 0.9462030370083123

QG n0:

Best Parameters: {'C': 1, 'l1_ratio': 1, 'penalty': 'elasticnet'}

Train+Val AUC Score: 0.8401828164212167

Test AUC Score: 0.8385689095228394

QG n3:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.8489288985173138

Test AUC Score: 0.8484259215611765

# Saving the model

In [64]:
import joblib

# Save the trained model to a file.
# The name is derived from the notebook's `tagging` and `n` settings so that a
# run with a different configuration cannot overwrite, or be mislabeled as,
# another configuration's model (e.g. "LogisticRegression_qg_n3.joblib").
joblib_file = f"LogisticRegression_{tagging}_n{n}.joblib"
joblib.dump(best_model, joblib_file)

['LogisticRegression_qg_n3.joblib']