In [57]:
# Tagging task to run; options: "top", "qg"
tagging = "qg"

In [58]:
from ML.EFPs.split import split

# Partition the data for the selected tagging task and unpack the
# train / validation / test features (X_*) and labels (y_*)
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
) = split(tagging)

In [59]:
# Normalization option handed to norm(); choices: 0, 1, 2, 3, 4, 5
n = 3

In [60]:
from src.Preprocessing.norm import norm

# Normalization: apply norm() with option n to each split, in order.
# NOTE(review): the splits are rebound to their normalized versions, so
# re-running this cell feeds already-normalized data back into norm() —
# re-run the split cell first if this cell has to be executed again.
X_train, X_val, X_test = (
    norm(n, features) for features in (X_train, X_val, X_test)
)

In [61]:
import numpy as np

# GridSearchCV does its own cross-validation splitting, so stack the
# training and validation samples (along the first axis) into a single set
X_train_val = np.concatenate([X_train, X_val])
y_train_val = np.concatenate([y_train, y_val])

In [62]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, roc_auc_score

# Seed for the stochastic `saga` solver, so that repeated runs of this cell
# produce the same fitted coefficients and the same scores
RANDOM_STATE = 42

# Base estimator; its regularisation settings are tuned by the grid search below.
# NOTE(review): GridSearchCV fits clones of this estimator, so warm_start
# presumably has no effect during the search — confirm before relying on it.
model = LogisticRegression(
    tol=1e-3,
    warm_start=True,
    solver='saga',
    max_iter=10000,
    class_weight=None,  # the classes are already balanced
    random_state=RANDOM_STATE,
)

# Define the parameter grid: one unpenalised fit, plus elastic-net fits for
# three l1_ratio values at three values of C
param_grid = [
    {'penalty': [None]},
    {'penalty': ['elasticnet'], 'l1_ratio': [0, 0.5, 1], 'C': [0.1, 1, 10]},
]

# Create the custom AUC scorer (scores the output of predict_proba)
auc_scorer = make_scorer(roc_auc_score, response_method='predict_proba')

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring=auc_scorer)

# Fit the GridSearchCV to the combined training + validation data
grid_search.fit(X_train_val, y_train_val)

# Retrieve the best parameters and score.
# best_score_ is the mean cross-validated AUC of the best candidate on the
# combined train+val set, not a score on data the model was fitted on.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Train+Val AUC Score:", best_score)

# Evaluate on the held-out test set, using the positive-class probability (column 1)
best_model = grid_search.best_estimator_
test_auc_score = roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])
print("Test AUC Score:", test_auc_score)

Best Parameters: {'penalty': None}
Train+Val AUC Score: 0.8489288985173138
Test AUC Score: 0.8484259215611765


Top n0:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.9477558428617991

Test AUC Score: 0.9490567565579713

Top n1:

Best Parameters: {'C': 10, 'l1_ratio': 1, 'penalty': 'elasticnet'}

Train+Val AUC Score: 0.9501137539494614

Test AUC Score: 0.9477973230382086

Top n3:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.9452454353548141

Test AUC Score: 0.9462030370083123

QG n0:

Best Parameters: {'C': 1, 'l1_ratio': 1, 'penalty': 'elasticnet'}

Train+Val AUC Score: 0.8401828164212167

Test AUC Score: 0.8385689095228394

QG n3:

Best Parameters: {'penalty': None}

Train+Val AUC Score: 0.8489288985173138

Test AUC Score: 0.8484259215611765

# Saving the model

In [64]:
import joblib

# Save the trained model to a file named after the current tagging task and
# normalization option, so the filename always matches the model being saved
# (with tagging="qg" and n=3 this is "LogisticRegression_qg_n3.joblib")
joblib_file = f"LogisticRegression_{tagging}_n{n}.joblib"
joblib.dump(best_model, joblib_file)

['LogisticRegression_qg_n3.joblib']

# Loading the models

In [65]:
# Reload two previously saved models from the working directory.
# joblib files are pickle-based: only load files from a trusted source.
loaded_logreg1 = joblib.load(filename="LogisticRegression_top_n0.joblib")
loaded_logreg2 = joblib.load(filename="LogisticRegression_qg_n3.joblib")

In [69]:
# Show the fitted intercept and coefficients of the first reloaded model
print(
    loaded_logreg1.intercept_,
    loaded_logreg1.coef_,
)

[-7.8291638] [[   2.1199024    93.61709804   55.99737824 -348.39928345  -43.38862406
   114.35126716 -108.71609173 -113.23333932  -15.18485591  -69.38693334
    48.10689851  -11.98067895  847.30162436  390.85654579  173.13515903
   156.44425989  438.38119949  165.38306622   77.97379694   65.84008409
  -669.22812227 -269.42391644 -175.57187025 -101.93059334  -32.85969039
  -115.47715737  -76.58836625  158.02418669   49.96257857   82.41829425
    54.23419134 -340.21664906 -132.24279014   30.24546389  262.70581233
   101.45824108 -114.83130892  -40.52184524  -20.46215681  -55.2514097
  -187.23676994  -71.20868715  -64.78100075   44.58224554   19.96551909
     7.81310073  -68.022896     -4.38414982  110.11837013   -8.37791346
   -25.20454281  -40.4051249   -34.92107587  -45.2811042 ]]


In [70]:
# Show the fitted intercept and coefficients of the second reloaded model
print(loaded_logreg2.intercept_, loaded_logreg2.coef_)

[2.94281667] [[ 2.20222631  3.28138751  2.54174708 -0.93395493 -1.25711667  1.44580354
   3.83605018  1.20509578  0.23052523 -0.17130515  2.42785297  1.17872136
  -0.78943718 -1.55098244 -1.74549898 -0.45004004  1.92045622 -0.79494907
   0.29753968 -1.17196512  0.04107184  0.69964964 -2.9877925   0.90884443
  -1.81987052  0.38540944  0.07084035  1.83861052  1.28260277  0.15628587
   2.14506328 -3.33712872 -4.73728506 -3.51568678 -0.27064247 -0.93914125
   0.60643347  0.05418223 -0.84362951  1.81120958  2.29930656  1.54633498
  -0.05650449  0.90671632  2.90312406 -1.05333479 -2.45503348 -0.45178054
  -0.49173216 -0.27136879  2.06378218 -0.84492797  2.60122965 -1.52730636]]
