In [1]:
import os
import pandas as pd
import yaml
import pickle

from utils.training_utils import find_specific_variables

from lightgbm import LGBMClassifier

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
features = yaml.safe_load(open(os.path.join('..', 'src', 'config', 'feature_config.yaml'), 'r'))

# Modelo para classificação de um produto em promoção

In [3]:
df = pd.read_csv(os.path.join('..', 'data', 'train_test', 'train_encoded.csv'))

print(df.shape)
df.head()

(32940, 19)


Unnamed: 0,contact,default,education,job,month,poutcome,quarter,age,campaign,cons.conf.idx,cons.price.idx,contacts_tendency,emp.var.rate,euribor3m,nr.employed,pdays,previous,was_contacted_before,y
0,0.0,0.0,6.0,0.0,9.0,1.0,2.0,31.0,3.0,-29.8,92.379,0.0,-3.4,0.803,5017.5,999.0,0.0,0.0,0
1,1.0,0.0,3.0,3.0,6.0,1.0,1.0,39.0,2.0,-36.4,93.994,0.0,1.1,4.857,5191.0,999.0,0.0,0.0,0
2,0.0,0.0,5.0,2.0,3.0,1.0,2.0,34.0,4.0,-42.7,93.918,0.0,1.4,4.958,5228.1,999.0,0.0,0.0,0
3,1.0,1.0,2.0,9.0,6.0,1.0,1.0,36.0,9.0,-36.4,93.994,0.0,1.1,4.856,5191.0,999.0,0.0,0.0,0
4,0.0,1.0,7.0,8.0,1.0,1.0,2.0,25.0,1.0,-31.4,92.201,0.0,-2.9,0.825,5076.2,999.0,0.0,0.0,0


In [4]:
seletor = pickle.load(
    open(os.path.join('..', 'models', 'encoders', 'seletor_2.pkl'), 'rb')
)

df_hyperparams = pickle.load(
    open(os.path.join('..', 'models', 'df_metrics_results_tunning_lgbm.pkl'), 'rb')
)

In [5]:

feature_target = find_specific_variables(features, 'target', specific_value=True)

In [6]:
df_treino, df_valid = train_test_split(df, test_size=0.2, random_state=96)

In [7]:
print(f'Shape Treino: {df_treino.shape}')
print(f'Shape Valid: {df_valid.shape}')

Shape Treino: (26352, 19)
Shape Valid: (6588, 19)


In [8]:
print(f'% Treino: {df_treino[feature_target[0]].mean()}')
print(f'% Valid: {df_valid[feature_target[0]].mean()}')

% Treino: 0.11202185792349727
% Valid: 0.11262902246508803


In [9]:
df_hyperparams[df_hyperparams.value == max(df_hyperparams.value)].T

Unnamed: 0,22
number,22
value,0.807497
datetime_start,2025-06-21 15:13:34.908668
datetime_complete,2025-06-21 15:13:37.438271
duration,0 days 00:00:02.529603
params_class_weight,balanced
params_learning_rate,0.016298
params_max_depth,6
params_min_child_samples,79
params_n_estimators,219


In [10]:
best_row = df_hyperparams.loc[df_hyperparams['value'].idxmax()]
best_params = best_row.filter(like='params_')
hyper_params = {col.replace('params_', ''): best_params[col] for col in best_params.index}


hyper_params.update({
    'eval_metric': 'auc',
})

In [11]:
hyper_params

{'class_weight': 'balanced',
 'learning_rate': 0.01629792783423414,
 'max_depth': 6,
 'min_child_samples': 79,
 'n_estimators': 219,
 'num_leaves': 44,
 'eval_metric': 'auc'}

In [12]:
model = LGBMClassifier(
    **hyper_params,
    random_state=12,
    n_jobs=-1
)

model

In [13]:
model.fit(
    df_treino[seletor.features].values,
    df_treino[feature_target].values,
    eval_set=[(df_valid[seletor.features].values, df_valid[feature_target].values)],
    verbose=True
)

[1]	valid_0's binary_logloss: 0.688284
[2]	valid_0's binary_logloss: 0.683574
[3]	valid_0's binary_logloss: 0.678983
[4]	valid_0's binary_logloss: 0.67454
[5]	valid_0's binary_logloss: 0.670211
[6]	valid_0's binary_logloss: 0.665997
[7]	valid_0's binary_logloss: 0.661902
[8]	valid_0's binary_logloss: 0.657918
[9]	valid_0's binary_logloss: 0.654053
[10]	valid_0's binary_logloss: 0.650291
[11]	valid_0's binary_logloss: 0.646606
[12]	valid_0's binary_logloss: 0.643051
[13]	valid_0's binary_logloss: 0.639572
[14]	valid_0's binary_logloss: 0.636185
[15]	valid_0's binary_logloss: 0.63291
[16]	valid_0's binary_logloss: 0.629706
[17]	valid_0's binary_logloss: 0.626607
[18]	valid_0's binary_logloss: 0.623576
[19]	valid_0's binary_logloss: 0.620628
[20]	valid_0's binary_logloss: 0.617702
[21]	valid_0's binary_logloss: 0.614897
[22]	valid_0's binary_logloss: 0.61218
[23]	valid_0's binary_logloss: 0.609527
[24]	valid_0's binary_logloss: 0.606961
[25]	valid_0's binary_logloss: 0.604414
[26]	valid_0

In [14]:
os.makedirs(os.path.join('..', 'models', 'predictors'), exist_ok=True)

pickle.dump(
    model, 
    open(os.path.join('..', 'models', 'predictors', 'model.pkl'), 'wb')
)