In [1]:
import os
import pandas as pd
import yaml
import pickle

from utils.training_utils import find_specific_variables

from lightgbm import LGBMClassifier

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
features = yaml.safe_load(open(os.path.join('..', 'src', 'config', 'feature_config.yaml'), 'r'))

# Modelo para classificação de um produto em promoção

In [3]:
df = pd.read_csv(os.path.join('..', 'data', 'train_test', 'train_encoded.csv'))

print(df.shape)
df.head()

(32940, 19)


Unnamed: 0,contact,default,employment_status,job,month,poutcome,quarter,age,campaign,cons.conf.idx,cons.price.idx,contacts_tendency,emp.var.rate,euribor3m,nr.employed,pdays,previous,was_contacted_before,y
0,0.0,0.0,0.0,0.0,9.0,1.0,2.0,31.0,3.0,-29.8,92.379,0.0,-3.4,0.803,5017.5,999.0,0.0,0.0,0
1,1.0,0.0,0.0,3.0,6.0,1.0,1.0,39.0,2.0,-36.4,93.994,0.0,1.1,4.857,5191.0,999.0,0.0,0.0,0
2,0.0,0.0,0.0,2.0,3.0,1.0,2.0,34.0,4.0,-42.7,93.918,0.0,1.4,4.958,5228.1,999.0,0.0,0.0,0
3,1.0,1.0,0.0,9.0,6.0,1.0,1.0,36.0,9.0,-36.4,93.994,0.0,1.1,4.856,5191.0,999.0,0.0,0.0,0
4,0.0,1.0,1.0,8.0,1.0,1.0,2.0,25.0,1.0,-31.4,92.201,0.0,-2.9,0.825,5076.2,999.0,0.0,0.0,0


In [4]:
seletor = pickle.load(
    open(os.path.join('..', 'models', 'encoders', 'seletor_2.pkl'), 'rb')
)

df_hyperparams = pickle.load(
    open(os.path.join('..', 'models', 'df_metrics_results_tunning_lgbm.pkl'), 'rb')
)

In [5]:

feature_target = find_specific_variables(features, 'target', specific_value=True)

In [6]:
df_treino, df_valid = train_test_split(df, test_size=0.2, random_state=96)

In [7]:
print(f'Shape Treino: {df_treino.shape}')
print(f'Shape Valid: {df_valid.shape}')

Shape Treino: (26352, 19)
Shape Valid: (6588, 19)


In [8]:
print(f'% Treino: {df_treino[feature_target[0]].mean()}')
print(f'% Valid: {df_valid[feature_target[0]].mean()}')

% Treino: 0.11202185792349727
% Valid: 0.11262902246508803


In [9]:
df_hyperparams[df_hyperparams.value == max(df_hyperparams.value)].T

Unnamed: 0,23
number,23
value,0.806648
datetime_start,2025-04-02 09:25:33.723947
datetime_complete,2025-04-02 09:25:34.233851
duration,0 days 00:00:00.509904
params_class_weight,
params_learning_rate,0.0466
params_max_depth,5
params_min_child_samples,24
params_n_estimators,168


In [10]:
best_row = df_hyperparams.loc[df_hyperparams['value'].idxmax()]
best_params = best_row.filter(like='params_')
hyper_params = {col.replace('params_', ''): best_params[col] for col in best_params.index}


hyper_params.update({
    'eval_metric': 'auc',
})

In [11]:
hyper_params

{'class_weight': None,
 'learning_rate': 0.04659996319610005,
 'max_depth': 5,
 'min_child_samples': 24,
 'n_estimators': 168,
 'num_leaves': 44,
 'eval_metric': 'auc'}

In [12]:
model = LGBMClassifier(
    **hyper_params,
    random_state=12,
    n_jobs=-1
)

model

In [13]:
model.fit(
    df_treino[seletor.features].values,
    df_treino[feature_target].values,
    eval_set=[(df_valid[seletor.features].values, df_valid[feature_target].values)],
    verbose=True
)

[1]	valid_0's binary_logloss: 0.341069
[2]	valid_0's binary_logloss: 0.332616
[3]	valid_0's binary_logloss: 0.325654
[4]	valid_0's binary_logloss: 0.319922
[5]	valid_0's binary_logloss: 0.315021
[6]	valid_0's binary_logloss: 0.310718
[7]	valid_0's binary_logloss: 0.306942
[8]	valid_0's binary_logloss: 0.303645
[9]	valid_0's binary_logloss: 0.300577
[10]	valid_0's binary_logloss: 0.297876
[11]	valid_0's binary_logloss: 0.295429
[12]	valid_0's binary_logloss: 0.293168
[13]	valid_0's binary_logloss: 0.291227
[14]	valid_0's binary_logloss: 0.28943
[15]	valid_0's binary_logloss: 0.28776
[16]	valid_0's binary_logloss: 0.286268
[17]	valid_0's binary_logloss: 0.284948
[18]	valid_0's binary_logloss: 0.283728
[19]	valid_0's binary_logloss: 0.28259
[20]	valid_0's binary_logloss: 0.281516
[21]	valid_0's binary_logloss: 0.280562
[22]	valid_0's binary_logloss: 0.27973
[23]	valid_0's binary_logloss: 0.278963
[24]	valid_0's binary_logloss: 0.278256
[25]	valid_0's binary_logloss: 0.277594
[26]	valid_0'

In [14]:
pickle.dump(
    model, 
    open(os.path.join('..', 'models', 'predictors', 'model.pkl'), 'wb')
)