In [1]:
import sys
sys.path.append("..")

from src.data_loader import DataLoader
from src.models import model_parameters
import pandas as pd
pd.set_option('display.max_colwidth', None)
# measure time
import time

# Load preprocessed data
dl = DataLoader(random_state=42)

# Train and evaluate every configured model, collecting one summary record per
# model so the records can be assembled into a comparison table afterwards.
results = []
for name, config in model_parameters.items():
    print(f'Current model: {name}')
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments (unlike time.time()).
    t0 = time.perf_counter()
    # NOTE: the timed span covers the train/test split as well as fitting and
    # evaluation. Whether each model receives scaled features is taken from
    # its config entry.
    X_train, X_test, y_train, y_test = dl.get_data_train_test(scaled=config['scaled'], test_size=0.2)

    model = config['model']
    model.train(X_train, y_train)
    # `metrics` is unpacked into the record below, so it must be a mapping of
    # metric name -> value.
    metrics = model.evaluate(X_test, y_test)
    et = round(time.perf_counter() - t0, 2)
    results.append({
        'model': name,
        'execution_time': et,
        **metrics,
        # `model.model` is expected to expose best_score_ / best_params_
        # (presumably a fitted hyper-parameter search object - TODO confirm
        # against src.models).
        'best_score': model.model.best_score_,
        'best_params': model.model.best_params_
    })
    print(f'Time: {et:.2f} [s]')


# Build the comparison table from the collected records: rows ordered from
# highest to lowest "accuracy", with the index renumbered from 0 so it reads
# as the rank. The bare name on the last line makes the notebook render it.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="accuracy", ascending=False)
    .reset_index(drop=True)
)
df_results


Current model: random_forest
Time: 6.76 [s]
Current model: xgboost
Time: 1.51 [s]
Current model: light_gbm
Time: 6.35 [s]
Current model: catboost
Time: 8.85 [s]
Current model: logistic_regression
Time: 0.14 [s]


Unnamed: 0,model,execution_time,accuracy,best_score,best_params
0,xgboost,1.51,0.934394,0.925954,"{'subsample': 0.8, 'n_estimators': 250, 'max_depth': 6, 'learning_rate': 0.01}"
1,catboost,8.85,0.932406,0.925947,"{'verbose': 0, 'n_estimators': 100, 'learning_rate': 0.01, 'l2_leaf_reg': 1, 'depth': 4}"
2,light_gbm,6.35,0.932406,0.925947,"{'verbose': -1, 'subsample': 0.8, 'num_leaves': 78, 'n_estimators': 250, 'max_depth': 2, 'learning_rate': 0.01, 'colsample_bytree': 0.5}"
3,logistic_regression,0.14,0.932406,0.925947,"{'solver': 'liblinear', 'C': 0.0001}"
4,random_forest,6.76,0.930417,0.925954,"{'n_estimators': 100, 'min_samples_split': 9, 'max_depth': None}"
