In [None]:
import sys
sys.path.append("..")

from src.data_loader import DataLoader
from src.models import model_parameters
import pandas as pd
pd.set_option('display.max_colwidth', None)
# measure time
import time

# Load preprocessed data.
# NOTE(review): DataLoader is project-local; presumably random_state makes the
# train/test split reproducible across iterations — confirm in src.data_loader.
dl = DataLoader(random_state=42)

# Train and evaluate every configured model, collecting one result row per model.
results = []
for name, config in model_parameters.items():
    print(f'Current model: {name}')
    # perf_counter() is the clock intended for measuring elapsed intervals;
    # unlike time.time() it is not affected by system clock adjustments.
    t0 = time.perf_counter()

    # Each model config declares whether it wants scaled features.
    X_train, X_test, y_train, y_test = dl.get_data_train_test(
        scaled=config['scaled'],
        test_size=0.2,
    )

    model = config['model']
    model.train(X_train, y_train)
    # `metrics` is unpacked as a mapping below and must provide an 'accuracy'
    # entry, since the results table is sorted by that column.
    metrics = model.evaluate(X_test, y_test)

    # NOTE(review): `model.model` is presumably a fitted hyper-parameter search
    # object exposing best_score_ / best_params_ — confirm in src.models.
    results.append({
        'model': name,
        **metrics,
        'best_score': model.model.best_score_,
        'best_params': model.model.best_params_,
    })
    # The reported time covers data retrieval, training and evaluation.
    print(f'Time: {time.perf_counter() - t0:.2f} [s]\n')

# Show results, best test accuracy first.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="accuracy", ascending=False)
    .reset_index(drop=True)
)
df_results


Current model: random_forest
Time: 11.40 s
Current model: xgboost
Time: 2.66 s
Current model: light_gbm
Time: 8.84 s
Current model: logistic_regression
Time: 0.16 s




Unnamed: 0,model,accuracy,best_score,best_params
0,xgboost,0.934394,0.925954,"{'subsample': 0.8, 'n_estimators': 250, 'max_depth': 6, 'learning_rate': 0.01}"
1,light_gbm,0.932406,0.925947,"{'verbose': -1, 'subsample': 0.8, 'num_leaves': 78, 'n_estimators': 250, 'max_depth': 2, 'learning_rate': 0.01, 'colsample_bytree': 0.5}"
2,logistic_regression,0.932406,0.925947,"{'solver': 'liblinear', 'C': 0.0001}"
3,random_forest,0.930417,0.925954,"{'n_estimators': 100, 'min_samples_split': 9, 'max_depth': None}"
