In [1]:
import time
import sys
# Add the parent directory to the module search path; presumably this is what
# lets the `src` imports below resolve.
# NOTE(review): ".." is relative to the current working directory, so this
# assumes the notebook is run from a folder one level below the project root — confirm.
sys.path.append("..")

from src.data_loader import DataLoader
from src.models import model_parameters
import pandas as pd
# Do not truncate long cell values when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

# Create the loader that serves the preprocessed data.
# random_state=42 is forwarded to the loader (presumably to seed its
# splitting/shuffling — confirm in src.data_loader).
dl = DataLoader(random_state=42)

# Memory and time are limited, so only a subset of the registered models is
# compared. Uncomment an entry in the list below to include that model.
print(f'All models: {list(model_parameters.keys())}')
models_to_compare = [
    'logistic_regression',
    'random_forest',
    'xgboost',
    # 'light_gbm',
    # 'catboost',
]

# Map every selected model name to its registered configuration, keeping the
# order chosen above.
models = {
    selected: model_parameters[selected]
    for selected in models_to_compare
}
models

All models: ['random_forest', 'xgboost', 'light_gbm', 'catboost', 'logistic_regression']


{'logistic_regression': {'model': <src.models.baseline_logistic_regression.LogisticRegressionModel at 0x2153f674ca0>,
  'scaled': True},
 'random_forest': {'model': <src.models.ensemble_random_forest.RandomForestModel at 0x2153db7d9f0>,
  'scaled': False},
 'xgboost': {'model': <src.models.ensemble_xgboost.XGBoostModel at 0x2153f3d1810>,
  'scaled': False}}

In [2]:
# Train and evaluate every selected model, collecting one summary row per model.
results = []
for name, config in models.items():
    print(f'Current model: {name}')
    # The split is requested per model because each configuration declares
    # whether it wants the scaled variant of the data.
    X_train, X_test, y_train, y_test = dl.get_data_train_test(scaled=config['scaled'], test_size=0.2)

    # perf_counter() is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    # The timed span covers training and evaluation, not the data split above.
    t0 = time.perf_counter()
    model = config['model']
    model.train(X_train, y_train)
    metrics = model.evaluate(X_test, y_test)
    et = round(time.perf_counter() - t0, 2)
    results.append({
        'model': name,
        'execution_time': et,
        # `metrics` is unpacked as a mapping; the sort below relies on it
        # providing an 'accuracy' entry.
        **metrics,
        # NOTE(review): presumably `model.model` is a fitted hyper-parameter
        # search object exposing best_score_/best_params_ — confirm in src.models.
        'best_score': model.model.best_score_,
        'best_params': model.model.best_params_
    })
    print(f'Time: {et:.2f} [s]')

# Rank models by accuracy, best first. A stable sort (mergesort) keeps models
# with equal accuracy in the order they were evaluated, so ties are deterministic.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="accuracy", ascending=False, kind="mergesort")
    .reset_index(drop=True)
)

Current model: logistic_regression
Time: 4.04 [s]
Current model: random_forest
Time: 6.69 [s]
Current model: xgboost
Time: 2.35 [s]


In [3]:
# Display the comparison table (one row per model, sorted by accuracy, best first).
# NOTE(review): in the recorded output all three models report exactly the same
# accuracy and best_score — worth verifying that they are not all predicting a
# single (majority) class before drawing conclusions from this table.
df_results

Unnamed: 0,model,execution_time,accuracy,best_score,best_params
0,logistic_regression,4.04,0.932406,0.925946,"{'solver': 'liblinear', 'C': 0.0001}"
1,random_forest,6.69,0.932406,0.925946,"{'n_estimators': 300, 'min_samples_split': 5, 'max_depth': 4}"
2,xgboost,2.35,0.932406,0.925946,"{'subsample': 0.5, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.01}"
