In [4]:
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
from pmlb import fetch_data
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Random seed handed to every benchmark run so the train/test split is reproducible.
SEED = 42

In [5]:

# Benchmark suite: (dataset name passed to `fetch_data`, task name passed to `Task`).
datasets = [
    ("allbp", "multiclass"),
    ("Hill_Valley_with_noise", "binary"),
    ("Hill_Valley_without_noise", "binary"),
    ("adult", "binary"),
    ("allhyper", "multiclass"),
    ("breast_cancer", "binary"),
]


def lightAutoMLBenchmark(dataset_name, seed, data_task='binary'):
    """Train LightAutoML on one PMLB dataset and return its hold-out accuracy.

    The dataset is fetched with ``fetch_data``, split 80/20 (stratified on the
    ``target`` column), a ``TabularAutoML`` preset is fitted on the training
    part, and accuracy is computed on the held-out part.

    Args:
        dataset_name: Name of the dataset, forwarded to ``pmlb.fetch_data``.
        seed: Random seed used for the train/test split and for the AutoML
            reader (which controls its internal validation split).
        data_task: LightAutoML task name, ``"binary"`` or ``"multiclass"``.

    Returns:
        Accuracy of the fitted model on the 20% hold-out set.
    """
    df = fetch_data(dataset_name)

    # Split the data into training and testing sets
    train_data, test_data = train_test_split(
        df, test_size=0.2, random_state=seed, stratify=df["target"]
    )

    # Define the task and the column roles
    task = Task(data_task)
    roles = {
        'target': "target"
    }

    # Initialize the AutoML model; forward the seed so the reader's internal
    # validation split is tied to the same seed as the outer split.
    automl = TabularAutoML(
        task=task,
        timeout=3600,
        cpu_limit=4,
        reader_params={"random_state": seed},
    )

    # Train the model (the out-of-fold predictions are not needed here)
    automl.fit_predict(train_data, roles=roles)

    # Make predictions on the test set
    test_pred = automl.predict(test_data)

    if data_task == "binary":
        # Threshold the positive-class probability at 0.5
        class_idx = (test_pred.data[:, 0] > 0.5).astype(int)
    else:
        # Pick the column with the highest predicted probability
        class_idx = test_pred.data.argmax(axis=1)

    # LightAutoML's reader may remap the original labels to internal indices
    # (exposed as `reader.class_mapping`, original label -> index) when the
    # labels are not already 0..n_classes-1. Translate the predicted indices
    # back so they are compared against `target` in the same label space.
    class_mapping = getattr(automl.reader, "class_mapping", None)
    if class_mapping:
        index_to_label = {idx: label for label, idx in class_mapping.items()}
        predictions = np.array([index_to_label[int(i)] for i in class_idx])
    else:
        predictions = class_idx

    # Keep predictions in a local array instead of writing a new column into
    # `test_data`, which is a slice produced by `train_test_split`.
    return accuracy_score(test_data["target"], predictions)

In [6]:
# Run the benchmark once per dataset; results stay aligned with `datasets`.
automl_acc = [
    lightAutoMLBenchmark(name, SEED, task_type) for name, task_type in datasets
]

# Report one accuracy line per dataset.
for (name, task_type), acc in zip(datasets, automl_acc):
    print(f"Accuracy of LightAutoML on {name}:{acc}")



Accuracy of LightAutoML on allbp:0.9735099337748344
Accuracy of LightAutoML on Hill_Valley_with_noise:0.7366255144032922
Accuracy of LightAutoML on Hill_Valley_without_noise:0.7448559670781894
Accuracy of LightAutoML on adult:0.8747057017094892
Accuracy of LightAutoML on allhyper:0.9920529801324504
Accuracy of LightAutoML on breast_cancer:0.7068965517241379
