In [1]:
from flaml import AutoML

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

import pandas as pd
import numpy as np

In [2]:
# Load the training split and derive a binary target from the "Class" column:
# 1 where Class is exactly "Hepatotoxicity", 0 for every other value.
pd_train = pd.read_parquet("data/training_class.parquet")
# Vectorised comparison instead of a per-row Python lambda; the result holds
# the same 0/1 integers.
pd_train["label"] = pd_train["Class"].eq("Hepatotoxicity").astype("int64")
print(pd_train.shape)

(1241, 16094)


In [3]:
# Load the held-out test split and derive the binary target from "Class":
# 1 where Class is exactly "Hepatotoxicity", 0 for every other value.
pd_test = pd.read_parquet("data/testing_class.parquet")
# Vectorised comparison instead of a per-row Python lambda; the result holds
# the same 0/1 integers.
pd_test["label"] = pd_test["Class"].eq("Hepatotoxicity").astype("int64")
print(pd_test.shape)

(286, 16094)


In [4]:
# Separate predictors from the target for both splits. "Class" (the raw class
# string) and "label" (the 0/1 target) are both dropped from the feature
# frames so that neither is handed to the model as an input column.
X_train, y_train = pd_train.drop(columns=["Class", "label"]), pd_train["label"]
X_test, y_test = pd_test.drop(columns=["Class", "label"]), pd_test["label"]

In [2]:
# Optional synthetic smoke-test data.
#
# This cell used to run unconditionally, replacing X_train / X_test / y_train /
# y_test with random noise, so everything below it trained and evaluated on
# noise instead of the data assigned to those names earlier in the notebook.
# It is now opt-in: set USE_SYNTHETIC_DATA = True only for a quick pipeline
# check when no real data is at hand.
USE_SYNTHETIC_DATA = False


def make_synthetic_dataset(n_samples=1000, n_features=20, seed=42):
    """Build a reproducible random binary-classification dataset.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns; they are named
            "feature_0" ... "feature_{n_features - 1}".
        seed: Seed for the NumPy random generator, so repeated calls with the
            same arguments return identical data.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame of standard-normal
        draws with shape ``(n_samples, n_features)`` and ``y`` is a NumPy
        array of ``n_samples`` integer labels drawn from {0, 1}.
    """
    rng = np.random.default_rng(seed)
    X = pd.DataFrame(
        rng.standard_normal((n_samples, n_features)),
        columns=[f"feature_{i}" for i in range(n_features)],
    )
    # Upper bound is exclusive, so labels are 0 or 1.
    y = rng.integers(0, 2, size=n_samples)
    return X, y


if USE_SYNTHETIC_DATA:
    X, y = make_synthetic_dataset()

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

# FLAML optimization

In [None]:
# Initialize FLAML AutoML
automl = AutoML()

# Define FLAML settings
settings = {
    "time_budget": 3600,  # in seconds
    "metric": "roc_auc",  # Use 'roc_auc' for AUC
    "task": "classification",
    "log_file_name": "flaml_log.log",
    "n_jobs": 4,  # Use 4 cores
    # Fix the search seed so repeated runs start from the same random state.
    # NOTE(review): the search is bounded by wall-clock time, so results may
    # still differ between machines or runs -- confirm if exact
    # reproducibility is required.
    "seed": 42,
}

# Run AutoML
automl.fit(X_train=X_train, y_train=y_train, **settings)

In [None]:
# Evaluate on test data
print("Best model:", automl.model)
print("Test Accuracy:", automl.score(X_test, y_test))

# The search above optimises "roc_auc", so report that metric on the test
# split as well instead of accuracy alone.
test_proba = automl.predict_proba(X_test)
# NOTE(review): column 1 is assumed to hold the probability of the positive
# class (label == 1) -- confirm against the fitted model's class order.
print("Test ROC AUC:", roc_auc_score(y_test, test_proba[:, 1]))