In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error
from flaml import AutoML


In [None]:
# Pull the diabetes regression dataset bundled with scikit-learn and wrap
# the feature matrix and target vector in labelled pandas containers.
data = load_diabetes()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target, name="Target")

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the column names and the size of each partition.
for label, value in (
    ("Feature Names:", data.feature_names),
    ("Training Set Shape:", X_train.shape),
    ("Test Set Shape:", X_test.shape),
):
    print(label, value)


In [None]:
# Initialize AutoML
automl = AutoML()

# Define AutoML settings; the dict is unpacked into automl.fit(**automl_settings).
# NOTE: the search is bounded by wall-clock time, so the selected model can
# still differ between machines even though the search is seeded.
automl_settings = {
    "time_budget": 60,  # Total search time in seconds
    "metric": "rmse",   # Evaluation metric minimised during the search
    "task": "regression",  # Task type
    "log_file_name": "automl.log",  # Log file
    "seed": 42,  # Seed the search, matching the random_state=42 used elsewhere in this notebook
    "verbose": 1,  # Low verbosity; higher values print more messages
}


In [None]:
# Run the AutoML search on the training split using the settings dict
automl.fit(
    X_train=X_train,
    y_train=y_train,
    **automl_settings,
)

# Summarise the outcome of the search: winning learner, its configuration,
# and the loss FLAML recorded for it.
best_summary = (
    ("\nBest Model:", automl.best_estimator),
    ("Best Hyperparameters:", automl.best_config),
    ("Best Score (RMSE):", automl.best_loss),
)
for heading, value in best_summary:
    print(heading, value)


In [None]:
# Make predictions on the test set
y_pred = automl.predict(X_test)

# Evaluate model performance.
# RMSE is taken as the square root of the MSE instead of calling
# mean_squared_error(..., squared=False): the `squared` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises
# TypeError. The square-root form works on every scikit-learn version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"\nTest RMSE: {rmse:.4f}")


In [None]:
import matplotlib.pyplot as plt

# Retrieve feature importance from the best model found by AutoML
importance = automl.feature_importances_
features = X_train.columns

# Guard against a missing value: FLAML is expected to return None here when
# the selected estimator exposes no importances, and passing None to barh
# would raise.
if importance is None:
    print("The selected model does not expose feature importances; skipping plot.")
else:
    # Plot feature importance as a horizontal bar chart, one bar per column
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(features, importance, color="skyblue")
    ax.set_xlabel("Importance")
    ax.set_title("Feature Importance")
    plt.show()


In [None]:
import joblib

# Persist the object held in automl.model to disk with joblib
model_path = "automl_best_model.pkl"
joblib.dump(automl.model, model_path)
print("\nModel saved successfully!")

# Read the same file back into a fresh variable
loaded_model = joblib.load(model_path)
print("Model reloaded successfully!")


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Train a Random Forest model as a hand-configured baseline
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate the Random Forest model on the same held-out test split.
# RMSE is computed as sqrt(MSE) because the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
rf_pred = rf.predict(X_test)
rf_rmse = mean_squared_error(y_test, rf_pred) ** 0.5

# Side-by-side comparison; `rmse` is the AutoML test RMSE computed in the
# earlier evaluation cell.
print(f"\nManual Random Forest RMSE: {rf_rmse:.4f}")
print(f"AutoML RMSE: {rmse:.4f}")
