In [1]:
# Build a small synthetic dataset with one column per feature type (float, int,
# datetime, categorical, short text) plus a regression target derived from them.
import numpy as np
import pandas as pd

n = 1000
rng = np.random.default_rng(42)

# Random draws happen in a fixed order (float, int, day offsets, category, text,
# noise) so the seeded generator yields the same values on every run.
num_values = rng.normal(0, 1, n)
int_values = rng.integers(0, 100, n)
day_offsets = rng.integers(0, 365, n)
cat_values = rng.choice(["A", "B", "C"], n)
text_values = rng.choice(
    ["great area", "needs repair", "near park", "very spacious"], n
)
noise = rng.normal(0, 2, n)

df = pd.DataFrame({
    "num_feature": num_values,    # float
    "int_feature": int_values,    # int
    # datetime: a day within 2022, built as start date + random day offset
    "dt_feature": pd.Timestamp("2022-01-01") + pd.to_timedelta(day_offsets, unit="D"),
    "cat_feature": cat_values,    # categorical
    "text_feature": text_values,  # short text
})

# Per-type contributions to the target: calendar month of the date, a fixed
# offset per category, and the character length of the text.
month = df["dt_feature"].dt.month
cat_effect = df["cat_feature"].map({"A": 0.0, "B": 10.0, "C": -10.0})
txt_len = df["text_feature"].str.len()

df["target"] = (
    5 * df["num_feature"]
    + 0.3 * df["int_feature"]
    + month
    + cat_effect
    + 0.05 * txt_len
    + noise
)
print(df.shape)
df.head(3)


(1000, 6)


Unnamed: 0,num_feature,int_feature,dt_feature,cat_feature,text_feature,target
0,0.304717,18,2022-06-04,A,very spacious,12.210356
1,-1.039984,64,2022-06-25,C,near park,14.51288
2,0.750451,40,2022-03-29,C,great area,9.187307


In [2]:
# Install AutoGluon and set an output folder for artifacts
!pip -q install autogluon

from autogluon.tabular import TabularPredictor
import os
OUT = "/content/drive/MyDrive/ag-feature-eng"
os.makedirs(OUT, exist_ok=True)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.5/259.5 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.1/225.1 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.9/454.9 kB[0m [31m39.9 MB/s[0m eta

In [3]:
# Fit a small regression predictor on the synthetic frame. Feature engineering is
# left to AutoGluon's default feature generator; in the transformed preview shown
# after training, the date column gains month/day/dayofweek parts and both
# cat_feature and text_feature come out as integer codes.
label = "target"

predictor_options = {
    "label": label,
    "problem_type": "regression",
    "path": OUT,              # models are saved under the artifacts folder
    "eval_metric": "rmse",
}
fit_options = {
    "train_data": df,
    "presets": "medium_quality_faster_train",
    "time_limit": 180,        # ~3 minutes cap
    "verbosity": 2,
}

predictor = TabularPredictor(**predictor_options).fit(**fit_options)


Preset alias specified: 'medium_quality_faster_train' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
Memory Avail:       11.34 GB / 12.67 GB (89.5%)
Disk Space Avail:   66.54 GB / 112.64 GB (59.1%)
Presets specified: ['medium_quality_faster_train']
Using hyperparameters preset: hyperparameters='default'
Beginning AutoGluon training ... Time limit = 180s
AutoGluon will save models to "/content/drive/MyDrive/ag-feature-eng"
Train Data Rows:    1000
Train Data Columns: 5
Label Column:       target
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    11612.20 MB
	Train Data (Original)  Memory Usage: 0.13 MB (0.0% of available memory)
	Inferring data type of each feat

In [4]:
# Pass the first five rows through the fitted predictor's feature pipeline and
# compare how many columns go in (excluding the label) with how many come out.
tf = predictor.transform_features(df.head(5))  # processed/engineered features

original_feature_count = df.drop(columns=[label]).shape[1]
transformed_feature_count = tf.shape[1]

print("Original feature count (excl. target):", original_feature_count)
print("Transformed feature count:", transformed_feature_count)
tf.head()


Original feature count (excl. target): 5
Transformed feature count: 8


Unnamed: 0,num_feature,int_feature,cat_feature,text_feature,dt_feature,dt_feature.month,dt_feature.day,dt_feature.dayofweek
0,0.304717,18,0,3,1654300800000000000,6,4,5
1,-1.039984,64,2,1,1656115200000000000,6,25,5
2,0.750451,40,2,0,1648512000000000000,3,29,1
3,0.940565,34,2,3,1665619200000000000,10,13,3
4,-1.951035,69,0,0,1659312000000000000,8,1,0


In [5]:
# Save the model leaderboard and permutation feature importance as CSV files in
# the artifacts folder, then display the top of the importance table.
lb = predictor.leaderboard(silent=False)
lb.to_csv(f"{OUT}/leaderboard.csv", index=False)

# Importance is computed on 300 rows sampled (with a fixed seed) from the same
# frame the predictor was trained on, so it describes the fit on seen data.
fi = predictor.feature_importance(df.sample(n=300, random_state=42))
fi.to_csv(f"{OUT}/feature_importance.csv")
print("Saved:", f"{OUT}/leaderboard.csv", "and", f"{OUT}/feature_importance.csv")

# Keep this as the final expression: a notebook cell only renders its last
# expression, so placing it before the print would discard the table.
fi.head(10)


Computing feature importance via permutation shuffling for 5 features using 300 rows with 5 shuffle sets...
	1.83s	= Expected runtime (0.37s per shuffle set)


                 model  score_val              eval_metric  pred_time_val   fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0  WeightedEnsemble_L2  -2.192102  root_mean_squared_error       0.027131  23.542553                0.000527           0.016082            2       True         10
1      NeuralNetFastAI  -2.295852  root_mean_squared_error       0.009657   5.490075                0.009657           5.490075            1       True          6
2       NeuralNetTorch  -2.315659  root_mean_squared_error       0.008002  11.340859                0.008002          11.340859            1       True          8
3           LightGBMXT  -2.335057  root_mean_squared_error       0.006439   4.767474                0.006439           4.767474            1       True          1
4             CatBoost  -2.356656  root_mean_squared_error       0.002506   1.928062                0.002506           1.928062            1       True          4
5             LightGBM

	0.62s	= Actual runtime (Completed 5 of 5 shuffle sets)


Saved: /content/drive/MyDrive/ag-feature-eng/leaderboard.csv and /content/drive/MyDrive/ag-feature-eng/feature_importance.csv
