In [2]:
# Mount my Google Drive (datasets + outputs live here)
# Colab-only step: `google.colab` is not importable outside a Colab runtime.
# The input and output folders used by this notebook are paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Install AutoGluon Tabular (handles tabular + text features)
!pip -q install autogluon


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.5/259.5 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.1/225.1 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.9/454.9 kB[0m [31m38.9 MB/s[0m eta

In [4]:
# Imports and working folders
from autogluon.tabular import TabularPredictor
import pandas as pd, numpy as np, os

# Working folders on the mounted Drive. Both are absolute Colab paths, so this notebook
# assumes Drive has already been mounted at /content/drive.
BASE = "/content/drive/MyDrive/Petfinder"     # folder with train.csv, test.csv, sample_submission.csv
OUT  = "/content/drive/MyDrive/ag-petfinder"  # outputs (models, leaderboard, submission)
# Create the output folder up front; exist_ok=True keeps re-runs from failing when it already exists.
os.makedirs(OUT, exist_ok=True)


In [5]:
# Load the three CSVs from the Drive folder: training rows, test rows, submission template
csv_paths = (
    f"{BASE}/train.csv",
    f"{BASE}/test.csv",
    f"{BASE}/sample_submission.csv",
)
train, test, sample = map(pd.read_csv, csv_paths)

# Column roles used by the rest of the notebook
label = "AdoptionSpeed"   # multiclass target (0..4)
id_col = "PetID"          # identifier column, used for submission order
if "Description" in train.columns:
    text_col = "Description"
else:
    text_col = None       # no free-text column in this copy of the data

print("train:", train.shape, "test:", test.shape, "| label:", label, "| text:", text_col)
train.head(3)


train: (14993, 24) test: (3972, 23) | label: AdoptionSpeed | text: Description


Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt,AdoptionSpeed
0,2,Nibble,3,299,0,1,1,7,0,1,...,1,1,100,41326,8480853f516546f6cf33aa88cd76c379,0,Nibble is a 3+ month old ball of cuteness. He ...,86e1089a3,1.0,2
1,2,No Name Yet,1,265,0,1,1,2,0,2,...,1,1,0,41401,3082c7125d8fb66f7dd4bff4192c8b14,0,I just found it alone yesterday near my apartm...,6296e909a,2.0,0
2,1,Brisco,1,307,0,1,2,7,0,2,...,1,1,0,41326,fa90fa5b1ee11c86938398b60abc32cb,0,Their pregnant mother was dumped by her irresp...,3422e4906,7.0,3


In [6]:
# Quick-demo subset: sample up to MAX_DEMO_ROWS training rows whose text column is not
# missing (NaN), then drop the ID column from the model inputs.
# Note: only NaN is filtered; empty or whitespace-only strings are kept.
rng = 42               # integer random seed for the row sample (not a Generator object)
MAX_DEMO_ROWS = 2000   # cap on the demo subset; raise it for a fuller run

# Boolean row filter. The no-text fallback is built on train.index so boolean indexing
# stays aligned even when train does not carry a default 0..n-1 index.
if text_col:
    mask = train[text_col].notna()
else:
    mask = pd.Series(True, index=train.index)

n_rows = min(MAX_DEMO_ROWS, int(mask.sum()))
train_small = train[mask].sample(n=n_rows, random_state=rng)

# The ID is an identifier, not a feature. Both frames are handled the same way:
# errors="ignore" turns the drop into a no-op when the column is absent.
drop_cols = [id_col] if id_col in train_small.columns else []
train_simple = train_small.drop(columns=[id_col], errors="ignore")
test_simple = test.drop(columns=[id_col], errors="ignore")

print("train_small:", train_small.shape, "| test_simple:", test_simple.shape)


train_small: (2000, 24) | test_simple: (3972, 22)


In [7]:
# Multimodal tabular: request AutoMM for the text column alongside three tree-based models.
# An empty dict means "use that model's default hyperparameters".
hyperparams = {
    "AG_AUTOMM": {},   # text (and image if present) via a pretrained backbone
    "GBM": {},         # LightGBM
    "CAT": {},         # CatBoost
    "XGB": {},         # XGBoost
}

predictor = TabularPredictor(
    label=label,
    problem_type="multiclass",
    path=OUT,                             # model artifacts go to the same Drive folder as the other outputs
).fit(
    train_data=train_simple,              # includes Description + tabular features (no PetID)
    hyperparameters=hyperparams,
    presets="medium_quality",             # canonical name; 'medium_quality_faster_train' is just an alias for it
    time_limit=480,                       # seconds; keep it capped, adjust if needed
    verbosity=2,
)

# A requested model type can be skipped or fail during fit without raising, which would
# leave the text column without the AutoMM model this cell is meant to add. Surface that
# explicitly instead of relying on someone noticing it is missing from the leaderboard.
trained_models = predictor.model_names()
if not any("MultiModal" in model_name for model_name in trained_models):
    print(
        "WARNING: 'AG_AUTOMM' was requested but no MultiModalPredictor model is present. "
        "Trained models:", trained_models,
        "- check the fit log above for why AutoMM was skipped or failed."
    )


Preset alias specified: 'medium_quality_faster_train' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
Memory Avail:       11.27 GB / 12.67 GB (89.0%)
Disk Space Avail:   63.20 GB / 112.64 GB (56.1%)
Presets specified: ['medium_quality_faster_train']
Beginning AutoGluon training ... Time limit = 480s
AutoGluon will save models to "/content/drive/MyDrive/ag-petfinder"
Train Data Rows:    2000
Train Data Columns: 22
Label Column:       AdoptionSpeed
Problem Type:       multiclass
Preprocessing data ...
Train Data Class Count: 5
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    11500.26 MB
	Train Data (Original)  Memory Usage: 1.42 MB (0.0% of available memory)
	Inferring data type of each feature based on column valu

In [8]:
# Show and save the leaderboard for my repo
lb_path = f"{OUT}/leaderboard.csv"

# display=True prints the table; it is the current replacement for the legacy silent=False.
# No data is passed, so the scores shown are the validation scores recorded during fit.
lb = predictor.leaderboard(display=True)
lb.to_csv(lb_path, index=False)
print("Saved leaderboard ->", lb_path)


                 model  score_val eval_metric  pred_time_val   fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0             CatBoost     0.3925    accuracy       0.030523  27.578622                0.030523          27.578622            1       True          2
1  WeightedEnsemble_L2     0.3925    accuracy       0.031718  27.623454                0.001195           0.044831            2       True          4
2              XGBoost     0.3800    accuracy       0.012985  14.127369                0.012985          14.127369            1       True          3
3             LightGBM     0.3750    accuracy       0.058601   9.912819                0.058601           9.912819            1       True          1
Saved leaderboard -> /content/drive/MyDrive/ag-petfinder/leaderboard.csv


In [9]:
# Generate predictions for the test set
# test_simple is the test frame with the ID column removed, matching the columns used for training.
test_pred = predictor.predict(test_simple)
# Peek at the first few predicted classes as a sanity check.
test_pred.head()


Unnamed: 0,AdoptionSpeed
0,4
1,4
2,2
3,4
4,4


In [10]:
# Match sample_submission schema + order (PetID, AdoptionSpeed).
# Predictions are attached to the template by ID rather than by row position, so a
# different row order or row count in sample_submission.csv cannot silently pair a
# prediction with the wrong pet.
if len(test_pred) != len(test):
    raise ValueError(
        f"Got {len(test_pred)} predictions for {len(test)} test rows; re-run the prediction cell."
    )

sub = sample[[id_col]].copy()

if id_col in test.columns:
    # Lookup table: ID -> predicted class (predictions are in the same row order as `test`).
    pred_by_id = pd.Series(test_pred.to_numpy(), index=test[id_col].to_numpy(), name=label)
    if not pred_by_id.index.is_unique:
        raise ValueError(f"Duplicate {id_col} values in the test set; cannot align predictions by ID.")

    unmatched = ~sub[id_col].isin(pred_by_id.index)
    if unmatched.any():
        raise ValueError(
            f"{int(unmatched.sum())} {id_col} value(s) in sample_submission have no prediction, "
            f"e.g. {sub.loc[unmatched, id_col].head(3).tolist()}"
        )

    # .to_numpy() assigns by position after the ID lookup, so sub keeps the template's order.
    sub[label] = sub[id_col].map(pred_by_id).to_numpy()
else:
    # No ID column in the test set: positional assignment is the only option, so insist
    # on an exact length match instead of truncating.
    if len(test_pred) != len(sub):
        raise ValueError(
            f"Cannot align {len(test_pred)} predictions to {len(sub)} submission rows without {id_col}."
        )
    sub[label] = test_pred.to_numpy()

sub_path = f"{OUT}/submission.csv"
sub.to_csv(sub_path, index=False)
print("Saved submission ->", sub_path)
sub.head(3)


Saved submission -> /content/drive/MyDrive/ag-petfinder/submission.csv


Unnamed: 0,PetID,AdoptionSpeed
0,e2dfc2935,4
1,f153b465f,4
2,3c90f3f54,2
