In [None]:
# demo_colab.ipynb
"""
Minimal Demo: CNN + XGBoost Ensemble
Dataset: CIFAR-10 (small, free)
"""

# ---- Setup ----
#!pip install torch torchvision xgboost scikit-learn joblib -q

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import warnings

In [3]:

# ---- Load CIFAR10 ----
transform = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
])
trainset = datasets.CIFAR10(root="./data_cifar", train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root="./data_cifar", train=False, download=True, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)


In [4]:

# ---- Train a small CNN (ResNet18) ----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = models.resnet18(pretrained=False, num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(2):  # just 2 epochs for demo
    model.train()
    for imgs, labels in trainloader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()  
    print(f"Epoch {epoch} done")

# ---- Extract features ----
model.eval()
model.fc = nn.Identity()  # remove last layer
features, labels = [], []

with torch.no_grad():
    for imgs, labs in trainloader:
        feats = model(imgs.to(device)).cpu().numpy()
        features.append(feats)
        labels.append(labs.numpy())
features = np.vstack(features)
labels = np.hstack(labels)

print("Feature shape:", features.shape)


cuda




Epoch 0 done
Epoch 1 done
Feature shape: (50000, 512)


In [None]:

# ---- Train XGBoost on CNN features ----
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)
clf = xgb.XGBClassifier(
    tree_method="hist",      
    use_label_encoder=False,
    eval_metric="mlogloss"
)
clf.fit(X_train, y_train)
preds = clf.predict(X_val)
print("XGBoost Accuracy:", accuracy_score(y_val, preds))

# ---- Save models ----
joblib.dump(clf, "xgb_cnn_features.joblib")
torch.save(model.state_dict(), "resnet18_cnn.pth")
print("Saved CNN + XGBoost models")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Accuracy: 0.777
Saved CNN + XGBoost models
