In [1]:
import os
import torch
import sys
import numpy as np
import pandas as pd
import timm
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.datasets import ImageFolder
from tqdm import tqdm

print(f"System Version: {sys.version}")
print(f"Pytorch Version: {torch.__version__}")
print(f"Pandas Version: {pd.__version__}")
print(f"Numpy Version: {np.__version__}")

System Version: 3.12.3 (main, Feb  4 2025, 14:48:35) [GCC 13.3.0]
Pytorch Version: 2.7.1+cpu
Pandas Version: 2.3.0
Numpy Version: 2.3.0


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Limiting CPU usages

os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"
os.environ["OPENBLAS_NUM_THREADS"] = "4"
os.environ["NUMEXPR_NUM_THREADS"] = "4"

torch.set_num_threads(4)
torch.set_num_interop_threads(1)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [4]:
class PlayingCardDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        super().__init__()
        self.data = ImageFolder(root=data_dir, transform=transform)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index]
    
    @property
    def classes(self):
        return self.data.classes

In [5]:
eff_net_b3_transform = transforms.Compose([
    transforms.Resize(320, interpolation=InterpolationMode.BICUBIC),  # Resize to 320x320
    transforms.CenterCrop(300),                                       # Center crop to 300x300
    transforms.ToTensor(),                                            # Converts [0,255] PIL image to [0,1] float tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406],                  # Normalize with ImageNet mean and std
                         std=[0.229, 0.224, 0.225])
])


In [6]:
training_dataset_dir = "card-classifier/data/train"
val_dataset_dir = "card-classifier/data/valid"

training_dataset = PlayingCardDataset(data_dir=training_dataset_dir, transform=eff_net_b3_transform)
val_dataset = PlayingCardDataset(data_dir=val_dataset_dir, transform=eff_net_b3_transform)

In [None]:
image, label = training_dataset[4000]
image

In [None]:
len(training_dataset)

In [None]:
training_dataset.classes

In [7]:
training_dataloader = DataLoader(dataset=training_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

In [None]:
for images, labels in training_dataloader:
    break

In [None]:
images.shape, labels.shape

In [8]:
class CardClassifier(nn.Module):
    def __init__(self, num_classes = 53):
        super(CardClassifier, self).__init__()

        # Load efficientnet_b3 backbone
        print("🔧 Creating model...")
        self.backbone = timm.create_model('efficientnet_b3', pretrained=True)
        print("✅ Model loaded")
        in_features = self.backbone.classifier.in_features
        print(in_features)
        self.backbone.classifier = nn.Linear(in_features=in_features, out_features=num_classes)
    
    def forward(self, x):
        return self.backbone(x)

In [9]:
model = CardClassifier(num_classes=53)
model.to(device=device)

🔧 Creating model...
✅ Model loaded
1536


CardClassifier(
  (backbone): EfficientNet(
    (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
          (bn1): BatchNormAct2d(
            40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_p

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

epochs = 10

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")

    # ===== TRAINING PHASE =====
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    train_loader_tqdm = tqdm(training_dataloader, desc="Training", leave=False)
    for images, labels in train_loader_tqdm:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Optional: show live loss in progress bar
        train_loader_tqdm.set_postfix(loss=loss.item())

    train_loss = running_loss / total
    train_acc = correct / total

    print(f"  Train Loss: {train_loss:.4f}, Accuracy: {train_acc*100:.2f}%")

    # ===== VALIDATION PHASE =====
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    val_loader_tqdm = tqdm(val_dataloader, desc="Validating", leave=False)
    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            val_loader_tqdm.set_postfix(loss=loss.item())

    val_loss /= total
    val_acc = correct / total

    print(f"  Val   Loss: {val_loss:.4f}, Accuracy: {val_acc*100:.2f}%\n")
    if val_acc >= 0.95:
        print(f"Accutacy is reached 95% [{val_acc*100}%]. Stoping the training...")
        break


Epoch 1/7


                                                 

KeyboardInterrupt: 