In [1]:
import os
import torch
import sys
import numpy as np
import pandas as pd
import timm
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.datasets import ImageFolder
from tqdm import tqdm

# Report the interpreter and key library versions so a run can be reproduced.
for _label, _version in (
    ("System", sys.version),
    ("Pytorch", torch.__version__),
    ("Pandas", pd.__version__),
    ("Numpy", np.__version__),
):
    print(f"{_label} Version: {_version}")

System Version: 3.12.3 (main, Feb  4 2025, 14:48:35) [GCC 13.3.0]
Pytorch Version: 2.7.1+cpu
Pandas Version: 2.3.0
Numpy Version: 2.3.0


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Limit CPU usage.
#
# NOTE(review): these environment variables are set after `torch` and `numpy`
# were imported in the first cell, so they are presumably too late to influence
# libraries that are already initialised -- confirm, and move them above the
# imports if they are meant to take effect. The explicit torch calls below are
# what limit PyTorch's own thread pools.
NUM_THREADS = 4
NUM_INTEROP_THREADS = 1

for _env_var in (
    "OMP_NUM_THREADS",
    "MKL_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "NUMEXPR_NUM_THREADS",
):
    os.environ[_env_var] = str(NUM_THREADS)

torch.set_num_threads(NUM_THREADS)

# The inter-op thread count can only be set once per process, and only before
# any inter-op parallel work has started. Re-running this cell would therefore
# raise; tolerate that when the requested value is already in effect, and
# re-raise otherwise so a genuine failure is not hidden.
try:
    torch.set_num_interop_threads(NUM_INTEROP_THREADS)
except RuntimeError:
    if torch.get_num_interop_threads() != NUM_INTEROP_THREADS:
        raise

In [3]:
# Use a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
# Bare expression: display the selected device.
device

device(type='cpu')

In [4]:
class PlayingCardDataset(Dataset):
    """Thin ``Dataset`` wrapper around a torchvision ``ImageFolder``.

    Length, indexing and the class-name list are all forwarded to the wrapped
    ``ImageFolder`` stored in ``self.data``.

    Args:
        data_dir: Root directory handed to ``ImageFolder``.
        transform: Optional transform handed to ``ImageFolder``.
    """

    def __init__(self, data_dir, transform=None):
        super().__init__()
        image_folder = ImageFolder(root=data_dir, transform=transform)
        self.data = image_folder

    @property
    def classes(self):
        """Class names as reported by the wrapped ``ImageFolder``."""
        return self.data.classes

    def __len__(self):
        """Number of samples in the wrapped ``ImageFolder``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample stored at ``index`` in the wrapped ``ImageFolder``."""
        sample = self.data[index]
        return sample

In [5]:
# Preprocessing pipeline for the EfficientNet-B3 backbone.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    # An int size rescales the shorter side to 320 px and keeps the aspect
    # ratio; it does not force a 320x320 square.
    transforms.Resize(320, interpolation=InterpolationMode.BICUBIC),
    # Cut out the central 300x300 patch.
    transforms.CenterCrop(300),
    # PIL image with values in [0, 255] -> float tensor with values in [0, 1].
    transforms.ToTensor(),
    # Normalize each channel with the ImageNet mean and std.
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]

eff_net_b3_transform = transforms.Compose(_preprocessing_steps)


In [6]:
# Dataset roots, given relative to the notebook's working directory.
training_dataset_dir, val_dataset_dir = (
    "card-classifier/data/train",
    "card-classifier/data/valid",
)

# Both splits go through the same preprocessing pipeline.
training_dataset, val_dataset = (
    PlayingCardDataset(data_dir=split_dir, transform=eff_net_b3_transform)
    for split_dir in (training_dataset_dir, val_dataset_dir)
)

In [7]:
# Pull one (image, label) sample by index to sanity-check the preprocessing.
image, label = training_dataset[4000]
# Bare expression: the notebook displays the transformed image tensor.
image

tensor([[[ 2.1633,  1.9407,  1.6667,  ...,  2.2489,  2.2489,  2.2489],
         [ 1.5297,  1.1187,  0.8447,  ...,  2.2489,  2.2489,  2.2489],
         [ 0.7591,  0.4508,  0.3823,  ...,  2.2489,  2.2489,  2.2489],
         ...,
         [ 2.2489,  2.2489,  2.2489,  ...,  1.3927,  0.9988,  0.6906],
         [ 2.2489,  2.2489,  2.2489,  ...,  0.5022,  0.2967,  0.3309],
         [ 2.2489,  2.2489,  2.2489,  ...,  0.1768,  0.2796,  0.5536]],

        [[ 1.7458,  1.3256,  0.8880,  ...,  2.4286,  2.4286,  2.4286],
         [ 0.7479,  0.0651, -0.3901,  ...,  2.4286,  2.4286,  2.4286],
         [-0.3375, -0.9328, -1.1604,  ...,  2.4286,  2.4286,  2.4286],
         ...,
         [ 2.4286,  2.4286,  2.4286,  ...,  0.2752, -0.2150, -0.6176],
         [ 2.4286,  2.4286,  2.4286,  ..., -0.7752, -1.0028, -0.9678],
         [ 2.4286,  2.4286,  2.4286,  ..., -1.0903, -0.9678, -0.6176]],

        [[ 1.9603,  1.5594,  1.1237,  ...,  2.6400,  2.6400,  2.6400],
         [ 0.8797,  0.2871, -0.1312,  ...,  2

In [8]:
# Number of samples in the training split.
len(training_dataset)

7624

In [9]:
# Class names, forwarded by the wrapper from its underlying ImageFolder.
training_dataset.classes

['ace of clubs',
 'ace of diamonds',
 'ace of hearts',
 'ace of spades',
 'eight of clubs',
 'eight of diamonds',
 'eight of hearts',
 'eight of spades',
 'five of clubs',
 'five of diamonds',
 'five of hearts',
 'five of spades',
 'four of clubs',
 'four of diamonds',
 'four of hearts',
 'four of spades',
 'jack of clubs',
 'jack of diamonds',
 'jack of hearts',
 'jack of spades',
 'joker',
 'king of clubs',
 'king of diamonds',
 'king of hearts',
 'king of spades',
 'nine of clubs',
 'nine of diamonds',
 'nine of hearts',
 'nine of spades',
 'queen of clubs',
 'queen of diamonds',
 'queen of hearts',
 'queen of spades',
 'seven of clubs',
 'seven of diamonds',
 'seven of hearts',
 'seven of spades',
 'six of clubs',
 'six of diamonds',
 'six of hearts',
 'six of spades',
 'ten of clubs',
 'ten of diamonds',
 'ten of hearts',
 'ten of spades',
 'three of clubs',
 'three of diamonds',
 'three of hearts',
 'three of spades',
 'two of clubs',
 'two of diamonds',
 'two of hearts',
 'two o

In [10]:
BATCH_SIZE = 16

# Training batches are shuffled; validation keeps a fixed order.
training_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Grab a single batch to inspect. Unlike a `for ...: break` loop, this raises
# StopIteration on an empty loader instead of silently leaving `images` and
# `labels` undefined (or holding stale values from an earlier run).
images, labels = next(iter(training_dataloader))

In [12]:
# Shapes of the image and label tensors of the batch fetched in the previous cell.
images.shape, labels.shape

(torch.Size([16, 3, 300, 300]), torch.Size([16]))

In [13]:
class CardClassifier(nn.Module):
    """Playing-card image classifier built on a timm backbone.

    The backbone's ``classifier`` layer is replaced by a fresh linear layer
    that outputs ``num_classes`` logits.

    Args:
        num_classes: Number of output classes (default 53).
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``,
            which matches the previous hard-coded behaviour. Pass ``False``
            when the weights will be restored from a saved ``state_dict``
            right after construction.
        model_name: timm architecture name (default ``'efficientnet_b3'``).
            The created model must expose a ``classifier`` attribute that has
            ``in_features``.
    """

    def __init__(self, num_classes = 53, pretrained = True, model_name = 'efficientnet_b3'):
        super().__init__()

        # Load the backbone
        print("🔧 Creating model...")
        self.backbone = timm.create_model(model_name, pretrained=pretrained)
        print("✅ Model loaded")

        # Swap the backbone's head for one sized to this task.
        in_features = self.backbone.classifier.in_features
        print(in_features)
        self.backbone.classifier = nn.Linear(in_features=in_features, out_features=num_classes)

    def forward(self, x):
        """Return the backbone's output (class logits) for the batch ``x``."""
        return self.backbone(x)

In [14]:
# Build the 53-class classifier and move it to the selected device.
# nn.Module.to() returns the module itself, so the call can be chained.
model = CardClassifier(num_classes=53).to(device=device)
# Bare expression: display the module tree.
model

🔧 Creating model...
✅ Model loaded
1536


CardClassifier(
  (backbone): EfficientNet(
    (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
          (bn1): BatchNormAct2d(
            40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_p

In [15]:
def _run_epoch(model, loader, criterion, device, optimizer=None, desc=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in train mode and its weights are
    updated after every batch. Without one the model is put in eval mode and
    the pass runs with gradient tracking disabled.

    Args:
        model: Module mapping an image batch to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.
        optimizer: Optimizer to step after each batch, or ``None`` to evaluate.
        desc: Label for the progress bar.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the sample-weighted mean loss and the
        fraction of correct predictions over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(is_training):
        for images, labels in progress:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_loss = loss.item()
            # Weight by batch size so a smaller final batch does not skew the mean.
            loss_sum += batch_loss * images.size(0)
            _, predicted = outputs.max(1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            # Show the live batch loss in the progress bar.
            progress.set_postfix(loss=batch_loss)

    if n_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")

    return loss_sum / n_seen, n_correct / n_seen


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

epochs = 10

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")

    # ===== TRAINING PHASE =====
    train_loss, train_acc = _run_epoch(
        model, training_dataloader, criterion, device,
        optimizer=optimizer, desc="Training",
    )
    print(f"  Train Loss: {train_loss:.4f}, Accuracy: {train_acc*100:.2f}%")

    # ===== VALIDATION PHASE =====
    val_loss, val_acc = _run_epoch(
        model, val_dataloader, criterion, device, desc="Validating",
    )
    print(f"  Val   Loss: {val_loss:.4f}, Accuracy: {val_acc*100:.2f}%\n")

    # Stop early once validation accuracy reaches 95%.
    if val_acc >= 0.95:
        print(f"Accutacy is reached 95% [{val_acc*100}%]. Stoping the training...")
        break


Epoch 1/10


                                                                       

  Train Loss: 2.1026, Accuracy: 49.07%


                                                                       

  Val   Loss: 0.4361, Accuracy: 91.32%

Epoch 2/10


                                                                        

  Train Loss: 0.4406, Accuracy: 89.76%


                                                                        

  Val   Loss: 0.1509, Accuracy: 96.60%

Accutacy is reached 95% [96.60377358490567%]. Stoping the training...




In [16]:
# Persist only the learned parameters (the state dict), not the module object.
checkpoint_path = 'card_classifier.pth'
torch.save(model.state_dict(), checkpoint_path)