In [1]:
!pip install torch
!pip install torchvision



In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F  # For ReLU, etc.
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import cv2
from glob import glob

# Force PyTorch to use float32 as default type
torch.set_default_tensor_type(torch.FloatTensor)
torch.set_default_dtype(torch.float32)

  _C._set_default_tensor_type(t)


In [3]:
training_data_dir = os.path.join("large-dataset", "train")
test_data_dir = os.path.join("large-dataset", "test")


# Define mappings for ranks and suits
rank_map = {
    'ace': 0,
    '2': 1, 'two': 1,
    '3': 2, 'three': 2,
    '4': 3, 'four': 3,
    '5': 4, 'five': 4,
    '6': 5, 'six': 5,
    '7': 6, 'seven': 6,
    '8': 7, 'eight': 7,
    '9': 8, 'nine': 8,
    '10': 9, 'ten': 9,
    'jack': 10,
    'queen': 11,
    'king': 12,
}

suit_map = {
    'clubs': 0,
    'diamonds': 1,
    'hearts': 2,
    'spades': 3,
}

# Collect images and labels
X, ranks, suits = [], [], []

card_dirs = [d for d in glob(os.path.join(training_data_dir, '*')) if os.path.isdir(d)]
target_size = (224, 224)

for card_dir in card_dirs:
    card_name = os.path.basename(card_dir).lower()
    
    # Identify rank and suit from directory name
    found_rank = None
    found_suit = None
    
    
    # Look for rank
    for rank_name, rank_idx in rank_map.items():
        if rank_name in card_name and rank_name != 'joker':
            found_rank = rank_idx
            break
    
    # Look for suit
    for suit_name, suit_idx in suit_map.items():
        if suit_name in card_name and suit_name != 'joker':
            found_suit = suit_idx
            break
    
    if found_rank is None or found_suit is None:
        print(f"Skipping {card_name} - couldn't identify rank or suit")
        continue
    
    # Process all images in this folder
    image_paths = glob(os.path.join(card_dir, '*.jpg')) + glob(os.path.join(card_dir, '*.png'))
    
    for path in image_paths:
        if os.path.exists(path):
            img = cv2.imread(path)
            if img is not None:
                # Convert to RGB and resize
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, target_size)
                
                X.append(img)
                ranks.append(found_rank)
                suits.append(found_suit)

# Convert to NumPy arrays
X = np.array(X, dtype=np.float32) / 255.0
ranks = np.array(ranks, dtype=np.int64)
suits = np.array(suits, dtype=np.int64)

print(f"Loaded {len(X)} images with shape {X.shape}")

Skipping joker - couldn't identify rank or suit
Loaded 7509 images with shape (7509, 224, 224, 3)


In [4]:
class CardDataset(Dataset):
    def __init__(self, images, ranks, suits, transform=None):
        self.images = images
        self.ranks = ranks
        self.suits = suits
        self.transform = transform
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = self.images[idx]  # shape (H, W, C), range [0,1]
        
        # If we have a transform pipeline that expects a PIL image,
        # we can convert here.
        if self.transform:
            import torchvision.transforms.functional as TF
            # Convert from NumPy array to PIL Image
            image = TF.to_pil_image(image)  
            image = self.transform(image)
        else:
            # Default: convert to tensor and permute to C,H,W
            image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        
        return (
            image,
            torch.tensor(self.ranks[idx], dtype=torch.long),
            torch.tensor(self.suits[idx], dtype=torch.long)
        )

In [5]:
X_train, X_val, ranks_train, ranks_val, suits_train, suits_val = train_test_split(
    X, ranks, suits, test_size=0.2, random_state=42
)

print("Train set:", len(X_train), "images")
print("Val set:", len(X_val), "images")

Train set: 6007 images
Val set: 1502 images


In [6]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),      # Resize
    transforms.RandomHorizontalFlip(),  # 50% chance flip
    transforms.RandomRotation(180),      # Rotate up to ±180 degrees
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# No augmentations for validation (only resize + normalize)
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_dataset = CardDataset(X_train, ranks_train, suits_train, transform=train_transform)
val_dataset   = CardDataset(X_val,   ranks_val,   suits_val,   transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=32, shuffle=False)

In [7]:
class CardClassifier(nn.Module):
    def __init__(self):
        super(CardClassifier, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # For 224x224 input, after 3 pool layers => spatial size is 16x16
        self.flat_features = 64 * 28 * 28
        
        self.fc1 = nn.Linear(self.flat_features, 224)
        
        # Separate outputs for rank and suit
        self.rank_classifier = nn.Linear(224, 13)  
        self.suit_classifier = nn.Linear(224, 4)   
        
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        
        x = torch.flatten(x, 1)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        rank_output = self.rank_classifier(x)
        suit_output = self.suit_classifier(x)
        
        return rank_output, suit_output

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CardClassifier().to(device)

# Double-check all model parameters are float32
for param in model.parameters():
    param.data = param.data.float()

criterion_rank = nn.CrossEntropyLoss()
criterion_suit = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Gradient accumulation function
def train_epoch(model, loader, optimizer, accumulation_steps=4):
    model.train()
    total_loss = 0
    batch_count = 0
    
    optimizer.zero_grad()
    
    for i, (images, ranks_batch, suits_batch) in enumerate(loader):
        images = images.to(device, dtype=torch.float32)
        ranks_batch = ranks_batch.to(device)
        suits_batch = suits_batch.to(device)
        
        rank_outputs, suit_outputs = model(images)
        rank_loss = criterion_rank(rank_outputs, ranks_batch)
        suit_loss = criterion_suit(suit_outputs, suits_batch)
        
        loss = rank_loss + suit_loss
        loss = loss / accumulation_steps
        
        loss.backward()
        
        if (i + 1) % accumulation_steps == 0 or (i + 1) == len(loader):
            optimizer.step()
            optimizer.zero_grad()
        
        total_loss += loss.item() * accumulation_steps
        batch_count += 1
        
        if (i + 1) % 10 == 0:
            print(f"  Batch {i+1}/{len(loader)}, Loss: {loss.item()*accumulation_steps:.4f}")
    
    return total_loss / batch_count

In [9]:
num_epochs = 100
best_loss = float('inf')
patience = 4
no_improve_epochs = 0

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs} started")
    try:
        epoch_loss = train_epoch(model, train_loader, optimizer, accumulation_steps=4)
        print(f"Epoch {epoch+1} Loss: {epoch_loss:.4f}")
        
        # Check improvement
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), 'card_classifier_best_224.pth')
            no_improve_epochs = 0
            print(f"  Model saved (improved loss: {best_loss:.4f})")
        else:
            no_improve_epochs += 1
            print(f"  No improvement for {no_improve_epochs} epochs")
        
        if no_improve_epochs >= patience:
            print("Early stopping triggered.")
            break
            
    except KeyboardInterrupt:
        print("\nTraining interrupted by user.")
        break

# Save the final model
torch.save(model.state_dict(), 'card_classifier_224_50.pth')
print("Final model saved.")

Epoch 1/100 started
  Batch 10/188, Loss: 5.7883
  Batch 20/188, Loss: 4.0914
  Batch 30/188, Loss: 4.0287
  Batch 40/188, Loss: 3.8701
  Batch 50/188, Loss: 3.9099
  Batch 60/188, Loss: 3.8446
  Batch 70/188, Loss: 3.7425
  Batch 80/188, Loss: 3.6646
  Batch 90/188, Loss: 3.5791
  Batch 100/188, Loss: 3.5086
  Batch 110/188, Loss: 3.4837
  Batch 120/188, Loss: 3.3679
  Batch 130/188, Loss: 3.4633
  Batch 140/188, Loss: 3.7583
  Batch 150/188, Loss: 3.5371
  Batch 160/188, Loss: 3.3744
  Batch 170/188, Loss: 3.4931
  Batch 180/188, Loss: 3.3276
Epoch 1 Loss: 3.7879
  Model saved (improved loss: 3.7879)
Epoch 2/100 started
  Batch 10/188, Loss: 3.5911
  Batch 20/188, Loss: 3.1769
  Batch 30/188, Loss: 3.0839
  Batch 40/188, Loss: 3.1905
  Batch 50/188, Loss: 3.4016
  Batch 60/188, Loss: 3.2000
  Batch 70/188, Loss: 3.3788
  Batch 80/188, Loss: 3.3118
  Batch 90/188, Loss: 3.0750
  Batch 100/188, Loss: 3.0651
  Batch 110/188, Loss: 3.0165
  Batch 120/188, Loss: 3.0451
  Batch 130/188, Lo

In [None]:
delte this