


We will tackle this problem in 3 parts:
1. PyTorch Dataset
2. PyTorch Model
3. PyTorch Training Loop



In [None]:
import torch #generic package
import torch.nn as nn #neural networks
import torch.nn.functional as F #activation functions
import torch.optim as optim #optimizer

from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms #for standardizing input resolution
from torchvision.datasets import ImageFolder #automatially infers the class from the title of the subdirectories

#import timm #contains image models NOT GONNA USE BC OF BS kaggle download permissions

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
class PlayingCardDataset(Dataset):
    """Thin Dataset wrapper around torchvision's ImageFolder.

    Labelling and the image transform are outsourced to ImageFolder, which
    infers each class label from the name of the subdirectory of ``data_dir``
    an image lives in and applies ``transform`` to images as they are loaded.
    Each item is an ``(image, class)`` pair.
    """

    def __init__(self, data_dir, transform=None):
        # the transform is handed straight through to ImageFolder
        self.data = ImageFolder(data_dir, transform=transform)

    def __len__(self):
        """Tell the rest of the pipeline how many examples there are."""
        return len(self.data)

    def __getitem__(self, idx):
        """Implement ``dataset[idx]``.

        Preprocessing that should only run when an individual example is
        accessed would go here.
        """
        sample = self.data[idx]
        return sample

    # Not essential, just a convenience for seeing the class labels.
    # @property lets callers write ``dataset.classes`` without parentheses.
    @property
    def classes(self):
        """Class names as reported by the wrapped ImageFolder."""
        return self.data.classes
        

## Make sure Dataset works

In [None]:
# Quick sanity check of the Dataset class; no transform is passed here.
trial_dataset = PlayingCardDataset(data_dir='/kaggle/input/cards-image-datasetclassification/train')

In [None]:
# number of examples in the dataset, via PlayingCardDataset.__len__
len(trial_dataset)

In [None]:
# Indexing goes through __getitem__, which returns ImageFolder's (image, class) tuple.
image, label = trial_dataset[6000] #unpack the (image, class) tuple
print(label) #note: there are 53 different classes, including the joker
image #last expression in the cell, so the notebook displays it

In [None]:
# ImageFolder automatically encodes the classes as numbers, so it is
# useful to see how those numbers map back to class names.
#
# trial_dataset.data is the wrapped ImageFolder, and its class_to_idx
# attribute is a dictionary holding exactly that mapping.
class_index_map = trial_dataset.data.class_to_idx
for class_name, class_index in class_index_map.items():
    print(f"{class_name}: {class_index}")

In [None]:
# Every input to the network has to be a tensor of the same size, so each
# image is first resized to 128 x 128 and then converted to a tensor.
resize_step = transforms.Resize([128, 128])
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# the official training set, with the transform applied as images are loaded
data_dir = '/kaggle/input/cards-image-datasetclassification/train'
train_dataset = PlayingCardDataset(data_dir=data_dir, transform=transform)




In [None]:
#look at the first transformed example (the label is discarded)
image, _ = train_dataset[0]
image #3 x 128 x 128: one 128 x 128 grid for each color channel

In [None]:
# A dataset is an iterable; pull just the first (image, label) pair from it.
image, label = next(iter(train_dataset))
image.shape

In [None]:
batch_size = 32

# shuffling the training data is best practice
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)



In [None]:
# A DataLoader is an iterable whose elements are whole batches, each one
# holding both the inputs and the labels; take the first batch only.
images, labels = next(iter(train_dataloader))

In [None]:
# expected: (batch_size, 3, 128, 128) for the images and (batch_size,) for the labels
images.shape, labels.shape

In [None]:
# the numeric class labels for the batch
labels

In [None]:
class CardClassifier(nn.Module):
    """Small from-scratch CNN mapping 3 x 128 x 128 card images to class logits.

    Architecture:
        conv(3 -> 5, size preserving) -> batch norm -> ReLU -> 2x2 max pool
        conv(5 -> 10, size preserving) -> batch norm -> ReLU -> 2x2 max pool
        flatten -> linear(32*32*10 -> 512) -> ReLU -> linear(512 -> num_classes)

    A pretrained timm ``efficientnet_b0`` backbone with its final layer
    removed (1280 output features) was the first idea, but timm is not used
    in this notebook, so the network is built from scratch instead.

    Args:
        num_classes: number of output classes, i.e. the size of the logit
            vector returned for each example.
    """

    def __init__(self, num_classes):
        # Initialise nn.Module first so the layers assigned below are
        # registered as sub-modules (and their weights as parameters).
        super().__init__()

        # 3 x 128 x 128 -> 5 x 128 x 128 (kernel 3 with padding 1 preserves size)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, stride=1, padding=1)
        # normalises each channel; the argument is the number of channels
        self.bn1 = nn.BatchNorm2d(5)

        # 2x2 kernel with stride 2 halves the height and width. Max pooling has
        # no weights, so a single instance can be reused after both conv stages.
        self.maxpool = nn.MaxPool2d(2, 2)

        # 5 x 64 x 64 -> 10 x 64 x 64
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(10)

        # After the second pool the feature map is 10 x 32 x 32; it is
        # flattened in forward() before entering the fully connected layers.
        self.linear1 = nn.Linear(32 * 32 * 10, 512)  # in, out
        self.classification = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw logits of shape (batch_size, num_classes).

        Args:
            x: batch of images shaped (batch_size, 3, 128, 128). Any other
                height/width makes ``linear1`` raise a shape error.

        Returns:
            Logits, not probabilities: PyTorch loss functions such as
            CrossEntropyLoss take logits, so no softmax is applied here.
        """
        # note: ReLU only happens after the normalization
        x = self.maxpool(F.relu(self.bn1(self.conv1(x))))  # -> batch x 5 x 64 x 64
        x = self.maxpool(F.relu(self.bn2(self.conv2(x))))  # -> batch x 10 x 32 x 32

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32*32*10), this can never fold a wrong-sized input into
        # extra batch rows; a size mismatch surfaces as an error in linear1.
        x = torch.flatten(x, start_dim=1)

        # Without a non-linearity between them, the two linear layers would
        # collapse into a single affine map.
        x = F.relu(self.linear1(x))
        return self.classification(x)
        
        
        
        
        


In [None]:
# 53 output classes, matching the number of classes in the dataset
model = CardClassifier(num_classes=53)
print(model)

In [None]:
# run the batch of images through the model to check the output dimensions
example_out = model(images)
example_out.shape # [batch_size, num_classes]

In [None]:
# Adam takes in the model's parameters and a learning rate.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# By convention the loss function is called the criterion.
criterion = nn.CrossEntropyLoss()

In [None]:
#loss functions take in the model outputs and the labels

criterion(example_out, labels)
#gives a tensor containing the value of the loss

#NOTE(review): the original note here was cut off mid-sentence ("this will
#currently give an error because the ..."); the intended reason cannot be
#recovered from this file -- confirm whether an error is actually expected

In [None]:
# The datasets require a transformation, so rebuild that first.
image_pipeline = [transforms.Resize((128,128)), transforms.ToTensor()]
transform = transforms.Compose(image_pipeline)

# one dataset per split, all sharing the same transform
train_dataset = PlayingCardDataset(
    data_dir='/kaggle/input/cards-image-datasetclassification/train',
    transform=transform,
)
val_dataset = PlayingCardDataset(
    data_dir='/kaggle/input/cards-image-datasetclassification/valid',
    transform=transform,
)
test_dataset = PlayingCardDataset(
    data_dir='/kaggle/input/cards-image-datasetclassification/test',
    transform=transform,
)

# only the training loader shuffles its data
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32)
test_dataloader = DataLoader(test_dataset, batch_size=32)

In [None]:
# Move training to the GPU if possible: find out which device is available,
# then explicitly move the model and every batch to that device.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

num_epoch = 5
train_losses, val_losses = [], []

model = CardClassifier(num_classes=53)
model.to(device)  # put model on the chosen device

# The loss and the optimizer are built here, AFTER the fresh model exists.
# An optimizer only ever updates the parameters it was constructed with, so
# reusing one created for an earlier model instance would leave this model's
# weights untouched and the loss would not improve.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epoch):
    # train mode: layers such as batch norm use their training behaviour
    model.train()
    running_loss = 0.0

    # feed each batch in
    for images, labels in train_dataloader:
        # data has to live on the same device as the model
        images, labels = images.to(device), labels.to(device)

        # gradients accumulate across backward() calls, so clear them first
        optimizer.zero_grad()
        outputs = model(images)  # calls forward on the batch
        loss = criterion(outputs, labels)

        # Autograd records the operations that produced `loss` from the model
        # weights, so backward() can compute the gradient of the loss with
        # respect to every parameter without the connection being spelled out.
        loss.backward()

        # the optimizer was given the model's parameters at construction, so
        # step() updates exactly those weights using the fresh gradients
        optimizer.step()

        # The goal is the average loss per training example. The default
        # reduction of PyTorch losses is 'mean', so `loss` is the batch
        # AVERAGE; multiplying by the batch size (images.size(0)) recovers
        # the batch total, which is summed here and averaged after the epoch.
        running_loss += loss.item() * images.size(0)

    # average loss per example over the whole training set; len() of the
    # dataloader itself would give the number of batches, hence .dataset
    train_loss = running_loss / len(train_dataloader.dataset)
    train_losses.append(train_loss)  # store the average, same scale as val_loss

    # also monitor the validation loss
    model.eval()  # eval mode: layers such as batch norm use their inference behaviour
    running_val_loss = 0.0
    # no_grad() stops autograd from tracking operations; backward() is never
    # called here, so it is not strictly necessary, but it speeds things up
    with torch.no_grad():
        for images, labels in val_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item() * images.size(0)
    val_loss = running_val_loss / len(val_dataloader.dataset)
    val_losses.append(val_loss)

    # log the losses for this epoch
    print(f"Epoch: {epoch+1}/{num_epoch} -- Train loss: {train_loss}, Val loss: {val_loss}")
    
    
    
    

In [None]:
# one curve per split, one point per epoch
loss_curves = (
    (train_losses, 'Training loss'),
    (val_losses, 'Validation loss'),
)
for curve, curve_label in loss_curves:
    plt.plot(curve, label=curve_label)
plt.title("Loss over epochs")
plt.legend()
plt.show()

# **Bonus:** Evaluating the Results



In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Load and preprocess the image
def preprocess_image(image_path, transform):
    """Open an image file and return ``(rgb_image, batched_tensor)``.

    The first element is the image converted to RGB (kept for display); the
    second is ``transform(image)`` with a leading dimension of size 1 added
    via ``unsqueeze(0)`` so it can be fed to the model as a batch of one.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    batched = transform(rgb_image).unsqueeze(0)
    return rgb_image, batched

# Predict using the model
def predict(model, image_tensor, device):
    """Return the model's class probabilities as a flat NumPy array.

    Switches ``model`` to eval mode (it is left in that mode), moves
    ``image_tensor`` to ``device``, runs a forward pass with gradient
    tracking disabled and applies softmax over dim 1 to the outputs.
    """
    model.eval()
    with torch.no_grad():
        logits = model(image_tensor.to(device))
        probs = torch.nn.functional.softmax(logits, dim=1)
    flat_probs = probs.cpu().numpy().flatten()
    return flat_probs

# Visualization
def visualize_predictions(original_image, probabilities, class_names):
    """Show the image next to a horizontal bar chart of class probabilities."""
    _, (image_ax, bar_ax) = plt.subplots(1, 2, figsize=(14, 7))

    # left panel: the image itself, without axis ticks
    image_ax.imshow(original_image)
    image_ax.axis("off")

    # right panel: one horizontal bar per class, x-axis fixed to [0, 1]
    bar_ax.barh(class_names, probabilities)
    bar_ax.set_title("Class Predictions")
    bar_ax.set_xlabel("Probability")
    bar_ax.set_xlim(0, 1)

    plt.tight_layout()
    plt.show()

# Example usage
test_image = "/kaggle/input/cards-image-datasetclassification/test/five of diamonds/2.jpg"
# same resize + to-tensor preprocessing that the training data went through
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

original_image, image_tensor = preprocess_image(test_image, transform)
probabilities = predict(model, image_tensor, device)

# Class names come from the training dataset: the model's output indices
# follow the class indices it was trained on. (The name `dataset` used here
# previously is not defined anywhere in the notebook.)
class_names = train_dataset.classes
visualize_predictions(original_image, probabilities, class_names)

In [None]:
from glob import glob

# every image in every class folder of the test split (absolute path, like
# the other cells, so it does not depend on the working directory)
test_images = glob('/kaggle/input/cards-image-datasetclassification/test/*/*')

# pick up to 10 distinct images; capping the sample size keeps this from
# failing when fewer than 10 (or no) files are found
num_examples = min(10, len(test_images))
test_examples = np.random.choice(test_images, num_examples, replace=False)

# Class names come from the training dataset, whose class indices the model
# was trained on; looked up once because they do not change between images.
class_names = train_dataset.classes

for example in test_examples:
    original_image, image_tensor = preprocess_image(example, transform)
    probabilities = predict(model, image_tensor, device)
    visualize_predictions(original_image, probabilities, class_names)

# Todo

- Calculate the accuracy of our model on the validation and test set.