# Week 14: CNN Lab - Rock, Paper, Scissors

**Objective:** Build, train, and test a Convolutional Neural Network (CNN) to classify images of hands playing Rock, Paper, or Scissors.

### Step 1: Setup and Data Download

This first cell downloads the dataset from Kaggle.

In [1]:
import kagglehub

# Fetch the Rock-Paper-Scissors dataset through kagglehub and keep the
# directory it reports so the location can be inspected.
dataset_handle = "drgfreeman/rockpaperscissors"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'rockpaperscissors' dataset.
Path to dataset files: /kaggle/input/rockpaperscissors


In [2]:
import shutil
import os

# Copy the three class folders into a clean working directory so that
# ImageFolder later sees exactly these three class sub-directories.
#
# Prefer the location reported by kagglehub in the download cell (the global
# `path`): kagglehub may store the dataset somewhere other than the Colab
# cache location. Fall back to the Colab cache path when the download cell
# has not been run in this session.
src_root = globals().get("path", "/kaggle/input/rockpaperscissors")
dst_root = "/content/dataset"

os.makedirs(dst_root, exist_ok=True)

folders_to_copy = ["rock", "paper", "scissors"]

for folder in folders_to_copy:
    src_path = os.path.join(src_root, folder)
    dst_path = os.path.join(dst_root, folder)

    # copytree needs a directory; a stray file with the same name is treated
    # as "not found" instead of raising.
    if os.path.isdir(src_path):
        # dirs_exist_ok=True makes re-running this cell safe.
        shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
        print("Copied:", folder)
    else:
        print("Folder not found:", folder)



Copied: rock
Copied: paper
Copied: scissors


### Step 2: Imports and Device Setup

Import the necessary libraries and check if a GPU is available.

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from PIL import Image
import numpy as np

# Pick the compute device: the GPU when CUDA is available, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print("Using device:", device)


Using device: cpu


### Step 3: Data Loading and Preprocessing

Here we will define our image transformations, load the dataset, split it, and create DataLoaders.

In [4]:
DATA_DIR = "/content/dataset"

# Fixed seed for the train/test split so that the reported test accuracy is
# reproducible from run to run.
SPLIT_SEED = 42

# Preprocessing applied to every image:
# 1. Resize to 128x128 (the model's classifier head expects this size)
# 2. Convert to Tensor
# 3. Normalize each of the 3 RGB channels (mean=0.5, std=0.5)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load dataset using ImageFolder (one sub-folder per class)
full_dataset = datasets.ImageFolder(DATA_DIR, transform=transform)

class_names = full_dataset.classes
print("Classes:", class_names)

# Split dataset (80% train, 20% test); the test split takes the remainder so
# the two sizes always add up to the dataset length.
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# Create train and test datasets with a seeded generator (reproducible split)
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

# Create DataLoaders: shuffle only the training data
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Total images: {len(full_dataset)}")
print(f"Training images: {len(train_dataset)}")
print(f"Test images: {len(test_dataset)}")


Classes: ['paper', 'rock', 'scissors']
Total images: 2188
Training images: 1750
Test images: 438


### Step 4: Define the CNN Model

Fill in the `conv_block` (convolutional feature extractor) and `fc` (fully connected classifier) blocks with the correct layers.

In [5]:
class RPS_CNN(nn.Module):
    """Small CNN that classifies 128x128 RGB images into `num_classes` classes.

    Three Conv -> ReLU -> MaxPool stages widen the channels (3 -> 16 -> 32 -> 64)
    while halving the spatial size each time (128 -> 64 -> 32 -> 16). A
    two-layer fully connected head then maps the flattened features to one
    raw score (logit) per class.

    Args:
        num_classes: Number of output logits. Defaults to 3
            (rock, paper, scissors).

    Note:
        The first Linear layer is sized for 128x128 inputs; other input sizes
        will fail in the classifier head.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # --- Convolutional Feature Extractor ---
        # kernel_size=3 with padding=1 keeps height/width unchanged, so only
        # the pooling layers shrink the feature maps.
        self.conv_block = nn.Sequential(
            # Block 1: 3 → 16
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Block 2: 16 → 32
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Block 3: 32 → 64
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # After 3 pooling layers:
        # 128 -> 64 -> 32 -> 16
        # Channels = 64 → Flatten size = 64 * 16 * 16 = 16384
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)     # one logit per class
        )

    def forward(self, x):
        """Return class logits for a batch of images shaped (N, 3, 128, 128)."""
        x = self.conv_block(x)
        x = self.fc(x)
        return x


# --- Model, Loss, Optimizer ---
# Loss function: cross-entropy computed on the model's raw logits
criterion = nn.CrossEntropyLoss()

# Network instance, placed on the selected device
model = RPS_CNN()
model = model.to(device)

# Optimizer: Adam over every model parameter
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

print(model)


RPS_CNN(
  (conv_block): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=16384, out_features=256, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=256, out_features=3, bias=True)
  )
)


### Step 5: Train the Model

Fill in the core training steps inside the loop.

In [6]:
EPOCHS = 10

for epoch in range(EPOCHS):
    # Training mode enables dropout
    model.train()
    total_loss = 0.0

    for images, labels in train_loader:
        # Batch tensors must live on the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Reset gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagate, then apply the parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss as a plain Python float
        total_loss += loss.item()

    # Report the mean loss per batch for this epoch
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss = {total_loss/len(train_loader):.4f}")

print("Training complete!")


Epoch 1/10, Loss = 0.5934
Epoch 2/10, Loss = 0.1471
Epoch 3/10, Loss = 0.0754
Epoch 4/10, Loss = 0.0311
Epoch 5/10, Loss = 0.0262
Epoch 6/10, Loss = 0.0105
Epoch 7/10, Loss = 0.0135
Epoch 8/10, Loss = 0.0113
Epoch 9/10, Loss = 0.0042
Epoch 10/10, Loss = 0.0021
Training complete!


### Step 6: Evaluate the Model

Test the model's accuracy on the unseen test set.

In [7]:
# Evaluation mode disables dropout
model.eval()
correct = 0
total = 0

# No gradients are needed while measuring accuracy
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Raw class scores (logits) for the batch
        outputs = model(images)

        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)

        matches = predicted.eq(labels)
        correct += int(matches.sum())
        total += labels.shape[0]

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 98.40%


### Step 7: Test on a Single Image

Let's see how the model performs on one image.

In [8]:
def predict_image(model, img_path):
    """Classify a single image file and return the predicted class name.

    Uses the notebook-level `transform`, `device` and `class_names`.

    Args:
        model: Network to run; it is switched to evaluation mode and left
            in that mode.
        img_path: Path to an image file that PIL can open.

    Returns:
        The entry of `class_names` at the index of the largest logit.
    """
    model.eval()

    # The context manager closes the underlying file deterministically;
    # convert() returns a separate in-memory RGB image that outlives it.
    with Image.open(img_path) as raw_img:
        rgb_img = raw_img.convert("RGB")

    # Apply the same transforms as the dataset, and add a batch dimension
    # (unsqueeze) because the model expects a batch of images.
    batch = transform(rgb_img).unsqueeze(0).to(device)

    with torch.no_grad():
        # 1. Forward pass (logits)
        output = model(batch)

        # 2. Predicted class (index of max logit)
        _, pred = torch.max(output, 1)

    return class_names[pred.item()]

# Try the helper on one known image (the file must exist on disk)
test_img_path = "/content/dataset/paper/0Uomd0HvOB33m47I.png"
prediction = predict_image(model=model, img_path=test_img_path)
print(f"Model prediction for {test_img_path}: {prediction}")


Model prediction for /content/dataset/paper/0Uomd0HvOB33m47I.png: paper


### Step 8: Play the Game!

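This code is complete. Once your model is trained, run this cell to have the model play against itself: two random images are drawn from the dataset, the model classifies each one as a player's move, and the game rules decide the winner.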

In [9]:
import random
import os

def pick_random_image(class_name, root="/content/dataset"):
    """Return the path of a randomly chosen file from one class folder.

    Args:
        class_name: Name of the class sub-folder (e.g. "rock").
        root: Dataset directory that contains one sub-folder per class.

    Returns:
        Path to one regular file inside `root/class_name`, chosen uniformly
        at random.

    Raises:
        FileNotFoundError: If the class folder does not exist.
        IndexError: If the class folder contains no regular files.
    """
    folder = os.path.join(root, class_name)

    # Keep regular files only (a sub-directory is not an image), and sort
    # them: os.listdir order is arbitrary, so sorting makes runs with a
    # fixed random seed reproducible.
    files = sorted(
        name
        for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )
    if not files:
        raise IndexError(f"No image files found in {folder!r}")

    return os.path.join(folder, random.choice(files))

def rps_winner(move1, move2):
    """Decide a round of Rock-Paper-Scissors.

    Args:
        move1: Player 1's move: "rock", "paper" or "scissors".
        move2: Player 2's move: "rock", "paper" or "scissors".

    Returns:
        "Draw" when both moves are equal, otherwise a sentence naming the
        winning player and the two moves.

    Raises:
        ValueError: If either move is not one of the three valid moves.
    """
    # Each key beats the move it maps to.
    rules = {
        "rock": "scissors",
        "paper": "rock",
        "scissors": "paper"
    }

    # Validate both moves up front; otherwise an unknown second move would
    # be reported as the winner and two identical unknown moves as a draw.
    for move in (move1, move2):
        if move not in rules:
            raise ValueError(
                f"Unknown move {move!r}; expected one of {sorted(rules)}"
            )

    if move1 == move2:
        return "Draw"

    if rules[move1] == move2:
        return f"Player 1 wins! {move1} beats {move2}"
    return f"Player 2 wins! {move2} beats {move1}"


# -----------------------------------------------------------
# 1. Draw a random class, then a random image of it, per player
# -----------------------------------------------------------

choices = ["rock", "paper", "scissors"]
c1, c2 = random.choice(choices), random.choice(choices)

img1_path, img2_path = pick_random_image(c1), pick_random_image(c2)

print("Randomly selected images:")
print("Image 1:", img1_path)
print("Image 2:", img2_path)


# -----------------------------------------------------------
# 2. Let the model classify each image as that player's move
# -----------------------------------------------------------

p1, p2 = predict_image(model, img1_path), predict_image(model, img2_path)

print("\nPlayer 1 shows:", p1)
print("Player 2 shows:", p2)

# -----------------------------------------------------------
# 3. Apply the game rules to the predicted moves
# -----------------------------------------------------------

print("\nRESULT:", rps_winner(p1, p2))

Randomly selected images:
Image 1: /content/dataset/paper/yaRRMcZS7JJ3xLK1.png
Image 2: /content/dataset/rock/I8DyB4yRN0Q8voWp.png

Player 1 shows: paper
Player 2 shows: rock

RESULT: Player 1 wins! paper beats rock
