In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
from torch.utils.data import DataLoader, TensorDataset
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)


In [None]:
import kagglehub

# Kaggle handle of the Sign Language MNIST dataset.
DATASET_HANDLE = "datamunge/sign-language-mnist"

# Download the latest version; `path` is the local directory holding the files.
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/datamunge/sign-language-mnist?dataset_version_number=1...


100%|██████████| 62.6M/62.6M [00:00<00:00, 125MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/datamunge/sign-language-mnist/versions/1


In [None]:
# Locations of the train/test CSV files inside the downloaded dataset directory.
path_sign_mnist_train, path_sign_mnist_test = (
    f"{path}/sign_mnist_{split}.csv" for split in ("train", "test")
)

def get_data(filename):
    """Load a Sign Language MNIST style CSV file into NumPy arrays.

    The first row is treated as a header and skipped. In every following row
    the first field is the integer class label and the next 784 fields are the
    pixel values of one image, which are reshaped to 28x28. Blank lines are
    ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float array of shape
        ``(N, 28, 28)`` and ``labels`` is an integer array of shape ``(N,)``.
        A file with no data rows yields shapes ``(0, 28, 28)`` and ``(0,)``.

    Raises:
        ValueError: If a label or pixel is not numeric, or a row does not
            contain 784 pixel values (raised by the conversion/reshape).
    """
    temp_images = []
    temp_labels = []

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects.
    with open(filename, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row (no-op on an empty file)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline)
                continue
            # First field is the label, the remaining 784 are the pixels.
            temp_labels.append(int(row[0]))
            temp_images.append(np.array(row[1:785], dtype=float).reshape(28, 28))

    if temp_images:
        images = np.array(temp_images)
    else:
        # Keep the (N, 28, 28) contract even when there are no samples.
        images = np.empty((0, 28, 28), dtype=float)
    labels = np.array(temp_labels, dtype=int)
    return images, labels

# Parse both CSV splits into NumPy arrays (images and labels).
training_images, training_labels = get_data(path_sign_mnist_train)
testing_images, testing_labels = get_data(path_sign_mnist_test)

# Sanity check: show the shape of every array that was loaded.
for split_array in (training_images, training_labels, testing_images, testing_labels):
    print(split_array.shape)


(27455, 28, 28)
(27455,)
(7172, 28, 28)
(7172,)


In [None]:
# Re-read the CSVs so this cell always starts from the raw NumPy arrays,
# regardless of what the names below were rebound to by an earlier run.
training_images, training_labels = get_data(path_sign_mnist_train)
testing_images, testing_labels = get_data(path_sign_mnist_test)

# Images: float32 tensors divided by 255, then given an explicit channel axis
# so the shape is (N, 1, 28, 28) as expected by 2-D convolution layers.
training_images = (torch.tensor(training_images, dtype=torch.float32) / 255.0).view(-1, 1, 28, 28)
testing_images = (torch.tensor(testing_images, dtype=torch.float32) / 255.0).view(-1, 1, 28, 28)

# Labels: int64 tensors.
training_labels = torch.tensor(training_labels, dtype=torch.long)
testing_labels = torch.tensor(testing_labels, dtype=torch.long)

# Pair every image with its label.
train_dataset = TensorDataset(training_images, training_labels)
test_dataset = TensorDataset(testing_images, testing_labels)

# Mini-batches of 32; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Sanity check: show the shape of every tensor.
for split_tensor in (training_images, training_labels, testing_images, testing_labels):
    print(split_tensor.shape)


torch.Size([27455, 1, 28, 28])
torch.Size([27455])
torch.Size([7172, 1, 28, 28])
torch.Size([7172])


In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

class ConvNet(nn.Module):
    """Small LeNet-style CNN for single-channel 28x28 images.

    Shape flow for an input batch of shape (N, 1, 28, 28):
        conv1 (3x3) -> (N, 6, 26, 26) -> pool -> (N, 6, 13, 13)
        conv2 (3x3) -> (N, 16, 11, 11) -> pool -> (N, 16, 5, 5)
        flatten     -> (N, 400) -> fc1 -> 120 -> fc2 -> 84 -> fc3 -> num_classes

    Args:
        num_classes: Number of output logits produced by the last layer.
            Defaults to 26 (one per letter of the alphabet).
    """

    def __init__(self, num_classes=26):
        super().__init__()

        self.conv1 = nn.Conv2d(1, 6, 3)        # 1 input channel (grayscale), 6 filters, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)         # 2x2 max pooling, stride 2 (shared by both conv stages)
        self.conv2 = nn.Conv2d(6, 16, 3)       # 6 input channels, 16 filters, 3x3 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 feature maps of 5x5 after the second pooling
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)  # output layer: one logit per class

    def forward(self, x):
        """Return raw class logits of shape (N, num_classes) for a (N, 1, 28, 28) batch."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 400) this
        # never folds extra features into the batch dimension: an input of the
        # wrong spatial size fails loudly at fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
import torch.optim as optim

model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification (takes raw logits)
optimizer = optim.Adam(model.parameters(), lr=0.005)

epochs = 2

# Training mode only needs to be set once; nothing below switches it off.
model.train()

for epoch in range(epochs):

    # Distinct batch names: reusing `training_images` / `training_labels` here
    # would overwrite the full dataset tensors with the last mini-batch.
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step before accumulating new ones.
        optimizer.zero_grad()

        # The model already lives on `device`, so its output does too.
        output = model(batch_images)

        loss_val = criterion(output, batch_labels)
        #l2_norm = sum(p.pow(2).sum() for p in model.parameters())
        #loss_val += 0.01 * l2_norm

        loss_val.backward()
        optimizer.step()

    # Report every 10th epoch and always the final one. The value shown is the
    # loss of the last mini-batch of the epoch, not an epoch average.
    if epoch % 10 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch}, Loss: {loss_val.item()}")



Epoch 0, Loss: 0.11952153593301773


In [None]:
NUM_CLASSES = 26  # number of logits produced by the model (A-Z)

n_correct = 0
n_samples = 0
n_class_correct = [0] * NUM_CLASSES
n_class_samples = [0] * NUM_CLASSES

# Put the network in inference mode before measuring accuracy.
model.eval()

with torch.no_grad():
    # Distinct batch names: reusing `testing_images` / `testing_labels` here
    # would overwrite the full test tensors with the last mini-batch.
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Raw logits, one row per sample.
        outputs = model(batch_images)

        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)

        n_samples += batch_labels.size(0)
        n_correct += (predicted == batch_labels).sum().item()

        # Copy the batch to Python lists once instead of calling .item() per element.
        for label, pred in zip(batch_labels.tolist(), predicted.tolist()):
            if 0 <= label < NUM_CLASSES:
                if label == pred:
                    n_class_correct[label] += 1
                n_class_samples[label] += 1
            else:
                print(f"Warning: Invalid label {label} encountered!")

# Overall accuracy
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network: {acc:.2f} %')

# Per-class accuracy; classes with no test samples are skipped.
for i in range(NUM_CLASSES):
    if n_class_samples[i] > 0:
        class_acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of class {i}: {class_acc:.2f} %')


Accuracy of the network: 87.40 %
Accuracy of class 0: 95.17 %
Accuracy of class 1: 100.00 %
Accuracy of class 2: 99.03 %
Accuracy of class 3: 100.00 %
Accuracy of class 4: 94.58 %
Accuracy of class 5: 99.60 %
Accuracy of class 6: 93.68 %
Accuracy of class 7: 84.86 %
Accuracy of class 8: 92.71 %
Accuracy of class 10: 81.27 %
Accuracy of class 11: 100.00 %
Accuracy of class 12: 88.83 %
Accuracy of class 13: 46.74 %
Accuracy of class 14: 98.78 %
Accuracy of class 15: 93.95 %
Accuracy of class 16: 82.93 %
Accuracy of class 17: 72.22 %
Accuracy of class 18: 82.11 %
Accuracy of class 19: 48.79 %
Accuracy of class 20: 72.56 %
Accuracy of class 21: 85.26 %
Accuracy of class 22: 88.83 %
Accuracy of class 23: 92.13 %
Accuracy of class 24: 83.13 %
