### Training Script ###

In [1]:
# Training hyperparameters shared by the cells below.
batch_size = 64  # samples per batch for the train/val/test DataLoaders
learning_rate = 0.05  # step size passed to the SGD optimizer
momentum = 0.9  # momentum factor passed to the SGD optimizer
seed = 42  # seeds the generator used for the train/validation split

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
print("I am here")

# Compute the per-channel mean and standard deviation of the training images
# (after ToTensor) so they can be used for normalization.
dataset = torchvision.datasets.ImageFolder("./ASL_dataset/asl_alphabet_train/asl_alphabet_train", transform=transforms.ToTensor())
loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False, num_workers=4)

print("I am here")
# Accumulate exact per-channel sums over every pixel instead of averaging
# per-batch statistics: averaging batch means over-weights a short final
# batch, and the average of per-batch stds is not the dataset std.
channel_sum = 0.0
channel_sq_sum = 0.0
total_images = 0  # number of images seen (not number of batches)
total_pixels = 0  # number of pixels per channel seen so far

for imgs, _ in loader:
    B = imgs.size(0)
    # Flatten the spatial dimensions -> (B, C, H*W); float64 keeps the running
    # sums accurate over a large number of pixels.
    imgs = imgs.view(B, imgs.size(1), -1).double()
    channel_sum += imgs.sum(dim=[0, 2])
    channel_sq_sum += (imgs ** 2).sum(dim=[0, 2])
    total_images += B
    total_pixels += B * imgs.size(2)

# mean = E[x]; std = sqrt(E[x^2] - E[x]^2). The clamp guards against a tiny
# negative variance caused by floating-point rounding.
mean = channel_sum / total_pixels
std = (channel_sq_sum / total_pixels - mean ** 2).clamp(min=0).sqrt()
mean, std = mean.float(), std.float()
print(f"Mean: {mean}")
print(f"STD: {std}")

KeyboardInterrupt: 

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

########################################################################
# ImageFolder yields PIL images. ToTensor converts them to float tensors in
# the range [0, 1]; Normalize with mean 0.5 and std 0.5 per channel then
# rescales them to the range [-1, 1].

# Transformations will be added here.
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

trainvalset = torchvision.datasets.ImageFolder(
        root = "./ASL_dataset/asl_alphabet_train/asl_alphabet_train",
        transform = transform
    )

# Fractions of the labelled data used for training and validation.
train_val_split = [0.9, 0.1]

# Train and Validation Set Split
# Rounding each fraction independently can yield lengths that do not add up
# to len(trainvalset), which makes random_split raise. Round only the training
# share and give the remainder to validation so the lengths always sum exactly.
num_train = round(train_val_split[0] * len(trainvalset))
num_val = len(trainvalset) - num_train
trainset, valset = torch.utils.data.random_split(trainvalset,
                                [num_train, num_val],
                                generator=torch.Generator().manual_seed(seed))

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                        shuffle=True, num_workers=2)

# Evaluation does not benefit from shuffling; a fixed order keeps validation
# runs reproducible.
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                        shuffle=False, num_workers=2)

# NOTE(review): this path ends in "asl_alphabet_train" under the test
# directory -- confirm that this is the intended folder layout.
testset = torchvision.datasets.ImageFolder(
        root = "./ASL_dataset/asl_alphabet_test/asl_alphabet_train",
        transform = transform
    )

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                        shuffle=False, num_workers=2)



In [None]:
# CNN for ASL
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and two linear layers.

    Args:
        input_size: (height, width) of the 3-channel input images. The size of
            the first fully connected layer is derived from it. The default
            (200, 200) yields 147456 flattened features.
        num_classes: Number of output logits. Defaults to 29.
    """

    def __init__(self, input_size=(200, 200), num_classes=29):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(5, 5))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3))

        # Push a dummy image through the convolutional stack to find the
        # flattened feature count, rather than hard-coding it for one image
        # size. This consumes no random numbers, so weight initialization is
        # unaffected.
        with torch.no_grad():
            probe = torch.zeros(1, 3, *input_size)
            flat_features = self._extract_features(probe).size(1)

        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def _extract_features(self, x):
        """Apply both conv/ReLU/pool stages and flatten all but the batch dim."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        return torch.flatten(x, 1)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for a batch of images."""
        x = self._extract_features(x)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


net = Net()

In [None]:
# Cross-Entropy Loss Criterion
import torch.optim as optim

# Cross-entropy over the network's outputs; SGD with momentum updates every
# parameter of `net`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

In [None]:
# GPU Integration
# Use the first CUDA device when one is available, otherwise stay on the CPU,
# then move the network to that device.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
net.to(device)