In [4]:
pip install torch

Collecting torch
  Downloading torch-2.6.0-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.6.0-cp313-cp313-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 MB ? eta -:--:--
   ---------------------------------------- 1.0/204.1 MB 10.1 MB/s eta 0:00:21
   - -------------------------------------- 7.6/204.1 MB 24.4 MB/s eta 0:00:09
   -- ------------------------------------- 14.4/204.1 MB 27.3 MB/s eta 0:00:07
   ---- ---------------------------

In [5]:
pip install torchvision

Collecting torchvision
  Downloading torchvision-0.21.0-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting numpy (from torchvision)
  Downloading numpy-2.2.4-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Downloading pillow-11.2.1-cp313-cp313-win_amd64.whl.metadata (9.1 kB)
Downloading torchvision-0.21.0-cp313-cp313-win_amd64.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   ---------------------------------------- 1.6/1.6 MB 12.4 MB/s eta 0:00:00
Downloading pillow-11.2.1-cp313-cp313-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 2.7/2.7 MB 23.7 MB/s eta 0:00:00
Downloading numpy-2.2.4-cp313-cp313-win_amd64.whl (12.6 MB)
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------- ----------------------- 5.2/12.6 MB 28.1 MB/s eta 0:00:01
   ---------------------------------- ----- 

In [33]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset


In [71]:
# --- Training configuration ---------------------------------------------
DATASET_DIR = 'dataset'      # root folder holding one sub-folder per class
BATCH_SIZE = 64              # images per optimisation step
EPOCHS = 60                  # full passes over the training data
LEARNING_RATE = 5e-4         # step size handed to the optimizer

# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Folder name -> class index. Left and right eyes share a label, so the
# task is binary: 0 = closed, 1 = open.
class_map = {
    **dict.fromkeys(('closedLeftEyes', 'closedRightEyes'), 0),
    **dict.fromkeys(('openLeftEyes', 'openRightEyes'), 1),
}


In [72]:
class EyeDataset(Dataset):
    """Dataset of eye images stored in one sub-folder per class.

    Every file under ``root_dir/<folder>`` whose name ends with one of
    ``extensions`` (compared case-insensitively) becomes one sample labelled
    with the integer that the folder maps to.

    Args:
        root_dir: Directory containing the class sub-folders.
        transform: Optional callable applied to each loaded image.
        class_to_label: Mapping of sub-folder name to integer label. When
            omitted, the notebook-level ``class_map`` is used.
        extensions: File suffix (or iterable of suffixes) to accept.
            Defaults to ``('.jpg',)``.

    Raises:
        OSError: Propagated from ``os.listdir`` when a class folder cannot
            be read (e.g. it does not exist).
    """

    def __init__(self, root_dir, transform=None, class_to_label=None,
                 extensions=('.jpg',)):
        self.transform = transform
        if class_to_label is None:
            # Fall back to the mapping defined in the configuration cell.
            class_to_label = class_map
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        # Index the whole dataset up front as (path, label) pairs.
        self.samples = []
        for folder, label in class_to_label.items():
            folder_path = os.path.join(root_dir, folder)
            # os.listdir order is arbitrary; sort so indices are reproducible.
            for fname in sorted(os.listdir(folder_path)):
                # Compare in lower case so files such as 'IMG.JPG' are kept.
                if fname.lower().endswith(suffixes):
                    self.samples.append((os.path.join(folder_path, fname), label))

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        path, label = self.samples[idx]
        image = default_loader(path)
        # Test against None explicitly: a transform object may be falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [73]:
'''
    This is a lightweight CNN designed for binary eye-state classification (open vs. closed) on small grayscale
    images (24x24 px) of individual eyes. The architecture consists of two convolutional blocks, each made of two
    3x3 convolutional layers with BatchNorm and ReLU followed by 2x2 max pooling, and then two fully connected
    layers. Dropout is applied between the fully connected layers to reduce overfitting.
'''

class EyeBlinkCNN(nn.Module):
    """Small CNN producing 2-class logits from single-channel eye crops.

    Layout: two blocks of (conv3x3 -> BatchNorm -> ReLU) x 2 followed by a
    2x2 max-pool, then a 128-unit fully connected layer with dropout and a
    2-unit output layer. ``fc1`` expects 64 * 6 * 6 features, which is what
    the two pooling stages produce for a 24x24 input.

    Layer attribute names are part of the saved ``state_dict`` keys and must
    not be renamed.
    """

    def __init__(self):
        super().__init__()
        # Block 1: 1 -> 32 channels; padding=1 keeps the spatial size.
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Block 2: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Classifier head.
        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        """Return raw class logits of shape (batch, 2).

        Args:
            x: Tensor of shape (batch, 1, H, W); H = W = 24 yields the
                64 * 6 * 6 features that ``fc1`` requires.

        Raises:
            RuntimeError: From ``fc1`` when the flattened feature size is
                not 64 * 6 * 6 (i.e. the input has an unsupported size).
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool1(x)

        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool2(x)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 6 * 6), this never folds surplus features into the
        # batch axis, so a wrongly sized input fails loudly at fc1.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [74]:
# Augmentation and preprocessing pipeline applied to each training image:
# convert to one channel, randomly mirror and rotate, then scale to a tensor
# and normalise with mean 0.5 / std 0.5.
train_transforms = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [75]:
# Index the image folders once, then serve shuffled mini-batches from them.
dataset = EyeDataset(root_dir=DATASET_DIR, transform=train_transforms)
train_loader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


In [76]:
# Build the network on the selected device, then create the loss and the
# optimizer that updates the network's parameters.
model = EyeBlinkCNN()
model = model.to(DEVICE)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [77]:
# Standard training loop for classification with cross-entropy loss and the
# Adam optimizer. Each epoch runs over every batch of `train_loader`; the
# reported loss and accuracy are averages over the samples seen that epoch
# (training data only; no held-out set is evaluated here).
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        # The loss object is created as `loss_func`; `criterion` is not
        # defined in the cells above and raises NameError on a fresh kernel.
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        # loss.item() is the batch mean, so weight it by the batch size to
        # accumulate a per-sample sum.
        running_loss += loss.item() * images.size(0)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    # Divide by the number of samples actually processed this epoch.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')


Epoch 1/60, Loss: 0.4029, Accuracy: 81.26%
Epoch 2/60, Loss: 0.1857, Accuracy: 92.70%
Epoch 3/60, Loss: 0.1417, Accuracy: 94.51%
Epoch 4/60, Loss: 0.1196, Accuracy: 95.40%
Epoch 5/60, Loss: 0.1149, Accuracy: 95.46%
Epoch 6/60, Loss: 0.0986, Accuracy: 96.41%
Epoch 7/60, Loss: 0.0970, Accuracy: 96.26%
Epoch 8/60, Loss: 0.0996, Accuracy: 96.04%
Epoch 9/60, Loss: 0.0924, Accuracy: 96.60%
Epoch 10/60, Loss: 0.0848, Accuracy: 97.05%
Epoch 11/60, Loss: 0.0837, Accuracy: 96.82%
Epoch 12/60, Loss: 0.0827, Accuracy: 96.86%
Epoch 13/60, Loss: 0.0782, Accuracy: 97.13%
Epoch 14/60, Loss: 0.0726, Accuracy: 97.30%
Epoch 15/60, Loss: 0.0724, Accuracy: 97.32%
Epoch 16/60, Loss: 0.0705, Accuracy: 97.28%
Epoch 17/60, Loss: 0.0690, Accuracy: 97.48%
Epoch 18/60, Loss: 0.0605, Accuracy: 97.73%
Epoch 19/60, Loss: 0.0591, Accuracy: 97.96%
Epoch 20/60, Loss: 0.0663, Accuracy: 97.46%
Epoch 21/60, Loss: 0.0589, Accuracy: 97.79%
Epoch 22/60, Loss: 0.0579, Accuracy: 97.81%
Epoch 23/60, Loss: 0.0563, Accuracy: 97.9

In [78]:
# Persist only the model's state_dict (not the module object itself).
torch.save(obj=model.state_dict(), f='eye_blink_cnn.pth')
