## Custom Dataset

In [1]:
# Use .jpg images for now
import os
import glob
import cv2

import numpy as np

### Image Preprocessing

In [2]:
width = 1024
height = 1024

dir = os.getcwd()

#### Brightness

In [3]:
gamma = 0.35

inv_gamma = 1.0 / gamma
table = np.array([((i / 255.0) ** inv_gamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")

#### Rotation

In [4]:
rotation = cv2.getRotationMatrix2D(center=(width/2, height/2), angle=1, scale=1.0)
rotation

array([[ 0.9998477 ,  0.01745241, -8.85765202],
       [-0.01745241,  0.9998477 ,  9.01361218]])

#### Cropping and Scaling

### Image Labeling

### Dataset Split

In [5]:
# NOTE: Did with Roboflow to label images and create train, test datasets

### Dataset Augmentation

### Output Data

In [6]:
# Send prepared dataset as data input to the model


### Model

#### Dataset Definition

In [12]:
import cv2
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor

class CharacterDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform or ToTensor()

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        # Load the image
        image = cv2.imread(image_path)

        # Convert BGR image to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Apply transformations
        image = self.transform(image)

        return image, label


#### Create Image Paths

In [13]:
dir = "./train/"
image_paths = []
labels = []

for path, subdirs, files in os.walk(dir):
    for name in files:
        image_paths.append(os.path.join(path, name))
        labels.append(str(os.path.relpath(path, dir)))

In [14]:
dir = "./preprocessed/"
preprocessed_paths = []

for path, subdirs, files in os.walk(dir):
    for name in files:
        preprocessed_paths.append(os.path.join(path, name))

#### Apply Preprocessing Steps

In [15]:
i = 1
for file in glob.glob(pathname="**/*.jpg", root_dir="./train", recursive=True):
    img = cv2.imread("train/" + file)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized_image = cv2.resize(gray, (width, height))
    cropped_image = resized_image[int(height/10.0):int(height/1.1), int(width/4.0):int(width/1.3)]          # img[startY:endY, startX:endX] = resized_image[100:900, 250:750]
    distance = cv2.warpAffine(cropped_image, rotation, cropped_image.shape[1::-1])
    
    final_img = cv2.LUT(distance, table)
    # cv2.imwrite("./preprocessed/img_" + str(i) + ".jpg", final_img)

    # Custom window
    cv2.namedWindow("Image", cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow("Image", 750, 750)     # size of window; would be better as percentage/ratio
    cv2.moveWindow("Image", 600, 100)       # center window to 24" monitor; would be better as percentage
    cv2.imshow("Image", final_img)

    cv2.waitKey(250)
    i += 1

cv2.destroyAllWindows()

#### Model Definition

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Define your CNN architecture for face detection
class FaceDetectionCNN(nn.Module):
    def __init__(self):
        super(FaceDetectionCNN, self).__init__()
        # Define the layers of your CNN
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # self.fc = nn.Linear(32 * 16 * 16, 3)  # Output size 3 for multi-class classification (one of 3 characters)          # NOTE: mat2; 8192x2 = (32*16*16,2)
        self.fc = nn.Linear(3276800, 3)     # FIXME: Changed for smoke test; change back when done.

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc(x)
        return x

# Create an instance of your CNN
model = FaceDetectionCNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Prepare your training data (assuming you have a dataset class)
train_dataset = CharacterDataset(image_paths, labels)
train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

# Training loop
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print("Epoch count: ", epoch)
    running_loss = 0.0
    for images, train_labels in train_loader:
        print("Dataloader labels: ", train_labels)
        images = images.to(device)

        # Encode labels as integers
        label_encoder = LabelEncoder()
        integer_labels = label_encoder.fit_transform(train_labels)

        # Convert integer labels to tensor
        integer_labels_tensor = torch.tensor(integer_labels)

        # One-hot encode the integer labels
        onehot_labels = F.one_hot(integer_labels_tensor)

        # loss_labels = onehot_labels.to(device)
        loss_labels = integer_labels_tensor.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)                                     # NOTE: mat1; 9x3276800 = (9 total images, 640 width * 640 height * (24 bit depth / 3 channels) ) -> My guess
        print("Outputs: ", outputs)
        print("Labels: ", loss_labels)
        loss = criterion(outputs, loss_labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# Save the trained model
torch.save(model.state_dict(), "face_detection_model.pt")


Epoch count:  0
Dataloader labels:  ('John Casey', 'Chuck Bartowski', 'Sarah Walker')


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x3276800 and 8192x3)