## Custom Dataset

In [1]:
# Use .jpg images for now
import os
import glob
import cv2

import numpy as np

### Image Preprocessing

In [2]:
width = 1024
height = 1024

dir = os.getcwd()

#### Brightness

In [3]:
gamma = 0.35

inv_gamma = 1.0 / gamma
table = np.array([((i / 255.0) ** inv_gamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")

#### Rotation

In [4]:
rotation = cv2.getRotationMatrix2D(center=(width/2, height/2), angle=1, scale=1.0)
rotation

array([[ 0.9998477 ,  0.01745241, -8.85765202],
       [-0.01745241,  0.9998477 ,  9.01361218]])

#### Cropping and Scaling

### Image Labeling

### Dataset Split

In [5]:
# NOTE: Did with Roboflow to label images and create train, test datasets

### Dataset Augmentation

### Output Data

In [6]:
# Send prepared dataset as data input to the model


### Model

#### Dataset Definition

In [7]:
import cv2
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor

class CharacterDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform or ToTensor()

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        # Load the image
        image = cv2.imread(image_path)

        # Convert BGR image to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Apply transformations
        image = self.transform(image)

        return image, label


#### Create Image Paths

In [8]:
dir = "./train/"
image_paths = []
labels = []

for path, subdirs, files in os.walk(dir):
    for name in files:
        image_paths.append(os.path.join(path, name))
        labels.append(str(os.path.relpath(path, dir)))

In [9]:
dir = "./preprocessed/"
preprocessed_paths = []

for path, subdirs, files in os.walk(dir):
    for name in files:
        preprocessed_paths.append(os.path.join(path, name))

#### Apply Preprocessing Steps

In [10]:
i = 1
for file in glob.glob(pathname="**/*.jpg", root_dir="./train", recursive=True):
    img = cv2.imread("train/" + file)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized_image = cv2.resize(gray, (width, height))
    cropped_image = resized_image[int(height/10.0):int(height/1.1), int(width/4.0):int(width/1.3)]          # img[startY:endY, startX:endX] = resized_image[100:900, 250:750]
    distance = cv2.warpAffine(cropped_image, rotation, cropped_image.shape[1::-1])
    
    final_img = cv2.LUT(distance, table)
    # cv2.imwrite("./preprocessed/img_" + str(i) + ".jpg", final_img)

    # Custom window
    cv2.namedWindow("Image", cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow("Image", 750, 750)     # size of window; would be better as percentage/ratio
    cv2.moveWindow("Image", 600, 100)       # center window to 24" monitor; would be better as percentage
    cv2.imshow("Image", final_img)

    cv2.waitKey(500)
    i += 1

cv2.destroyAllWindows()

#### Model Definition

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Define your CNN architecture for face detection
class FaceDetectionCNN(nn.Module):
    def __init__(self):
        super(FaceDetectionCNN, self).__init__()
        # Define the layers of your CNN
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(in_features=3276800, out_features=3)  # Output size 3 for multi-class classification (one of 3 TV characters)          # NOTE: mat2; 8192x2 = (32*16*16,2)
        # self.fc = nn.Linear(32 * 16 * 16 * 25 * 16, 3)
        # self.fc2 = nn.Linear(in_features=1000, out_features=3)        # BUG: Breaks computer and makes it lag. Why?

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc(x)
        # x = self.fc2(x)
        return x

# Create an instance of your CNN
model = FaceDetectionCNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Prepare your training data (assuming you have a dataset class)
train_dataset = CharacterDataset(image_paths, labels)
train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

# Training loop
num_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# TODO: Need to create validation set to do hyperparameter tuning and reduce loss
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, train_labels in train_loader:
        images = images.to(device)

        # Encode labels as integers
        label_encoder = LabelEncoder()
        integer_labels = label_encoder.fit_transform(train_labels)

        # Convert integer labels to tensor
        integer_labels_tensor = torch.tensor(integer_labels)

        # One-hot encode the integer labels
        onehot_labels = F.one_hot(integer_labels_tensor)

        # loss_labels = onehot_labels.to(device)
        loss_labels = integer_labels_tensor.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)                                     # NOTE: mat1; 9x3276800 = (9 total images, 640 width * 640 height * (24 bit depth / 3 channels) ) -> My guess
        loss = criterion(outputs, loss_labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# Save the trained model
torch.save(model.state_dict(), "characterapp_model.pt")


Epoch [1/20], Loss: 10.8438
Epoch [2/20], Loss: 1.0363
Epoch [3/20], Loss: 0.9034
Epoch [4/20], Loss: 0.5747
Epoch [5/20], Loss: 1.7716
Epoch [6/20], Loss: 1.0675
Epoch [7/20], Loss: 1.0712
Epoch [8/20], Loss: 1.0377
Epoch [9/20], Loss: 0.8634
Epoch [10/20], Loss: 1.3226
Epoch [11/20], Loss: 0.9793
Epoch [12/20], Loss: 0.9214
Epoch [13/20], Loss: 0.8885
Epoch [14/20], Loss: 0.7882
Epoch [15/20], Loss: 0.9671
Epoch [16/20], Loss: 1.0121
Epoch [17/20], Loss: 0.9929
Epoch [18/20], Loss: 0.9775
Epoch [19/20], Loss: 1.0904
Epoch [20/20], Loss: 0.8699


### Test Dataset

#### Load model

In [12]:
model = FaceDetectionCNN()      # TODO: Try using .to(device) to see if device makes a difference
model.load_state_dict(torch.load("characterapp_model.pt"))

model.eval()

FaceDetectionCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=3276800, out_features=3, bias=True)
)

#### Get Predictions

In [13]:
dir = "./test/"
test_image_paths = []

for path, subdirs, files in os.walk(dir):
    for name in files:
        test_image_paths.append(os.path.join(path, name))

test_image_paths

['./test/Chuck Bartowski\\IMG_4747_JPG.rf.58fa7c9b41f1a852276425e8329f317a.jpg',
 './test/Chuck Bartowski\\IMG_4748_JPG.rf.fccc95a51b4753b7eb76924fab29d939.jpg',
 './test/John Casey\\IMG_4758_JPG.rf.44dcec4cd273ad617698cf53f7359df8.jpg',
 './test/John Casey\\IMG_4759_JPG.rf.fac4c83872471825061abb92b1db1fbd.jpg',
 './test/Sarah Walker\\IMG_4752_JPG.rf.42e9a89946f98de86f4bc4c7f4a7b42b.jpg',
 './test/Sarah Walker\\IMG_4753_JPG.rf.96e482bb0c7af439c3de4d9f56cfd98a.jpg']

In [14]:
test_labels = ["Chuck Bartowski", "Chuck Bartowski", "John Casey", "John Casey", "Sarah Walker", "Sarah Walker"]

In [15]:
test_dataset = CharacterDataset(test_image_paths, test_labels)
test_loader = DataLoader(test_dataset, batch_size=3, shuffle=False)
test_loader

<torch.utils.data.dataloader.DataLoader at 0x179ffe03f10>

In [19]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)

with torch.no_grad():
    for images, test_labels in test_loader:
        images = images.detach().clone().to(device)        # NOTE: Fixed warning "UserWarning: To copy construct from a tensor..."

        # One-hot encode the integer labels
        label_encoder = LabelEncoder()                              # Encode labels as integers
        integer_labels = label_encoder.fit_transform(test_labels)
        integer_labels_tensor = torch.tensor(integer_labels)        # Convert integer labels to tensor
        onehot_labels = F.one_hot(integer_labels_tensor)
        test_integer_labels = onehot_labels.to(device)

        predictions = model(images)

        # Find the class label index with the largest corresponding probability
        predicted_indices = torch.argmax(predictions, dim=1)
        predicted_labels = [test_labels[idx] for idx in predicted_indices]
        print(predicted_labels)

        # TODO: Calculate accuracy with test_integer_labels and ground truth

['Chuck Bartowski', 'Chuck Bartowski', 'Chuck Bartowski']
['John Casey', 'John Casey', 'John Casey']
