## Importing Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Colab-only: mount Google Drive at /content/drive; the checkpoint load/save cells use
# paths under this mount point. force_remount=True remounts even if already mounted.
from google.colab import drive
drive.mount("/content/drive", force_remount= True)

## CNN

In [None]:
class CNN(nn.Module):
  """Small two-stage convolutional classifier for 48x48 images.

  Layout: conv 3x3 (8 maps) -> ReLU -> 2x2 max-pool -> conv 3x3 (16 maps) -> ReLU
  -> 2x2 max-pool -> flatten -> Linear(64) -> Linear(8) -> Linear(num_classes).

  Args:
    in_channels: Number of channels of the input images (1 = grayscale).
    num_classes: Number of output logits.

  The first fully connected layer is sized for 48x48 inputs: the padded 3x3
  convolutions keep the spatial size and the two 2x2 poolings reduce
  48 -> 24 -> 12, which yields 16 * 12 * 12 flattened features.
  """
  def __init__(self, in_channels = 1, num_classes = 2):
    super(CNN, self).__init__()
    # Honour the constructor arguments instead of hard-coding 1 and 2; the defaults
    # reproduce the original layer shapes, so existing checkpoints still load.
    self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 8, kernel_size=(3,3),  stride = (1,1), padding = (1,1))
    self.pool = nn.MaxPool2d(kernel_size = (2,2), stride = (2,2))
    self.conv2 = nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size=(3,3),  stride = (1,1), padding = (1,1))
    self.fc1 = nn.Linear(16*12*12, 64)
    self.fc2 = nn.Linear(64, 8)
    self.fc3 = nn.Linear(8, num_classes)

  def forward(self, x):
    """Map a batch of shape (N, in_channels, 48, 48) to logits of shape (N, num_classes)."""
    x = F.relu(self.conv1(x))
    x = self.pool(x)
    x = F.relu(self.conv2(x))
    x = self.pool(x)
    # Flatten everything except the batch dimension.
    x = x.reshape(x.shape[0], -1)
    # NOTE(review): there is no non-linearity between the three linear layers, so together
    # they act as a single affine map. Left unchanged on purpose: adding activations would
    # change what previously saved weights compute.
    x = self.fc1(x)
    x = self.fc2(x)
    x = self.fc3(x)
    return x

## Data Importing

In [None]:
#Function for importing datasets
#Folder --> file arrays --> individual images --> face detection and crop --> image array

def create_img_array(data_max, usable_max, filepath, progressDisplay = False):
  """Build a list of 48x48 grayscale face crops from the images in a folder.

  For each directory entry, a Haar frontal-face cascade is run on the grayscale
  image and then again inside every detected face region; the last inner
  detection is cropped and resized to 48x48. Images without such a detection
  (or that cannot be read) are skipped.

  Entries are de-duplicated by the part of their name before the first '_20':
  only the first entry with a given prefix is processed.

  Args:
    data_max: Maximum number of directory entries to look at.
    usable_max: Stop as soon as this many crops have been collected.
    filepath: Folder containing the images.
    progressDisplay: When true, print a progress line for every 10th entry
      index that was processed and a message for each unusable image.

  Returns:
    A list of 2-D uint8 arrays of shape (48, 48).
  """
  image_path_array = os.listdir(filepath)
  # Loading the cascade XML is loop-invariant, so do it once rather than per image.
  faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
  seen_files = set()
  new_images_np_array = []
  data_finished = 0

  # Never index past the directory listing, even if data_max overstates it.
  for x in range(min(data_max, len(image_path_array))):
    if data_finished == usable_max:
      break

    filename = image_path_array[x].split('_20')[0]
    if filename in seen_files:
      continue
    seen_files.add(filename)

    frame = cv2.imread(f"{filepath}/{image_path_array[x]}")

    # Reset per image so a crop can never leak over from a previous file.
    face_roi = None
    if frame is not None:  # imread returns None for unreadable / non-image entries
      gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
      for x_, y, w, h in faceCascade.detectMultiScale(gray, 1.1, 4):
        roi_gray = gray[y:y+h, x_:x_+w]
        # Crop from the grayscale image directly. The previous version drew a green
        # debug rectangle on the colour frame and then cropped a view of that same
        # frame, which painted rectangle pixels into the training crops.
        for (ex, ey, ew, eh) in faceCascade.detectMultiScale(roi_gray):
          face_roi = roi_gray[ey: ey+eh, ex:ex + ew]

    if face_roi is not None and face_roi.size > 0:
      new_images_np_array.append(cv2.resize(face_roi, (48, 48)))
      data_finished += 1
    elif progressDisplay:
      print("Data unusable")

    if progressDisplay:
      if (x + 1) % 10 == 0:
        print(f"{x + 1}/{data_max}")

  print(f"{data_finished}/{data_max} Successfully used")
  return new_images_np_array

In [None]:
#normalize array range from 0 - 255 to 0 - 1
def normalize(data_array):
  """Return float32 copies of the given arrays with every value divided by 255.

  The input arrays are left untouched; the result is a new list in the same order.
  """
  return [image.astype("float32") / 255 for image in data_array]

In [None]:
# TODO: replace both placeholders with the real image folders before running.
# The bare `name = #file path` form was a syntax error; these placeholder strings keep
# the cell valid and make the first os.listdir() call fail loudly if they are left unset.
loser_path = "<path-to-loser-images>"    # folder containing the "loser" images
winner_path = "<path-to-winner-images>"  # folder containing the "winner" images

In [None]:
#Number of files
import os

def _count_regular_files(folder):
  """Count the entries of `folder` that are regular files (sub-directories are skipped)."""
  return sum(1 for entry in os.listdir(folder) if os.path.isfile(os.path.join(folder, entry)))

loser_numof_files = _count_regular_files(loser_path)
print('Loser1:', loser_numof_files, "files")

winner_numof_files = _count_regular_files(winner_path)
print('Winner1:', winner_numof_files, "files")

Loser1: 1065 files
Winner1: 1129 files


In [None]:
#Create image array
import cv2
# Losers: look at every file and keep every usable crop.
loser_faces = create_img_array(
    data_max = loser_numof_files,
    usable_max = loser_numof_files,
    filepath = loser_path,
    progressDisplay = False,
)
# Winners: stop once as many crops as len(loser_faces) have been collected.
winner_faces = create_img_array(
    data_max = winner_numof_files,
    usable_max = len(loser_faces),
    filepath = winner_path,
    progressDisplay = False,
)

1006/1065 Successfully used
1006/1129 Successfully used


In [None]:
#Normalize 0-255 --> 0-1
loser_faces_normalized = normalize(data_array = loser_faces)
winner_faces_normalized = normalize(data_array = winner_faces)

## Data Randomization & Organization

In [None]:
#train test split, creating labels
import numpy as np
from sklearn.model_selection import train_test_split
wf,lf = winner_faces_normalized,loser_faces_normalized
# Label convention used throughout the notebook: 1 = winner, 0 = loser.
winner_labels = [1] * len(wf)
loser_labels = [0] * len(lf)
print(len(wf))
print(len(lf))

labels = winner_labels + loser_labels

# Winners first, then losers, matching the order of `labels`.
faces = np.concatenate([wf, lf], axis = 0)
# A fixed random_state makes the split identical on every run, so the test set stays the
# same across sessions that reload a checkpoint; stratify keeps the winner/loser ratio
# the same in both splits. The default 75/25 split size is unchanged.
x_train, x_test, y_train, y_test = train_test_split(
    faces, labels, random_state = 42, stratify = labels)

1006
1006


In [None]:
#batching
def roundDown(n):
    """Return the largest integer that is less than or equal to ``n``.

    The previous implementation formatted with "{:.0f}", which rounds to the
    nearest integer instead of rounding down.
    """
    import math
    return math.floor(n)

def batch(all_data, all_labels, batch_size):
  """Split samples and labels into consecutive full batches of ``batch_size``.

  A trailing remainder that does not fill a whole batch is dropped. The inputs
  are only read, never modified.

  Args:
    all_data: Sequence of samples.
    all_labels: Sequence of labels, aligned with ``all_data``.
    batch_size: Number of samples per batch; must be positive.

  Returns:
    (new_data, new_labels): ``new_data[i]`` is a one-element list wrapping the
    list of samples of batch ``i`` (the wrapper becomes a size-1 axis once the
    result is converted to a tensor); ``new_labels[i]`` is the list of labels
    of batch ``i``.

  Raises:
    ValueError: If ``batch_size`` is not positive or there are fewer labels
      than batched samples.
  """
  if batch_size <= 0:
    raise ValueError("batch_size must be a positive integer")
  num = roundDown(len(all_data) / batch_size)
  if len(all_labels) < num * batch_size:
    raise ValueError("all_labels is shorter than the data being batched")

  new_data = []
  new_labels = []
  # Iterate over every full batch. The old range(num - 1) only worked when the
  # round-to-nearest roundDown happened to round up; otherwise it dropped a full batch.
  for index in range(num):
    start = index * batch_size
    stop = start + batch_size
    # Slicing replaces the repeated pop(0), which was quadratic and emptied the
    # caller's lists as a side effect.
    new_data.append([list(all_data[start:stop])])
    new_labels.append(list(all_labels[start:stop]))
  return new_data, new_labels

In [None]:
# Batch both splits into groups of 64; x_* are converted to plain lists first.
batched_train_data, batched_train_label = batch(
    all_data = x_train.tolist(), all_labels = y_train, batch_size = 64)
batched_test_data, batched_test_label = batch(
    all_data = x_test.tolist(), all_labels = y_test, batch_size = 64)

In [None]:
# Convert the batched training lists to tensors and show their sizes.
train_data_label = torch.as_tensor(batched_train_label)
train_data_data = torch.as_tensor(batched_train_data)
print(train_data_data.size()) #Expected: torch.Size([23, 1, 64, 48, 48])
print(train_data_label.size()) #Expected: torch.Size([23, 64])

torch.Size([23, 1, 64, 48, 48])
torch.Size([23, 64])


In [None]:
# Convert the batched test lists to tensors and show their sizes.
test_data_label = torch.as_tensor(batched_test_label)
test_data_data = torch.as_tensor(batched_test_data)
print(test_data_data.size()) #Expected: torch.Size([7, 1, 64, 48, 48])
print(test_data_label.size()) #Expected: torch.Size([7, 64])

torch.Size([7, 1, 64, 48, 48])
torch.Size([7, 64])


## Accuracy Calculation

In [None]:
def check_accuracy(data, label, model, printing=False, matrix_data = False):
  """Evaluate ``model`` on pre-batched data and return its accuracy in percent.

  Args:
    data: Indexable collection of batches. Each ``data[i]`` has its first two
      axes swapped before it is fed to the model (the batching cell produces
      batches shaped (1, batch, H, W), which become (batch, 1, H, W)).
    label: Indexable collection of per-batch label vectors, aligned with ``data``.
    model: The ``nn.Module`` to evaluate. Inputs are moved to the device of its
      parameters, so no global ``device`` variable is required.
    printing: When true, print the correct/total counts and the accuracy.
    matrix_data: When true, also return the confusion-matrix counts.

  Returns:
    The accuracy as a float percentage (0.0 when there are no samples), or, when
    ``matrix_data`` is true, the tuple ``(accuracy, WW, WL, LW, LL)`` where the
    first letter is the predicted class and the second the actual class
    (W = label 1, L = label 0): WW = predicted 1 / actual 1, WL = predicted 1 /
    actual 0, LW = predicted 0 / actual 1, LL = predicted 0 / actual 0.

  The model is switched to eval mode for the evaluation and restored to whatever
  mode it was in before the call.
  """
  # Evaluate on the device the model lives on instead of relying on a global.
  try:
    run_device = next(model.parameters()).device
  except StopIteration:
    run_device = torch.device("cpu")

  num_correct = 0
  num_samples = 0
  WW = 0
  WL = 0
  LW = 0
  LL = 0

  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for i in range(len(data)):
        x = torch.as_tensor(data[i]).transpose(0, 1)
        x = x.to(device = run_device)
        y = torch.as_tensor(label[i])
        y = y.to(device = run_device)

        scores = model(x)
        _, predictions = scores.max(1)
        # .item() keeps the counters as plain Python ints, so the printout below
        # shows numbers rather than tensor reprs.
        num_correct += int((predictions == y).sum().item())
        num_samples += predictions.size(0)
        if (matrix_data):
          predicted_winner = predictions == 1
          predicted_loser = predictions == 0
          WW += int(((y == 1) & predicted_winner).sum().item())
          LW += int(((y == 1) & predicted_loser).sum().item())
          WL += int(((y == 0) & predicted_winner).sum().item())
          LL += int(((y == 0) & predicted_loser).sum().item())
  finally:
    # Restore the caller's mode rather than unconditionally forcing train mode.
    model.train(was_training)

  # Guard against empty input instead of dividing by zero.
  accuracy = float(num_correct)/float(num_samples)*100 if num_samples else 0.0

  if (printing):
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")

  if (matrix_data):
    return accuracy, WW, WL, LW, LL
  else:
    return accuracy

## Parameters & Training

In [None]:
# Tunable hyperparameters for the training cell below.
num_epochs = 500        # number of passes over the batched training data
# NOTE(review): the checkpoint file names used in this notebook mention "0.001LR";
# confirm which learning rate is intended.
learning_rate = 0.0001  # step size handed to the Adam optimizer
# NOTE(review): the batching cell passes the literal 64 to batch() rather than this
# variable, so changing it here alone has no effect.
batch_size = 64

In [None]:
# Constant Parameters
# NOTE(review): num_classes is not passed to CNN() below, which is built with its defaults.
num_classes = 2
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
# CrossEntropyLoss takes the raw logits returned by the model and integer class labels.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [None]:
# Resume from a previously trained checkpoint. map_location remaps the stored tensors onto
# `device`, so a checkpoint saved on a GPU also loads on a CPU-only runtime.
model.load_state_dict(torch.load("/content/drive/MyDrive/CNNWeights_Apr8_100E_NewD_0.001LR.torch", map_location = device))

<All keys matched successfully>

In [None]:
# Per-epoch history: summed training loss, a weight snapshot and the test confusion counts.
loss_list = []
weight_list = []
# Best test accuracy seen so far, the training accuracy at that epoch and its 0-based index.
max_test_acc = 0
train_acc = 0
epoch_val = 0
WW_list = []
WL_list = []
LW_list = []
LL_list = []
# Derive the batch count from the data instead of hard-coding it.
num_train_batches = len(train_data_data)
for epoch in range(num_epochs):
  total_loss = 0
  for batch_idx in range(num_train_batches):
    # Each stored batch is (1, batch, H, W); swap the first two axes to get (batch, 1, H, W).
    data = torch.as_tensor(train_data_data[batch_idx]).transpose(0, 1)
    data = data.to(device = device)
    targets = train_data_label[batch_idx]
    targets = targets.to(device = device)

    scores = model(data)
    loss = criterion(scores, targets)
    total_loss += loss.item()

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()
  accuracy, WW, WL, LW, LL = check_accuracy(test_data_data, test_data_label, model, matrix_data = True)
  WW_list.append(WW)
  WL_list.append(WL)
  LW_list.append(LW)
  LL_list.append(LL)
  print(f"Epoch {epoch+1}/{num_epochs} Loss: {total_loss:.4f} Testing Accuracy: {accuracy:.2f}%")
  # NOTE(review): the best epoch is selected on the test split, so the reported best test
  # accuracy is optimistic; a separate validation split would avoid this.
  if accuracy > max_test_acc:
    max_test_acc = accuracy
    train_acc = check_accuracy(train_data_data, train_data_label, model)
    epoch_val = epoch
  loss_list.append(total_loss)
  # state_dict() returns references to the live parameter tensors, so appending it
  # directly made every entry alias the final weights. Store an independent CPU copy per
  # epoch so weight_list[epoch_val] really holds that epoch's weights.
  weight_list.append({name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()})

In [None]:
#Test new model
# Summarise the best epoch: accuracies first, then that epoch's test confusion matrix.
report_lines = [
    f"Best Testing Accuracy: {max_test_acc:.2f} ({epoch_val + 1}th epoch)",
    f"Training Accuracy: {train_acc:.2f}\n",
    "                   Actually Winners    Actually Losers",
    f"Predicted Winners: {WW_list[epoch_val]}                 {WL_list[epoch_val]}",
    f"Predicted Losers:  {LW_list[epoch_val]}                  {LL_list[epoch_val]}",
]
print("\n".join(report_lines))

Best Testing Accuracy: 68.75 (192th epoch)
Training Accuracy: 96.60

                   Actually Winners    Actually Losers
Predicted Winners: 176                 77
Predicted Losers:  63                  132


In [None]:
#Saving Best Weights
# Writes the snapshot recorded for the best-test-accuracy epoch (index epoch_val) to Drive.
# NOTE(review): model.state_dict() returns references to the live parameter tensors, so
# unless the snapshots stored in weight_list were copied when they were appended, this
# writes the final-epoch weights rather than the best epoch's - confirm.
torch.save(weight_list[epoch_val], "/content/drive/MyDrive/CNNWeights_Apr8_1_300E_0.001LR.torch")

## Testing Saved Model

In [None]:
#Test saved model
# Evaluate the in-memory `model` on the test split, then on the training split, printing
# the accuracy and confusion matrix for each.
for split_name, split_data, split_labels in (
    ("Testing", test_data_data, test_data_label),
    ("Training", train_data_data, train_data_label),
):
  value, WW, WL, LW, LL = check_accuracy(split_data, split_labels, model, printing = True, matrix_data = True)
  print(f"{split_name} Accuracy: {value}\n")
  matrix_lines = [
      "                    Actually Winners    Actually Losers",
      f"Predicted Winners: {WW}                  {WL}",
      f"Predicted Losers:  {LW}                  {LL}",
  ]
  print("\n".join(matrix_lines))
  if split_name == "Testing":
    # Reproduces the blank line that separated the two reports.
    print()