In [None]:
import os
import shutil

if not os.path.exists('SUR_projekt2022-2023.zip'):
    !wget https://www.fit.vutbr.cz/study/courses/SUR/public/projekt_2022-2023/SUR_projekt2022-2023.zip
    !unzip SUR_projekt2022-2023.zip
if not os.path.exists('SUR_projekt2022-2023_eval.zip'):
    !wget https://www.fit.vutbr.cz/study/courses/SUR/public/projekt_2022-2023/SUR_projekt2022-2023_eval.zip
    !unzip SUR_projekt2022-2023_eval.zip


In [None]:
# Safety switch: set to True to wipe every sub-directory found inside the
# per-class folders train/1..31 and dev/1..31. Regular files are left alone.
delete_dirs = False

if delete_dirs:
    for parent_dir in ("train", "dev"):
        for class_id in range(1, 32):
            current_dir = os.path.join(parent_dir, str(class_id))

            # Class folders that do not exist are silently skipped
            if not os.path.exists(current_dir):
                continue

            for entry in os.listdir(current_dir):
                subdir_path = os.path.join(current_dir, entry)
                if not os.path.isdir(subdir_path):
                    continue
                shutil.rmtree(subdir_path)
                print(f"Removed directory: {subdir_path}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from augment import augment_images
import os
import shutil
import ikrlib as ikrl

# Number of classes; the per-class data folders are named 1..CLASSES.
CLASSES = 31

# Every probability table produced by this notebook is written into results/.
# makedirs(..., exist_ok=True) replaces the exists()/mkdir() pair: one call,
# and no failure if the directory appears between the check and the creation.
os.makedirs("results", exist_ok=True)

# Image
## CNN

In [None]:
# Toggle for the image augmentation step at the end of this cell.
data_augmentation_enabled = True

# Merge the dev set into the training set: every file of dev/<class> is
# copied to train/<class> under the same name (existing files are overwritten).
for cls in range(1, CLASSES + 1):
    dev_class_dir = os.path.join("dev", str(cls))
    train_class_dir = os.path.join("train", str(cls))
    for f in os.listdir(dev_class_dir):
        src = os.path.join(dev_class_dir, f)
        # Class folders may contain sub-directories (the clean-up cell earlier
        # in this notebook exists to remove them). shutil.copy() fails on a
        # directory, so only regular files are copied; this keeps the cell
        # re-runnable.
        if not os.path.isfile(src):
            continue
        shutil.copy(src, os.path.join(train_class_dir, f))


if data_augmentation_enabled:
    augment_images('train', 'train/da')
    augment_images('dev', 'dev/da')

In [None]:
from image import CustomDataset, CNNModel

def _load_class_images(split, class_id):
    """Load all images of the folder <split>/<class_id> into one array.

    Args:
        split: top-level data folder, e.g. "train" or "dev".
        class_id: 1-based class number, used as the sub-folder name.

    Returns:
        np.ndarray stacking the values of the dict returned by ikrl.png_load
        (the code below expects shape (n_images, 80, 80, 3)).
    """
    # NOTE(review): the second positional argument (False) is passed through
    # unchanged; its meaning is defined by ikrlib.png_load — confirm there.
    return np.array(list(ikrl.png_load(os.path.join(split, str(class_id)), False).values()))


# Per-class chunks are collected in lists and concatenated once after the
# loop; concatenating inside the loop re-copies everything loaded so far on
# every iteration. The leading empty arrays keep the result dtype (float
# images, int labels) and the expected image shape exactly as before.
train_chunks = [np.empty((0, 80, 80, 3))]
train_labels = [np.empty(0, dtype=int)]
test_chunks = [np.empty((0, 80, 80, 3))]
test_labels = [np.empty(0, dtype=int)]

for i in range(1, CLASSES + 1):
    train_i = _load_class_images("train", i)
    # dev/<i> feeds both the training pool and the test set: read it once.
    dev_i = _load_class_images("dev", i)

    # Labels are 0-based (folder number minus one).
    # NOTE(review): the "Copy all to train" cell already copies dev/<i> into
    # train/<i>; if that cell has been run, the dev images end up in the
    # training pool twice, and test_dataset overlaps the training data, so
    # scores computed on it are not a held-out estimate. Kept as in the
    # original; confirm this is intended for the final model.
    train_chunks += [train_i, dev_i]
    train_labels += [np.full(len(train_i), i - 1), np.full(len(dev_i), i - 1)]

    test_chunks.append(dev_i)
    test_labels.append(np.full(len(dev_i), i - 1))

train_x = np.concatenate(train_chunks, axis=0)
train_y = np.concatenate(train_labels, axis=0)
test_x = np.concatenate(test_chunks, axis=0)
test_y = np.concatenate(test_labels, axis=0)

print("Images were successfully loaded")

# convert 80,80,3 to 3,80,80
train_x = np.transpose(train_x, (0, 3, 1, 2))
test_x = np.transpose(test_x, (0, 3, 1, 2))

train_tensors = torch.Tensor(train_x)
test_tensors = torch.Tensor(test_x)

train_dataset = CustomDataset(train_tensors, train_y)
test_dataset = CustomDataset(test_tensors, test_y)
print("Dataset was successfully created")

# Use the shared class-count constant instead of repeating the literal 31.
model = CNNModel(num_classes=CLASSES, lr=1e-4)
model.train_net(train_dataset, test_dataset, num_epochs=100)
print("Model was successfully trained")


In [None]:
import numpy as np

# Load the evaluation images. The dict keys are used as file names and the
# values as image arrays.
data = ikrl.png_load('eval', False)
filenames = np.array([filename.replace("eval/", "") for filename in data.keys()])

# Stack to (N, 80, 80, 3), then move channels first -> (N, 3, 80, 80)
eval_data = np.array(list(data.values()))
eval_data = np.transpose(eval_data, (0, 3, 1, 2))
eval_data = torch.Tensor(eval_data)

# The evaluation set has no labels; zeros are passed as placeholders.
eval_dataset = CustomDataset(eval_data, np.zeros(len(eval_data)))
pred = model.predict(eval_dataset)

# One output tensor per image, stacked to (N, 1, classes) as implied by the
# axis=2 reduction and the axis=1 squeeze below.
scores = np.stack([tensor.detach().numpy() for tensor in pred])

# Softmax over the class axis. Subtracting the per-row maximum first leaves
# the result mathematically unchanged but keeps np.exp from overflowing to
# inf (which would turn the whole row into nan).
scores = scores - scores.max(axis=2, keepdims=True)
exp_matrix = np.exp(scores)
row_sums = exp_matrix.sum(axis=2, keepdims=True)
cnn_predictions = exp_matrix / row_sums
cnn_predictions = np.squeeze(cnn_predictions, axis=1)
print(cnn_predictions.shape)

# Order the rows alphabetically by file name
sorted_indices = np.argsort(filenames)
sorted_filenames = filenames[sorted_indices]
sorted_cnn_prob = np.array(cnn_predictions)[sorted_indices]

# One line per file: <name> <1-based winning class> <probability per class>
with open("results/cnn_prob_table.txt", "w") as f:
    for file, correspond_pred in zip(sorted_filenames, sorted_cnn_prob):
        best_class = np.argmax(correspond_pred)
        f.write(file + ' ' + str(best_class + 1) + ' ' + ' '.join(map(str, correspond_pred)) + '\n')


## SVM

In [None]:
from svm import SVCTrain

model = SVCTrain()
# NOTE(review): the second argument here is eval_dataset (placeholder zero
# labels), whereas the CNN cell passes test_dataset in that position —
# confirm against SVCTrain.train_svc that this is intended.
model.train_svc(train_dataset, eval_dataset)
svm_prob = model.predict_whole_dataset(eval_data)

# Alphabetical order of the file names, applied to the probability rows too
sorted_indices = np.argsort(filenames)
sorted_filenames = filenames[sorted_indices]
sorted_svm_prob = np.array(svm_prob)[sorted_indices]

# One line per file: <name> <1-based winning class> <probability per class>
with open("results/svm_prob_table.txt", "w") as f:
    for name, probs in zip(sorted_filenames, sorted_svm_prob):
        label = np.argmax(probs) + 1
        f.write(f"{name} {label} {' '.join(str(p) for p in probs)}\n")

## GMM

In [None]:
from gmm import GMMmodel

# Pixel-wise mean over all training images, shown as a sanity check.
train_dataset_np = np.array(train_dataset.images)
train_dataset_mean_face = train_dataset_np.mean(axis=0)

# imshow wants the colour channel last; the mean image has it on the first
# axis (assumes images are stored as (N, 3, H, W) — TODO confirm in CustomDataset)
mean_face_hwc = np.transpose(train_dataset_mean_face, (1, 2, 0))
plt.imshow(mean_face_hwc.astype(np.uint8))
plt.title("You main not like this but this is what peak performance looks like (mean face) ")
plt.axis('off')

# Train the image GMM; the returned value is consumed by the next cell.
model = GMMmodel()
eval_subs_mean = model.train_gmm(train_dataset, test_dataset, eval_dataset)

In [None]:
# Score the evaluation data and transpose so that each row can be normalised
# on its own (rows are later indexed by file, see the sorting cell).
res = model.predict(eval_subs_mean[0]).T

# Shift every row so its largest score is 0 (in place, as before); this keeps
# the exponentials below from overflowing without changing the final ratios.
np.subtract(res, res.max(axis=1, keepdims=True), out=res)

# Exponentiate the shifted scores
prob_matrix = np.exp(res)

# Scale each row to sum to 1
gmm_image_prob = prob_matrix / prob_matrix.sum(axis=1, keepdims=True)

In [None]:
# Alphabetical order of the file names, applied to the probability rows too
sorted_indices = np.argsort(filenames)
sorted_filenames = filenames[sorted_indices]
sorted_gmm_image = np.array(gmm_image_prob)[sorted_indices]

# One line per file: <name> <1-based winning class> <probability per class>
with open("results/gmm_image_prob_table.txt", "w") as f:
    for name, probs in zip(sorted_filenames, sorted_gmm_image):
        label = np.argmax(probs) + 1
        f.write(f"{name} {label} {' '.join(str(p) for p in probs)}\n")

## Audio

In [None]:
from audio import Audio
import numpy as np

In [None]:
# --- Audio pipeline configuration -------------------------------------

# Number of classes; the audio cells pass this to Audio(...).
CLASSES = 31

# When True, the pre-processing steps below are (re)run on the recordings.
generate_data = True

# Pre-processing switches (each one is passed to the matching Audio.do_* call)
audio_adjust_enabled = True
reduce_noise_enabled = True
data_augmentation_enabled = True

# Selects which loader is used for the features
data_pre_emphasis = False

# Feature post-processing switches
coefficients_normalization = False
delta_coefficients_enabled = False
cepstral_mean_subtraction_enabled = False

In [None]:
# Audio helper bound to the "train" and "eval" folders.
audio = Audio(CLASSES, "train", "eval")

if generate_data:
    # Run the pre-processing steps in a fixed order; each step receives its
    # own on/off flag and decides itself what to do with it.
    preprocessing_steps = (
        (audio.do_audio_adjust, audio_adjust_enabled),
        (audio.do_reduce_noise, reduce_noise_enabled),
        (audio.do_data_augmentation, data_augmentation_enabled),
    )
    for run_step, enabled in preprocessing_steps:
        run_step(enabled)

In [None]:
# Pick the loader according to the pre-emphasis switch; both return a pair
# (training features, features to be scored later).
load_features = audio.do_data_pre_emphasis if data_pre_emphasis else audio.do_classic_load
train_audio, dev_audio = load_features()

# Optional post-processing of the training features, each step gated by its flag.
# NOTE(review): only train_audio goes through these steps, dev_audio does not.
# This makes no difference while all three flags are False, but looks like a
# train/eval mismatch once any of them is enabled — confirm.
train_audio = audio.do_coefficients_normalization(train_audio, coefficients_normalization)
train_audio = audio.do_delta_coefficients(train_audio, delta_coefficients_enabled)
train_audio = audio.do_cepstral_mean_subtraction(train_audio, cepstral_mean_subtraction_enabled)

In [None]:
# Train the audio GMMs on the training features and keep the three returned
# parameter collections for scoring in the next cell.
# NOTE(review): the positional 10 and 50 are presumably the number of mixture
# components and the number of training iterations — confirm against
# Audio.train_gmm and consider naming them.
Ws, MUs, COVs = audio.train_gmm(train_audio, 10, 50)

In [None]:
# Score the second feature set returned by the loader with the trained GMM
# parameters. This rebinds `filenames`: from here on it holds the names
# returned by audio.eval, not the image file names set in the CNN cell.
# NOTE(review): despite its name, dev_audio presumably holds the "eval"
# recordings (Audio was constructed with "train" and "eval") — confirm.
gmm_audio_prob, filenames = audio.eval(dev_audio, Ws, MUs, COVs, eval_format='new')

In [None]:
# Drop the "eval/rn/" directory prefix so only the bare names remain
filenames = np.array([name.replace("eval/rn/", "") for name in filenames])

# Alphabetical order of the file names, applied to the probability rows too
sorted_indices = np.argsort(filenames)
sorted_filenames = filenames[sorted_indices]
sorted_gmm_audio_prob = np.array(gmm_audio_prob)[sorted_indices]

# One line per file: <name> <1-based winning class> <probability per class>
with open("results/gmm_audio_prob_table.txt", "w") as f:
    for name, probs in zip(sorted_filenames, sorted_gmm_audio_prob):
        label = np.argmax(probs) + 1
        f.write(f"{name} {label} {' '.join(str(p) for p in probs)}\n")

## Averaged model results

In [None]:
def _blend(weighted_models):
    """Weighted average of several per-file probability matrices.

    Args:
        weighted_models: sequence of (prob_matrix, weight) pairs. All matrices
            must have the same shape (files x classes) and the same row order.

    Returns:
        np.ndarray of that shape; every row is re-normalised to sum to 1.

    Raises:
        AssertionError: if the weights do not sum to 1 (within floating-point
            tolerance) or the matrices differ in shape.
    """
    matrices = [np.asarray(matrix) for matrix, _ in weighted_models]
    weights = [weight for _, weight in weighted_models]

    # Exact `== 1` on a float sum is fragile: whether it holds depends on how
    # the particular decimals happen to round. Compare with a tolerance.
    total = sum(weights)
    assert np.isclose(total, 1.0), f"ensemble weights sum to {total}, expected 1"
    assert all(matrix.shape == matrices[0].shape for matrix in matrices), \
        "probability matrices must share one shape"

    ws = matrices[0] * weights[0]
    for matrix, weight in zip(matrices[1:], weights[1:]):
        ws = ws + matrix * weight
    return ws / np.sum(ws, axis=1, keepdims=True)


def _write_prob_table(path, names, prob_matrix):
    """Write one line per file: <name> <1-based winning class> <probabilities>."""
    with open(path, "w") as f:
        for file, correspond_pred in zip(names, prob_matrix):
            pred = np.argmax(correspond_pred)
            f.write(file + ' ' + str(pred + 1) + ' ' + ' '.join(map(str, correspond_pred)) + '\n')


# NOTE(review): sorted_filenames is whatever the most recently run sorting
# cell left behind (the audio cell when run top to bottom). The blends assume
# that the image-based and audio-based tables list the same items in the same
# sorted order — confirm that the names line up.

# All four models
average_all = _blend([
    (sorted_gmm_audio_prob, 0.24),
    (sorted_gmm_image, 0.24),
    (sorted_svm_prob, 0.21),
    (sorted_cnn_prob, 0.31),
])

# Audio GMM + image GMM + CNN
average_gmm_cnn = _blend([
    (sorted_gmm_audio_prob, 0.3),
    (sorted_gmm_image, 0.3),
    (sorted_cnn_prob, 0.4),
])

# Audio GMM + CNN
average_gmm_audio_cnn = _blend([
    (sorted_gmm_audio_prob, 0.4),
    (sorted_cnn_prob, 0.6),
])

_write_prob_table("results/average_all.txt", sorted_filenames, average_all)
_write_prob_table("results/average_gmm_cnn.txt", sorted_filenames, average_gmm_cnn)
_write_prob_table("results/average_gmm_audio_cnn.txt", sorted_filenames, average_gmm_audio_cnn)