In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch

import ikrlib as ikrl
from augment import augment_images

ModuleNotFoundError: No module named 'librosa'

# Image
## CNN

In [None]:
# Switch for the image augmentation step below; set to False to skip it.
data_augmentation_enabled = True

if data_augmentation_enabled:
    # Each pair is forwarded unchanged to augment_images(first, second);
    # presumably (input directory, output directory) — confirm against augment.py.
    for src_dir, dst_dir in (('train', 'train/da'), ('dev', 'dev/da')):
        augment_images(src_dir, dst_dir)

In [3]:
from image import CustomDataset, CNNModel

CLASSES = 31


def _load_class_images(split_dir, class_id):
    """Return all images of one class from one split stacked into one array.

    Reads the directory ``<split_dir>/<class_id>`` through ``ikrl.png_load``
    (second argument ``False``, as in the rest of this notebook) and stacks
    the loaded images in the order the loader returns them.
    """
    class_dir = os.path.join(split_dir, str(class_id))
    return np.array(list(ikrl.png_load(class_dir, False).values()))


# Gather per-class chunks in lists and concatenate once at the end; calling
# np.concatenate inside the loop re-copies everything loaded so far each time.
train_chunks, train_labels = [], []
test_chunks, test_labels = [], []

for class_id in range(1, CLASSES + 1):
    label = class_id - 1  # directories are numbered 1..CLASSES, labels start at 0

    class_train = _load_class_images("train", class_id)
    class_dev = _load_class_images("dev", class_id)  # read once, used twice below

    # NOTE(review): the dev images are appended to the training set AND used as
    # the test set, so any accuracy measured on test_x/test_y is computed on
    # data the model was trained on. Kept as-is; confirm that it is intended.
    for chunk in (class_train, class_dev):
        train_chunks.append(chunk)
        train_labels.append(np.full(len(chunk), label))

    test_chunks.append(class_dev)
    test_labels.append(np.full(len(class_dev), label))

# The leading empty arrays keep the result dtype and the (80, 80, 3) shape
# check identical to concatenating onto an initially empty array.
train_x = np.concatenate([np.empty((0, 80, 80, 3)), *train_chunks], axis=0)
train_y = np.concatenate([np.empty((0), dtype=int), *train_labels], axis=0)

test_x = np.concatenate([np.empty((0, 80, 80, 3)), *test_chunks], axis=0)
test_y = np.concatenate([np.empty((0), dtype=int), *test_labels], axis=0)

print("Images were successfully loaded")

# Move the channel axis in front of the spatial axes for both splits:
# (N, 80, 80, 3) -> (N, 3, 80, 80).
train_x, test_x = (
    np.transpose(np.array(split), (0, 3, 1, 2)) for split in (train_x, test_x)
)

# Wrap the NumPy arrays as PyTorch tensors.
train_tensors, test_tensors = torch.Tensor(train_x), torch.Tensor(test_x)

# Pair each image tensor with its integer class labels in a CustomDataset.
train_dataset = CustomDataset(train_tensors, train_y)
test_dataset = CustomDataset(test_tensors, test_y)
print("Dataset was successfully created")

# TODO(debug): in the recorded run the accuracy is 0.1129 at both logged epochs
# while the loss drops (182.20 -> 73.50), so the network is not learning yet.
# NOTE: the saved output shows "Epoch 1/5 ... 5/5", i.e. it comes from a run
# with 5 epochs, not from the num_epochs=200 configured here.
model = CNNModel(num_classes=CLASSES, lr=1e-4)  # reuse CLASSES instead of a second literal 31
model.train_net(train_dataset, test_dataset, num_epochs=200)
print("Model was successfully trained")


Images were successfully loaded
Dataset was successfully created
Epoch: 1/5, Loss: 182.1969, Accuracy: 0.1129, 7 and 62
Epoch: 5/5, Loss: 73.5008, Accuracy: 0.1129, 7 and 62
Model was successfully trained


In [4]:
# Stack every image found under 'eval', apply the (0, 3, 1, 2) axis
# permutation (channels moved in front of the spatial axes) and wrap the
# result as a tensor.
eval_images = list(ikrl.png_load('eval', False).values())
eval_data = torch.Tensor(np.transpose(np.array(eval_images), (0, 3, 1, 2)))

# CustomDataset takes labels as its second argument; zeros are passed as
# placeholders, one per evaluation image.
eval_dataset = CustomDataset(eval_data, np.zeros(len(eval_data)))

pred = model.predict(eval_dataset)
# For each element of pred keep the index of the largest value along dim 1
# (presumably pred holds per-batch class scores — confirm against CNNModel.predict).
final_preds = [torch.max(scores.data, dim=1)[1] for scores in pred]


## SVM

In [None]:
from svm import SVCTrain

# NOTE(review): this rebinds `model`, which the CNN cells also use; re-running
# the CNN evaluation cell after this one would call the SVM instead.
model = SVCTrain()
model.train_svc(train_dataset, test_dataset)

# Classify the first test image as a quick smoke check.
first_test_prediction = model.predict(test_x[0])
print(first_test_prediction)

## GMM

In [None]:
from gmm import GMMmodel

# Mean face: average all training images element-wise over the first axis.
train_dataset_np = np.array(train_dataset.images)
train_dataset_mean_face = train_dataset_np.mean(axis=0)

# Move the leading axis last so imshow receives a (rows, cols, channels) array.
# NOTE(review): the uint8 cast assumes pixel values in 0..255 — confirm against png_load.
mean_face_hwc = np.transpose(train_dataset_mean_face, (1, 2, 0)).astype(np.uint8)

plt.imshow(mean_face_hwc)
plt.title("You main not like this but this is what peak performance looks like (mean face) ")
plt.axis('off')

# NOTE(review): `model` is rebound again here; from this point on it refers to
# the GMM, not to the CNN or SVM created in earlier cells.
model = GMMmodel()
model.train_gmm(train_dataset, test_dataset)

In [6]:
# Run the model currently bound to `model` on the evaluation images.
# NOTE(review): the recorded run of this cell failed with
# "operands could not be broadcast together with shapes (736,6400) (310,)",
# so the input layout expected by predict() still needs to be checked.
gmm_eval_inputs = eval_dataset.images
res = model.predict(gmm_eval_inputs)
print(res)

ValueError: operands could not be broadcast together with shapes (736,6400) (310,) 

## Audio

In [None]:
from audio import Audio

In [None]:
# --- Loading: selects do_data_pre_emphasis() over do_classic_load() ---
data_pre_emphasis = False

# --- Feature post-processing: each flag is passed to the matching Audio.do_* step ---
coefficients_normalization = False
delta_coefficients_enabled = False
cepstral_mean_subtraction_enabled = False

# --- Flags not referenced by the audio cells visible in this notebook ---
# NOTE(review): data_augmentation_enabled re-assigns the flag of the same name
# from the image section.
audio_adjust_enabled = True
reduce_noise_enabled = True
data_augmentation_enabled = True

In [None]:
# NOTE(review): the third argument is "eval" although the second returned set
# is named dev_audio — confirm which directory Audio is meant to read.
audio = Audio(CLASSES, "train", "eval")

# Pick the loader once, then call it; both loaders return (train, dev).
load_audio = audio.do_data_pre_emphasis if data_pre_emphasis else audio.do_classic_load
train_audio, dev_audio = load_audio()

# Each step receives the current training data and its enable flag.
# NOTE(review): only train_audio goes through these steps; dev_audio is left
# as loaded — confirm this is intended when any flag is switched on.
for feature_step, step_enabled in (
    (audio.do_coefficients_normalization, coefficients_normalization),
    (audio.do_delta_coefficients, delta_coefficients_enabled),
    (audio.do_cepstral_mean_subtraction, cepstral_mean_subtraction_enabled),
):
    train_audio = feature_step(train_audio, step_enabled)

In [None]:
# The literals 3 and 30 are forwarded positionally to Audio.train_gmm
# (presumably component count and iteration count — TODO confirm).
gmm_parameters = audio.train_gmm(train_audio, 3, 30)
Ws, MUs, COVs = gmm_parameters

In [None]:
# Evaluate dev_audio with the trained GMM parameters. `accuracy` is returned
# alongside the predictions but is not displayed by this cell.
evaluation_result = audio.eval(dev_audio, Ws, MUs, COVs, eval_format='new')
predicted_classes, accuracy = evaluation_result
print(predicted_classes)

## Majority voting

In [None]:
def majority_voting(pred_gmm, pred_svm, pred_cnn):
    """Combine three per-sample label predictions by majority vote.

    Args:
        pred_gmm: Iterable of non-negative integer class labels, one per sample.
        pred_svm: Same, from the second model; must have the same length.
        pred_cnn: Same, from the third model; must have the same length.

    Returns:
        1-D integer ``np.ndarray`` holding the winning label for each sample.
        When all three models disagree, the smallest of the three labels
        wins, because ``np.argmax`` returns the first maximum of the counts.

    Raises:
        ValueError: If the three prediction sequences differ in length
            (``zip`` would otherwise silently drop the surplus samples).
    """
    columns = [list(pred) for pred in (pred_gmm, pred_svm, pred_cnn)]
    lengths = [len(column) for column in columns]
    if len(set(lengths)) != 1:
        raise ValueError(
            "majority_voting needs predictions of equal length, got lengths %s" % lengths
        )

    winners = [np.argmax(np.bincount(list(votes))) for votes in zip(*columns)]
    # np.intp is the dtype np.argmax returns, so an empty input yields an
    # integer array as well instead of a float64 one.
    return np.asarray(winners, dtype=np.intp)

# NOTE(review): pred_gmm, pred_svm and pred_cnn are not assigned anywhere in
# the visible notebook. Each must hold one predicted label per test sample,
# in the same order as test_y, before this cell can run.

# Fuse the three label vectors into one ensemble prediction.
pred_ensemble = majority_voting(pred_gmm, pred_svm, pred_cnn)

# Ensemble accuracy: fraction of test samples whose fused label equals the true label.
ensemble_accuracy = np.mean(test_y == pred_ensemble)
print("Ensemble accuracy:", ensemble_accuracy)