In [1]:
import os
import torch
import numpy as np
import torch.nn as nn
import ikrlib as ikrl
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from glob import glob
from PIL import Image
from torch.utils.data import DataLoader
from augment import augment_images

# Image

In [None]:
data_augmentation_enabled = True

if data_augmentation_enabled:
    augment_images('train', 'train/da')
    augment_images('dev', 'dev/da')

In [13]:
from image import CustomDataset, SmallCNNMultiClass, train

CLASSES = 31

if torch.cuda.is_available():  
    dev = "cuda:0" 
else:  
    dev = "cpu" 

train_x = np.empty((0,80,80,3))
train_y = np.empty((0),dtype=int)

test_x = np.empty((0,80,80,3))
test_y = np.empty((0),dtype=int)

for i in range(1,CLASSES+1):
    train_i = np.array(list(ikrl.png_load(os.path.join("train",str(i)), False).values()))
    label_i = np.full(len(train_i),i-1)
    train_x = np.concatenate((train_x, train_i), axis=0)
    train_y = np.concatenate((train_y, label_i), axis=0)

    test_i = np.array(list(ikrl.png_load(os.path.join("dev",str(i)), False).values()))
    label_i = np.full(len(test_i),i-1)
    test_x = np.concatenate((test_x, test_i), axis=0)
    test_y = np.concatenate((test_y, label_i), axis=0)

print("Images were successfully loaded")

# convert 80,80,3 to 3,80,80
train_x = np.array(train_x)
train_x = np.transpose(train_x, (0, 3, 1, 2))

test_x = np.array(test_x)
test_x = np.transpose(test_x, (0, 3, 1, 2))

# Convert NumPy arrays to PyTorch tensors
train_tensors = torch.Tensor(train_x)
test_tensors = torch.Tensor(test_x)


# Create new TensorDataset instances with the modified labels
train_dataset = CustomDataset(train_tensors, train_y)
test_dataset = CustomDataset(test_tensors, test_y)
print("Dataset was successfully created")

model = SmallCNNMultiClass()
criterion = F.cross_entropy
model = model.to(dev)
# model = Net().to(dev)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
accuracys, losses = train(model, train_dataset, test_dataset, optimizer, criterion, dev, 500)

Images were successfully loaded
Dataset was successfully created
Epoch: 1/500, Loss: 55.0295, Accuracy: 0.0161, 1 and 62
Epoch: 51/500, Loss: 44.8971, Accuracy: 0.7258, 45 and 62
Epoch: 101/500, Loss: 43.9168, Accuracy: 0.8226, 51 and 62
Epoch: 151/500, Loss: 43.1473, Accuracy: 0.8548, 53 and 62
Epoch: 201/500, Loss: 44.3219, Accuracy: 0.8548, 53 and 62
Epoch: 251/500, Loss: 43.2679, Accuracy: 0.9194, 57 and 62
Epoch: 301/500, Loss: 42.5740, Accuracy: 0.9355, 58 and 62
Epoch: 351/500, Loss: 43.1705, Accuracy: 0.9194, 57 and 62
Epoch: 401/500, Loss: 43.5827, Accuracy: 1.0000, 62 and 62
Epoch: 451/500, Loss: 43.3906, Accuracy: 1.0000, 62 and 62
Epoch: 500/500, Loss: 43.3262, Accuracy: 1.0000, 62 and 62


In [None]:
from torch.utils.data import TensorDataset

test_x = np.empty((0,80,80,3))
test_x = np.array(list(ikrl.png_load('eval', False).values()))
test_x = np.transpose(test_x, (0, 3, 1, 2))

test_dataset = TensorDataset(torch.Tensor(test_x))
test_loader = DataLoader(test_dataset, batch_size=736)

for x in test_loader:
    pred = model(x[0].to(dev))
    _, pred = torch.max(pred, dim=1)
pred = pred + 1
print(pred)

## SVM

In [5]:
from svm import SVCTrain
from sklearn.model_selection import train_test_split
from image import CustomDataset
CLASSES = 31

def png_load(dir_name):
    """
    Loads all *.png images from directory dir_name into a dictionary. Keys are the file names
    and values and 2D numpy arrays with corresponding grayscale images
    """
    features = {}
    for f in glob(dir_name + '/*.png'):
        features[f] = np.array(Image.open(f), dtype=np.float64)
    return features

train_x = np.empty((0,80,80,3))
train_y = np.empty((0),dtype=int)

for i in range(1,CLASSES+1):
    train_i = np.array(list(png_load(os.path.join("train",str(i))).values()))
    label_i = np.full(len(train_i),i-1)
    train_x = np.concatenate((train_x, train_i), axis=0)
    train_y = np.concatenate((train_y, label_i), axis=0)

train_x = np.transpose(train_x, (0, 3, 1, 2))

print("Images were successfully loaded")

train_x, test_x, train_y, test_y = train_test_split(train_x, train_y, test_size=0.3,random_state=109)

train_dataset = CustomDataset(train_x, train_y)

model = SVCTrain()
model.train(train_dataset, CustomDataset(test_x, test_y))
print(model.predict(test_x[0]))

Images were successfully loaded
[[0.06018147 0.09588032 0.019359   0.03357952 0.01881544 0.0235429
  0.16910213 0.01118661 0.04132677 0.01328554 0.01170306 0.02117478
  0.13587252 0.01542163 0.00599437 0.03110354 0.04310296 0.01890837
  0.00945689 0.03709076 0.00924805 0.00837855 0.00654431 0.00542381
  0.01984617 0.03578422 0.04744238 0.02587904 0.00577893 0.01391617
  0.00566977]]


## GMM

In [1]:
from gmm_image import train_gmm, eval, augment_images_gmm

CLASSES = 31

data_augmentation_enabled = False

if data_augmentation_enabled:
    for i in range(1,CLASSES+1):
        augment_images_gmm(f"train/{i}", f"train/{i}/da", 3)

dev_subs_mean, ws_list, mus_list, covs_list = train_gmm()
eval(dev_subs_mean, ws_list, mus_list, covs_list)


Loading data was successful
Creating subs mean classes
Training GMM
Evaluating GMM classes
Accuracy: 0.12903225806451613


## Audio

In [1]:
from audio import Audio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
cepstral_mean_subtraction_enabled = False
delta_coefficients_enabled = False
coefficients_normalization = False

audio_adjust_enabled = True
reduce_noise_enabled = True
data_augmentation_enabled = True
data_pre_emphasis = False

CLASSES = 31

In [3]:
audio = Audio(CLASSES, 'train', 'eval')
audio.do_audio_adjust(audio_adjust_enabled)
audio.do_reduce_noise(reduce_noise_enabled)
audio.do_data_augmentation(data_augmentation_enabled)
if data_pre_emphasis:
    train_audio, dev_audio = audio.do_data_pre_emphasis()
else:
    train_audio, dev_audio = audio.do_classic_load()
train_audio = audio.do_coefficients_normalization(train_audio, coefficients_normalization)
train_audio = audio.do_delta_coefficients(train_audio, delta_coefficients_enabled)
train_audio = audio.do_cepstral_mean_subtraction(train_audio, cepstral_mean_subtraction_enabled)

Removing silence from records in directory train/1/
Removing silence from records in directory eval/1/


FileNotFoundError: [Errno 2] No such file or directory: 'eval/1/rs/'

In [None]:
Ws, MUs, COVs = audio.train(train_audio, 3, 30)

In [None]:
predicted_classes, accuracy = audio.eval(dev_audio, Ws, MUs, COVs)