## Init Values

In [None]:
# Fraction of each genre's files assigned to the training, validation and test splits.
train_percentaje = 0.8
validation_percentaje = 0.1
test_percentaje = 0.1

# Genre names: get_data uses each one as a sub-folder name and its position as the class label.
genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

## Select data for each model

### Global get_data

In [None]:
import numpy as np

def index_genre(genre, genres):
    """Return the position of the first entry of ``genres`` equal to ``genre``.

    Returns -1 when no entry matches.
    """
    matches = (position for position, candidate in enumerate(genres) if candidate == genre)
    return next(matches, -1)

def get_data(data_path, genres, decoder, training_percentaje=0.6, validation_percentaje=0.2, test_percentaje=0.2):
    """Load a per-genre dataset from disk and split it into training, validation and test sets.

    Every genre is read from the sub-folder ``data_path/<genre>``. Within each genre the
    files are taken in sorted name order: the first ``training_percentaje`` of them go to
    the training set, the next ``validation_percentaje`` to the validation set and the
    remainder to the test set.

    Args:
        data_path: path of the folder that holds the dataset.
        genres: list with the name of each sub-folder holding one genre. The label stored
            for a sample is ``index_genre(genre, genres)``.
        decoder: function that decodes the file at the path it receives (for images it
            would call imread, for example).
        training_percentaje: fraction of each genre's files used for training.
        validation_percentaje: fraction of each genre's files used for validation.
        test_percentaje: accepted for symmetry but not read; the test set receives every
            file left over after the other two splits.

    Returns:
        A dict with keys ``'data_training'``, ``'data_validation'`` and ``'data_testing'``.
        Each value is a dict with ``'in'`` (numpy array of decoded samples) and ``'out'``
        (numpy array of integer labels).
    """
    # Imported locally so the function does not depend on a module-level ``import os``.
    import os

    training = {'in': [], 'out': []}
    validation = {'in': [], 'out': []}
    test = {'in': [], 'out': []}

    for genre in genres:
        genre_dir = os.path.join(data_path, genre)
        # os.listdir returns entries in arbitrary order; sorting keeps the split
        # reproducible between runs and machines.
        files = sorted(os.listdir(genre_dir))
        count = len(files)
        label = index_genre(genre, genres)

        # Split boundaries expressed as (possibly fractional) file positions.
        training_end = training_percentaje * count
        validation_end = (training_percentaje + validation_percentaje) * count

        for index, filename in enumerate(files):
            if index < training_end:
                target = training
            elif index < validation_end:
                target = validation
            else:
                target = test

            target['in'].append(decoder(os.path.join(genre_dir, filename)))
            target['out'].append(label)

    return {
        'data_training': {'in': np.array(training['in']), 'out': np.array(training['out'])},
        'data_validation': {'in': np.array(validation['in']), 'out': np.array(validation['out'])},
        'data_testing': {'in': np.array(test['in']), 'out': np.array(test['out'])},
    }

### Get data for MFCC model

In [None]:
import cv2
# Root folder of the dataset read by get_data_mfcc; 'url' looks like a placeholder — TODO confirm the real path.
data_path = 'url'

def read_image(filepath):
    """Load the image stored at ``filepath`` and resize it to ``size_images``.

    ``size_images`` is a module-level value that must be defined before the first call;
    it is passed unchanged to ``cv2.resize`` as the target size.

    Raises:
        OSError: if OpenCV cannot read ``filepath`` as an image.
    """
    image = cv2.imread(filepath)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising, which would
        # otherwise only surface as an opaque error inside cv2.resize.
        raise OSError('cv2.imread could not read image: {}'.format(filepath))
    return cv2.resize(image, size_images)

def get_data_mfcc(training_percentaje=0.8, validation_percentaje=0.1, test_percentaje=0.1):
    """Build the dataset splits for the MFCC model.

    Delegates to ``get_data`` with the module-level ``data_path`` and ``genres`` and with
    ``read_image`` as the decoder; the three percentages are forwarded unchanged.
    """
    return get_data(
        data_path,
        genres,
        read_image,
        training_percentaje=training_percentaje,
        validation_percentaje=validation_percentaje,
        test_percentaje=test_percentaje,
    )

# Build the MFCC dataset splits using the module-level split percentages.
mfcc_data = get_data_mfcc(
    training_percentaje=train_percentaje,
    validation_percentaje=validation_percentaje,
    test_percentaje=test_percentaje,
)

### Get data for wavelet model

In [None]:
import librosa
from collections import Counter
import scipy
import dtcwt

# Root folder of the dataset read by get_data_wavelet; 'url' looks like a placeholder — TODO confirm the real path.
# NOTE: this rebinds the same module-level name that get_data_mfcc reads at call time.
data_path = 'url'

# Shared dtcwt 1-D transform object; extract_dtcwt calls its forward() method.
trans = dtcwt.Transform1d(biort='antonini', qshift='qshift_d')

def calculate_entropy(list_values):
    """Return the Shannon entropy of the empirical distribution of ``list_values``.

    Each distinct value's relative frequency is used as its probability, and the
    probabilities are handed to ``scipy.stats.entropy`` (natural logarithm by default).

    Args:
        list_values: a sized iterable of hashable values.
    """
    # A bare ``import scipy`` does not load the ``stats`` submodule on every SciPy
    # version, so import it explicitly here.
    import scipy.stats

    total = len(list_values)
    counter_values = Counter(list_values).most_common()
    probabilities = [occurrences / total for _, occurrences in counter_values]
    return scipy.stats.entropy(probabilities)

def calculate_statistics(list_values):
    """Return NaN-ignoring summary statistics of ``list_values``.

    Args:
        list_values: numeric array-like (a plain Python list is accepted).

    Returns:
        ``[n5, n25, n75, n95, median, mean, std, var, rms]`` where the ``n*`` entries are
        the 5th/25th/75th/95th percentiles and ``rms`` is the root mean square,
        ``sqrt(mean(x**2))``.
    """
    # Convert once so that element-wise arithmetic also works for plain Python lists.
    values = np.asarray(list_values)

    n5, n25, median, n75, n95 = np.nanpercentile(values, [5, 25, 50, 75, 95])
    mean = np.nanmean(values)
    std = np.nanstd(values)
    var = np.nanvar(values)
    # NOTE: this was previously mean(sqrt(x**2)), i.e. the mean absolute value. Models
    # fitted on features from that formula need their features regenerated.
    rms = np.sqrt(np.nanmean(values ** 2))
    return [n5, n25, n75, n95, median, mean, std, var, rms]

def calculate_crossings(list_values):
    """Count how often ``list_values`` crosses zero and crosses its own mean.

    A crossing is a position where consecutive samples fall on different sides of the
    level (``sample > level`` changes). The mean is computed ignoring NaNs.

    Returns:
        ``[no_zero_crossings, no_mean_crossings]``.
    """
    samples = np.array(list_values)
    levels = (0, np.nanmean(list_values))
    return [len(np.nonzero(np.diff(samples > level))[0]) for level in levels]

def get_features(list_values):
    """Return the flat feature list for one sequence of values.

    The list holds the entropy first, then the crossing counts, then the summary
    statistics, as produced by ``calculate_entropy``, ``calculate_crossings`` and
    ``calculate_statistics``.
    """
    return [
        calculate_entropy(list_values),
        *calculate_crossings(list_values),
        *calculate_statistics(list_values),
    ]

def extract_dtcwt(file_path: str):
    """Return the wavelet feature list for the audio file at ``file_path``.

    The file is loaded with ``librosa.load`` and passed through ``trans.forward`` with
    17 levels. ``get_features`` is applied to the absolute value of every high-pass
    band, then to the low-pass band, and the results are concatenated in that order.
    """
    signal, _ = librosa.load(file_path)
    pyramid = trans.forward(signal, nlevels=17)

    # High-pass bands are reduced to their absolute values; the low-pass band is used as is.
    bands = [np.abs(band.squeeze()) for band in pyramid.highpasses]
    bands.append(pyramid.lowpass.squeeze())

    features = []
    for band in bands:
        features.extend(get_features(band))
    return features

def get_data_wavelet(training_percentaje=0.8, validation_percentaje=0.1, test_percentaje=0.1):
    """Build the dataset splits for the wavelet model.

    Delegates to ``get_data`` with the module-level ``data_path`` and ``genres`` and with
    ``extract_dtcwt`` as the decoder; the three percentages are forwarded unchanged.
    """
    return get_data(
        data_path,
        genres,
        extract_dtcwt,
        training_percentaje=training_percentaje,
        validation_percentaje=validation_percentaje,
        test_percentaje=test_percentaje,
    )

# Build the wavelet dataset with the wavelet loader (get_data_wavelet), not the MFCC image loader.
# The variable keeps its 'wavalet' spelling because the `data` dict refers to it by this name.
wavalet_data = get_data_wavelet(train_percentaje, validation_percentaje, test_percentaje)

### Gather the data for all models

In [None]:
# Datasets keyed by model name; train() looks up each model's data under the model's key.
data = {
    'mfcc': mfcc_data,
    'wavalet': wavalet_data,
}

## Import model.h5 for each model

In [None]:
# Registry of models; train() iterates it and uses each key to index `data`.
# TODO: load each model's .h5 file into this dict.
models = {}

## Train the ensemble model

In [None]:
def train():
    """Collect every model's predictions on its own training inputs.

    For each ``(key, model)`` in ``models``, ``model.predict`` is called on the training
    inputs stored under ``data[key]``.

    NOTE(review): in the code visible here the predictions are only gathered into a
    local list; nothing is returned or fitted yet.
    """
    train_data = []
    for key,model in models.items():
        # get_data stores the training split under 'data_training', with the inputs under 'in'.
        train_data.append(model.predict(data[key]['data_training']['in']))
        