In [13]:
from operator import itemgetter

import numpy as np
from sklearn import datasets, grid_search
from sklearn.model_selection import train_test_split
from neupy import algorithms, estimators, environment


environment.reproducible()


def scorer(network, X, y):
    result = network.predict(X)
    return estimators.rmsle(result, y)


def report(grid_scores, n_top=3):
    scores = sorted(grid_scores, key=itemgetter(1), reverse=False)
    for i, score in enumerate(scores[:n_top]):
        print("Model with rank: {0}".format(i + 1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
              score.mean_validation_score,
              np.std(score.cv_validation_scores)))
        print("Parameters: {0}".format(score.parameters))
        print("")


dataset = datasets.load_diabetes()
x_train, x_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.3
)

grnnet = algorithms.GRNN(std=0.5, verbose=True)
grnnet.train(x_train, y_train)
error = scorer(grnnet, x_test, y_test)
print("GRNN RMSLE = {:.3f}\n".format(error))

print("Run Random Search CV")
grnnet.verbose = False
random_search = grid_search.RandomizedSearchCV(
    grnnet,
    param_distributions={'std': np.arange(1e-2, 1, 1e-4)},
    n_iter=400,
    scoring=scorer,
)
random_search.fit(dataset.data, dataset.target)
report(random_search.grid_scores_)




Main information

[ALGORITHM] GRNN

[OPTION] verbose = True
[OPTION] epoch_end_signal = None
[OPTION] show_epoch = 1
[OPTION] shuffle_data = False
[OPTION] step = 0.1
[OPTION] train_end_signal = None
[OPTION] std = 0.5

GRNN RMSLE = 0.513

Run Random Search CV
Model with rank: 1
Mean validation score: 0.427 (std: 0.013)
Parameters: {'std': 0.06279999999999968}

Model with rank: 2
Mean validation score: 0.427 (std: 0.012)
Parameters: {'std': 0.0598999999999997}

Model with rank: 3
Mean validation score: 0.427 (std: 0.012)
Parameters: {'std': 0.0594999999999997}



In [14]:
import os
import math
import random
import argparse

import numpy as np
from scipy.io import wavfile


current_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.join(current_dir, 'data')
music_dir = os.path.join(data_dir, 'music_wav')
speech_dir = os.path.join(data_dir, 'speech_wav')

splited_data_file = os.path.join(data_dir, 'splited_data.npz')

train_size = 0.85

SPEECH = 0
MUSIC = 1

parser = argparse.ArgumentParser()
parser.add_argument('--seed', '-s', default=None, dest='seed',
                    help="This parameter makes results reproduceble",
                    type=int)


def train_test_data():
    data = np.load(splited_data_file)
    return data['x_train'], data['x_test'], data['y_train'], data['y_test']


if __name__ == '__main__':
    print("Start read data")
    args = parser.parse_args()

    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)

    x_train, x_test = [], []
    y_train, y_test = [], []

    for class_code, directory in enumerate([music_dir, speech_dir]):
        filenames = os.listdir(directory)
        n_train_samples = math.floor(len(filenames) * train_size)
        train_filenames = random.sample(filenames, k=n_train_samples)

        for filename in filenames:
            full_filepath = os.path.join(directory, filename)
            _, wav_vector = wavfile.read(full_filepath)

            if filename in train_filenames:
                x_train.append(wav_vector)
            else:
                x_test.append(wav_vector)

        classes = np.repeat(class_code, len(filenames))
        y_train = np.concatenate([y_train, classes[:n_train_samples]])
        y_test = np.concatenate([y_test, classes[n_train_samples:]])

    x_train = np.array(x_train)
    x_test = np.array(x_test)

    print("Train data shape: {}".format(x_train.shape))
    print("Test data shape: {}".format(x_test.shape))

    print("Save data in file")
    np.savez(splited_data_file, x_train=x_train, x_test=x_test,
             y_train=y_train, y_test=y_test)

NameError: name '__file__' is not defined