In [1]:
%%html
<!-- Presentation-only cell: imports the stylesheet at ../style.css and replaces
     IPython.OutputArea.prototype._should_scroll with a function that always
     returns false (presumably so long outputs are never put in a scroll box; confirm). -->
<style>@import url('../style.css')</style><script>IPython.OutputArea.prototype._should_scroll = function(){return false}</script>

In [None]:
class Dataset:
    """Fold-based train/validation/test split of a feature table, prepared for a network.

    The constructor selects rows by their ``fold`` value, normalises the feature
    columns with the training-set mean/std, reshapes them to ``shape``, appends a
    delta channel and wraps the result in ``pl2.datasets.DenseDesignMatrix``
    objects. ``validate_model`` / ``test_model`` then score a trained model per
    file (rows are grouped by their ``filename`` column).

    Relies on names defined elsewhere in the notebook: ``np``, ``pd``, ``pl2``,
    ``librosa``, ``theano`` and the module-level ``NET_MODE`` switch.

    The code is written to run unchanged on both Python 2 and Python 3.
    """

    def __init__(self, features, fold_testing, fold_validation, shape=(-1, 60, 101, 1)):
        """Build the split and the normalised network inputs.

        Parameters
        ----------
        features : pandas.DataFrame
            Must contain the columns ``fold``, ``augmented``, ``category`` and
            ``filename``, plus a contiguous run of feature columns starting at
            ``'logspec_b0_f0'`` and ending at the last column of the frame.
        fold_testing, fold_validation
            ``fold`` values held out for testing and validation; every other
            fold is used for training.
        shape : tuple
            Target shape for ``np.reshape(..., order='F')`` of the feature
            matrix (presumably (rows, bands, frames, channels) -- TODO confirm).
        """
        is_test = features['fold'] == fold_testing
        is_validation = features['fold'] == fold_validation
        is_original = features['augmented'] == 0

        # Training keeps augmented rows; validation and test use originals only.
        train = features[~is_test & ~is_validation]
        validation = features[is_validation & is_original]
        self.test = features[is_test & is_original].copy()
        self.validation_pd = validation.copy()

        self.shape = shape
        self.start = 'logspec_b0_f0'
        self.end = features.columns[-1]
        class_count = len(pd.unique(features['category']))

        # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
        X = train.loc[:, self.start:self.end].values
        y = Dataset.to_one_hot(train['category'].values, class_count)

        X_validation = validation.loc[:, self.start:self.end].values
        y_validation = Dataset.to_one_hot(validation['category'].values, class_count)

        # Normalisation statistics come from the training rows only and are
        # reused for validation and test data.
        self.X_mean = np.mean(X)
        self.X_std = np.std(X)

        X = self._to_network_input(X)
        X_validation = self._to_network_input(X_validation)

        self.train = pl2.datasets.DenseDesignMatrix(topo_view=X, y=y)
        self.validation = pl2.datasets.DenseDesignMatrix(topo_view=X_validation, y=y_validation)

    def _to_network_input(self, raw):
        """Normalise a 2-D feature matrix, reshape it to ``self.shape`` and add deltas."""
        scaled = (raw - self.X_mean) / self.X_std
        shaped = np.reshape(scaled, self.shape, order='F')
        return self.generate_deltas(shaped)

    def generate_deltas(self, X):
        """Return ``X`` with its last axis doubled and a delta channel filled in.

        A zero block of the same shape as ``X`` is appended along axis 3, then
        index 1 of that axis is overwritten, per sample, with
        ``librosa.feature.delta`` of index 0. ``X`` itself is not modified.
        """
        X = np.concatenate((X, np.zeros(np.shape(X))), axis=3)

        for i in range(len(X)):
            X[i, :, :, 1] = librosa.feature.delta(X[i, :, :, 0])

        return X

    def _verify_predictions(self, model, df, verbose):
        """Score ``model`` on the rows of ``df`` and return per-file accuracy.

        Side effect: prediction columns are written into ``df`` (``prediction``
        in ``'majority'`` mode, ``prediction0..N-1`` otherwise).

        * ``NET_MODE == 'majority'``: every row gets a predicted class and each
          file takes the most frequent value (first entry of ``value_counts``).
        * otherwise: the per-class network outputs are averaged per file and the
          arg-max of the averages is the file's prediction.
        """
        X = self._to_network_input(df.loc[:, self.start:self.end].values)

        if NET_MODE == 'majority':
            df.loc[:, 'prediction'] = Dataset.predict(model, X)

            group = df.groupby('filename', sort=False)
            group = group[['category', 'prediction']].agg(lambda x: x.value_counts().index[0])  # majority vote
        else:
            predictions = Dataset.predict(model, X)
            # A real list (not `map`): it is iterated and then concatenated
            # below, which fails with Python 3's lazy `map` object.
            cols = ['prediction' + str(i) for i in range(np.shape(self.train.y)[1])]

            for idx, col in enumerate(cols):
                df.loc[:, col] = predictions[:, idx]

            group = df.groupby('filename', sort=False)
            group = group[['category'] + cols].mean()
            group.loc[:, 'prediction'] = np.argmax(group[cols].values, axis=1)

        accuracy = np.sum(group['category'] == group['prediction']) / float(len(group['category']))

        if verbose:
            # Single-argument call form prints identically on Python 2 and 3.
            print('Test accuracy: ' + str(int(accuracy * 1000) / 10.0) + '%')

        return accuracy

    def validate_model(self, model, verbose=True):
        """Return the per-file accuracy of ``model`` on the validation fold."""
        return self._verify_predictions(model, self.validation_pd, verbose)

    def test_model(self, model, verbose=True):
        """Return the per-file accuracy of ``model`` on the test fold."""
        return self._verify_predictions(model, self.test, verbose)

    @classmethod
    def predict(cls, model, X):
        """Run ``model`` forward on ``X`` through a compiled theano function.

        Returns the raw ``model.fprop`` outputs, or their arg-max along axis 1
        when ``NET_MODE == 'majority'``.
        """
        inputs = model.get_input_space().make_theano_batch()
        outputs = model.fprop(inputs)

        if NET_MODE == 'majority':
            outputs = theano.tensor.argmax(outputs, axis=1)

        predict = theano.function([inputs], outputs, allow_input_downcast=True)
        return predict(X)

    @classmethod
    def to_one_hot(cls, labels, class_count):
        """Encode integer ``labels`` as a ``(len(labels), class_count)`` one-hot array.

        Each label is used directly as a column index, so labels are assumed to
        be integers in ``range(class_count)``.
        """
        labels = np.asarray(labels)
        one_hot_enc = np.zeros((len(labels), class_count))

        if len(labels):
            # One fancy-indexed assignment instead of a per-row Python loop.
            one_hot_enc[np.arange(len(labels)), labels] = 1

        return one_hot_enc