In [46]:
import numpy as np
import numpy.polynomial.polynomial as poly
import scipy as sp
import scipy.interpolate 
from pprint import pprint
import math
%pprint

Pretty printing has been turned OFF


In [47]:
# Dataset layout constants shared by the cells below.
speakerN = 9    # number of speakers (width of the one-hot targets)
channelsN = 12  # feature channels per timestep in the raw data

# Per-speaker sample counts; consecutive samples belong to the same speaker.
trainSamplesNs = [30 for _ in range(speakerN)]
testSamplesNs = [31, 35, 88, 44, 29, 24, 40, 50, 29]

In [48]:
def speakerFromSampleN(sampleN, sampleNs):
    """Return the index of the first speaker whose cumulative sample count reaches sampleN.

    sampleNs holds the number of samples per speaker, in speaker order.  The
    comparison is ``sampleN <= cumulative count``, i.e. sampleN behaves like a
    1-based sample number: with sampleNs == [30, 30], values 1..30 map to
    speaker 0 and 31..60 map to speaker 1.

    NOTE: a 0-based index that equals a cumulative boundary (e.g. 30 above)
    therefore still maps to the *previous* speaker.

    Returns None when sampleN is larger than the total number of samples.
    """
    cumulative = 0
    for speakerIdx in range(len(sampleNs)):
        cumulative = cumulative + sampleNs[speakerIdx]
        if sampleN <= cumulative:
            return speakerIdx
    return None

In [49]:
def speakerNFromSampleN(sampleN, sampleNs):
    """Return the sample count (entry of sampleNs) of the speaker that
    speakerFromSampleN selects for sampleN."""
    speakerIdx = speakerFromSampleN(sampleN, sampleNs)
    return sampleNs[speakerIdx]

In [50]:
def loadData(name):
    """Read a whitespace-separated numeric text file and split it into samples.

    Every non-separator line becomes one row (a list of floats) of the current
    sample.  A line whose numbers are all 1.0 ends the current sample; a blank
    line is treated the same way (it parses to no numbers at all).  Separators
    that follow each other directly do not produce empty samples, and rows
    remaining after the last separator are returned as a final sample instead
    of being silently discarded.

    Parameters
    ----------
    name : str
        Path of the file to read.

    Returns
    -------
    list of numpy.ndarray
        One 2-D array (rows x numbers-per-line) per sample, in file order.

    Raises
    ------
    ValueError
        If a token on a line cannot be parsed as a float.
    """
    samples = []
    collector = []
    with open(name) as f:
        for line in f:
            # split() discards every kind of line ending (\n, \r\n) and
            # surrounding whitespace, so no manual newline stripping is needed.
            numbers = [float(x) for x in line.split()]
            # all() of an empty sequence is True, so blank lines count as
            # separators here as well as rows consisting only of 1.0.
            isSeparator = all(x == 1.0 for x in numbers)
            if not isSeparator:
                collector.append(numbers)
            elif collector:
                samples.append(np.array(collector))
                collector = []
    # Keep a trailing sample when the file does not end with a separator.
    if collector:
        samples.append(np.array(collector))
    return samples

# Read the raw train and test splits from the working directory.
trainDataRaw = loadData(name='ae.train')
testDataRaw = loadData(name='ae.test')

In [51]:
def prepareData(data, sampleNs):
    """Append two extra feature columns to every sample.

    For a sample with ``l`` rows the appended columns are:

    1. the relative position of each row, ``1/l, 2/l, ..., 1``;
    2. a constant column holding ``l`` divided by the number of samples
       belonging to the same speaker as this sample.

    Parameters
    ----------
    data : sequence of 2-D numpy.ndarray
        Samples ordered speaker by speaker.
    sampleNs : sequence of int
        Number of samples per speaker, in the same order as ``data``.

    Returns
    -------
    list of numpy.ndarray
        New arrays with two more columns than the inputs; the inputs are not
        modified.

    Raises
    ------
    ValueError
        If ``data`` contains more samples than ``sampleNs`` accounts for.
    """
    counts = np.asarray(sampleNs, dtype=int)
    # One entry per sample: the sample count of the speaker owning that sample.
    # This is looked up by 0-based position; speakerNFromSampleN is not used
    # here because its cumulative test treats its argument as a 1-based count,
    # which assigned the first sample of each later speaker to the previous one.
    perSampleSpeakerN = np.repeat(counts, counts)
    if len(data) > len(perSampleSpeakerN):
        raise ValueError("data holds %d samples but sampleNs only covers %d"
                         % (len(data), len(perSampleSpeakerN)))

    dataNew = []
    for i, sample in enumerate(data):
        l = len(sample)
        timeFraction = (np.arange(1, l + 1) / float(l)).reshape(l, 1)
        lengthFeature = np.full((l, 1), float(l) / perSampleSpeakerN[i])
        dataNew.append(np.hstack((sample, timeFraction, lengthFeature)))
    return dataNew
        
# Add the two derived feature columns to both splits.
trainDataPrepared = prepareData(data=trainDataRaw, sampleNs=trainSamplesNs)
testDataPrepared = prepareData(data=testDataRaw, sampleNs=testSamplesNs)

In [52]:
def prepareTargets(data, Ns, classesN=None):
    """Build a one-hot target matrix for every sample.

    Samples are consumed from ``data`` in order: the first ``Ns[0]`` samples
    belong to speaker 0, the next ``Ns[1]`` to speaker 1, and so on.  Each
    target has one row per row of its sample and a 1.0 in the column of the
    owning speaker.

    Parameters
    ----------
    data : sequence
        Samples; only ``len(sample)`` is used.
    Ns : sequence of int
        Number of samples per speaker.
    classesN : int, optional
        Number of target columns.  Defaults to the module-level ``speakerN``.

    Returns
    -------
    list of numpy.ndarray
        One ``(len(sample), classesN)`` array per sample.
    """
    if classesN is None:
        classesN = speakerN
    targets = []
    for speakerI, speakerSampleN in enumerate(Ns):
        # range (not xrange) so the cell runs on both Python 2 and Python 3.
        for _ in range(speakerSampleN):
            # len(targets) is the index of the next sample without a target.
            l = len(data[len(targets)])
            target = np.zeros((l, classesN))
            target[:, speakerI] = 1.0
            targets.append(target)
    return targets
        
# One-hot speaker targets, one matrix per sample, for both splits.
trainTargets = prepareTargets(data=trainDataPrepared, Ns=trainSamplesNs)
testTargets = prepareTargets(data=testDataPrepared, Ns=testSamplesNs)

In [53]:
def normalizeData(data, shifts = None, scales = None, channels = 12):
    """Shift and scale the first ``channels`` columns of every sample.

    Each normalised value is ``(value + shift) * scale`` with one shift and one
    scale per column.  Columns from ``channels`` onwards are copied unchanged.
    When ``shifts`` or ``scales`` is not given it is derived from ``data`` so
    that the pooled column minimum maps to 0 and the pooled maximum to 1:
    ``shifts = -min`` and ``scales = 1 / (max - min)``.  A column whose pooled
    minimum equals its maximum gets scale 1.0 so the result stays finite.

    Unlike a slice-and-``+=`` implementation, the input arrays are left
    untouched; new arrays are returned.

    Parameters
    ----------
    data : sequence of 2-D numpy.ndarray
        Samples with at least ``channels`` columns.
    shifts, scales : array-like of length ``channels``, optional
        Precomputed per-column parameters (e.g. from the training split).
    channels : int, optional
        Number of leading columns to normalise (default 12).

    Returns
    -------
    (list of numpy.ndarray, numpy.ndarray, numpy.ndarray)
        The normalised copies and the shifts and scales that were applied.

    Raises
    ------
    ValueError
        If a parameter has to be derived but ``data`` is empty.
    """
    if shifts is None or scales is None:
        # Stack once instead of growing an array with np.append per sample.
        stacked = np.concatenate(
            [np.asarray(sample)[:, 0:channels] for sample in data], axis = 0)
        minVals = stacked.min(axis = 0)
        if shifts is None:
            shifts = -minVals
        if scales is None:
            spans = stacked.max(axis = 0) - minVals
            # Avoid division by zero for constant columns.
            spans = np.where(spans == 0, 1.0, spans)
            scales = 1.0 / spans

    shifts = np.asarray(shifts, dtype = float)
    scales = np.asarray(scales, dtype = float)

    normalized = []
    for sample in data:
        result = np.array(sample, dtype = float)  # explicit copy
        result[:, 0:channels] = (result[:, 0:channels] + shifts) * scales
        normalized.append(result)

    return normalized, shifts, scales
    

# Derive shift/scale from the training split, then apply the same
# parameters to the test split.
trainDataNormalized, trainShifts, trainScales = normalizeData(data=trainDataPrepared)
testDataNormalized, _, _ = normalizeData(
    data=testDataPrepared,
    shifts=trainShifts,
    scales=trainScales,
)

In [97]:
#Interpolate datapoints so that each sample has only four timesteps
def smoothenData(data, smoothLength=4, polyOrder=3, channels=12):
    """Reduce every sample to ``smoothLength`` timesteps.

    For each sample a polynomial of degree ``polyOrder`` is least-squares
    fitted per channel over the timestep index, evaluated at the original
    timesteps, and the smoothed curve is then linearly interpolated at
    ``smoothLength`` evenly spaced positions spanning the first to the last
    timestep.

    Parameters
    ----------
    data : sequence of 2-D numpy.ndarray
        Samples of shape (timesteps, >= channels); only the first
        ``channels`` columns are used.
    smoothLength : int, optional
        Number of timesteps in every returned sample (default 4).
    polyOrder : int, optional
        Degree of the fitted polynomial (default 3).
    channels : int, optional
        Number of leading columns to process (default 12).

    Returns
    -------
    list of numpy.ndarray
        One independent ``(smoothLength, channels)`` array per input sample.
    """
    smoothend = []
    for sample in data:
        size = sample.shape[0]
        # ndarray (not range) so polyval broadcasts over all channels at once.
        steps = np.arange(size)
        polycoeff = poly.polyfit(steps, sample[:, 0:channels], polyOrder)
        # Shape (channels, size): one smoothed curve per channel.
        sampleSmooth = poly.polyval(steps, polycoeff)

        pos = np.arange(1, size + 1)
        interpolCoords = np.linspace(1, size, smoothLength)
        # interp1d interpolates along the last axis, i.e. over time.
        resampled = sp.interpolate.interp1d(pos, sampleSmooth)(interpolCoords)

        # A fresh array per sample; sharing one buffer across iterations
        # would make every list entry alias the last sample's values.
        smoothend.append(np.ascontiguousarray(resampled.T))
    return smoothend

# Resample both normalised splits with the default smoothing settings.
trainDataSmoothend = smoothenData(data=trainDataNormalized)
testDataSmoothend = smoothenData(data=testDataNormalized)

(4, 12)
